# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [114]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [115]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [116]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [117]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    N = x.shape[0]
    idx= np.random.permutation(N)
    x=x[idx]
    y=y[idx]
    x_train, y_train = x[:int(train_size*N)],y[:int(train_size*N)]
    x_test, y_test = x[int(train_size*N):],y[int(train_size*N):]
    return x_train, y_train, x_test, y_test


In [118]:
X_train, y_train, X_test, y_test= split_data(x,y) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [119]:
def covariance(x, mu):

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  cov_matrix = np.zeros((x.shape[1],x.shape[1]))
  for i in range(x.shape[1]):
    for j in range(x.shape[1]):
      #for d in x:
      cov_matrix[i,j]=np.sum((x[:,i]-mu[i])*(x[:,j]-mu[j]))/(x.shape[0]-1)
  return cov_matrix      



In [120]:
np.cov(x,rowvar=0)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [121]:
covariance(x, x.mean(0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [122]:
a= np.array([[1,3,8],[1,8,8]])
i,j=enumerate(a)
i


(0, array([1, 3, 8]))

In [133]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None
    
  def fit(self,x,y):
    k=len(np.unique(y)) # Number of class.
    d=x.shape[1]  # Input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))  #: kxd, i.e., each row contains an individual class mu.
    self.sigma= np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros(k) # k-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.

    for cl in range(k):
      self.phi[cl] = (np.sum(cl==y))/m
      self.mu[cl] = np.mean(x[(cl==y)],axis=0)
      self.sigma[cl] = covariance(x[(cl==y)], self.mu[cl])



  def predict_proba(self,x):
    # reshape or flatt x.
    #x= x.reshape(x.shape[0],-1)
    d= x.shape[1]
    k_class= self.mu.shape[0] # Number of classes we have in our case it's k = 2
    
    ## START THE LEARNING: estimate mu, phi and sigma.
 
    scores = np.zeros((x.shape[0],k_class))
    for cl in range(k_class):
      detsigma = np.linalg.det(self.sigma[cl])
      invsigma = np.linalg.inv(self.sigma[cl])
      for i in range(x.shape[0]):
        C=1/(((2*np.pi)**(d/2))*(detsigma**0.5))
        E = np.exp(-0.5*(x[i]-self.mu[cl]).T@invsigma@(x[i]-self.mu[cl]))
        Q=self.phi[cl]
        #B= Q**cl*(1-Q)**cl
        scores[i,cl] = C*E*Q
        #scores[i,cl] = (1/((2*np.pi)**d/2*(detsigma**1/2)))*np.exp((x[i]-self.mu[cl])@invsigma@(x[i]-self.mu[cl]))*self.phi[cl]
    return scores


  def predict(self,x):
   proba =self.predict_proba(x)
   y_pred =np.argmax(proba, axis=1)
   return y_pred
  
  def accuracy(self, y, ypreds):
    return np.mean(y==ypreds)*100
    

In [134]:
model= GDA()
model.fit(X_train,y_train)
model.phi

array([0.50125, 0.49875])

In [135]:
yproba= model.predict_proba(X_test)
yproba

array([[1.38115050e-001, 7.41690431e-005],
       [1.71301760e-161, 4.01241864e-003],
       [4.40657205e-002, 3.25208277e-006],
       [3.37689221e-058, 3.81808117e-002],
       [8.95686269e-002, 5.91308075e-008],
       [1.12195883e-001, 3.80383165e-004],
       [2.20968955e-077, 3.77336570e-002],
       [7.23705879e-007, 1.93678629e-002],
       [1.65099157e-012, 2.81056211e-002],
       [3.59273450e-071, 1.36452173e-002],
       [4.43673516e-048, 1.99382452e-002],
       [1.04517317e-001, 9.13765826e-006],
       [6.16159142e-002, 4.66789011e-004],
       [2.24695850e-033, 2.70488829e-002],
       [6.33040801e-002, 6.15694806e-004],
       [2.16745671e-002, 8.03950490e-005],
       [2.28941999e-002, 3.76731394e-009],
       [6.04120123e-043, 3.14988501e-003],
       [6.88520182e-003, 9.79432359e-003],
       [1.54479867e-008, 2.86377852e-002],
       [1.35525710e-024, 8.74460301e-003],
       [1.44848206e-021, 1.07562715e-002],
       [7.31293917e-002, 3.86017437e-010],
       [9.7

In [136]:
ypreds= model.predict(X_test)
ypreds


array([0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [137]:
model.accuracy(y_test, ypreds)

97.0