# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [46]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [47]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    n=x.shape[0]
    data=np.concatenate([x,y.reshape(-1,1)], axis=1)
    np.random.shuffle(data)
    X_train, X_test=data[:int(n*train_size),:-1], data[int(n*train_size):,:-1]
    y_train, y_test=data[:int(n*train_size),-1], data[int(n*train_size):,-1]
    
    return X_train, X_test, y_train, y_test

In [48]:
X_train, X_test, y_train, y_test= split_data(x,y,0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [37]:
def covariance(x, mu):
  n,d=x.shape
  cov=np.zeros((d,d))
  print(cov.shape)
  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  for i in range(d):
    for j in range(d):
      cov[i,j]=np.sum((x[:,i]-mu[i])*(x[:,j]-mu[j]))/n-1
  return cov

In [38]:
covariance(X_train, mu=np.mean(x, axis=0))

(3, 3)


array([[ 0.84469322, -0.98336219, -0.01967872],
       [-0.98336219,  0.03110933, -0.94072443],
       [-0.01967872, -0.94072443,  0.68385262]])

In [39]:
def covariance(x, mu):
  n,d=x.shape
  cov=np.zeros((d,d))
  print(cov.shape)
  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  for i in range(d):
    for j in range(d):
      for k in range(n):
        cov[i,j]=np.sum((x[k,i]-mu[i])*(x[k,j]-mu[j]).T)/(n-1)
  return cov

In [41]:
covariance(X_train, mu=np.mean(x, axis=0))

(3, 3)


array([[0.00121085, 0.00116639, 0.00105858],
       [0.00116639, 0.00112355, 0.00101971],
       [0.00105858, 0.00101971, 0.00092546]])

In [43]:
cov=np.cov(X_train, rowvar=0)

In [77]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu=None
    self.phi=None
    self.sigma=None
    
  def fit(self,x,y):
    k=  2 # Number of class.
    d= x.shape[1]  # input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma= np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros(d)# d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.
   
    for idx in range(k): 
      self.phi[idx]=np.sum(x[idx==y])/m
      self.mu[idx]=np.mean(x[idx==y], axis=0)
      self.sigma[idx]=covariance(x[idx==y], self.mu[idx])
    #print(self.phi)
    #print(self.mu)
    print(self.sigma)

  def predict_proba(self,x):
    
    # reshape or flatt x.
    #x= np.reshape(-1,1)
    d= x.shape[1]
    k_class=self.mu.shape[0] # Number of classes we have in our case it's k = 2

    proba=np.zeros((x.shape[0],2))
    
    ## START THE LEARNING: estimate mu, phi and sigma.
    for i in range(k_class):
      sigma_det=np.linalg.det(self.sigma[i])
      print(sigma_det)
      sigma_inv=np.linalg.inv(self.sigma[i])
      
      for j in range(x.shape[0]):
        print('shape of x ', x[j].shape)
        print('shape of sigma', sigma_inv.shape)
        print('shape of mu', self.mu[i].shape)
        proba[i, j]=(1/(2*np.pi)**(d/2)* sigma_det**1/2) *np.exp(-1/2*(x[j]-self.mu[i]).T@sigma_inv@(x[j]-self.mu[i]))
    print(proba)
    #return proba


  def predict(self,x):
    pass
    
  
  def accuracy(self, y, ypreds):
    pass

In [75]:
model= GDA()
model.fit(X_train,y_train)

(3, 3)
(3, 3)
[[[ 1.97259906e-06  6.76253928e-06 -4.01872635e-06]
  [ 6.76253928e-06  2.31835950e-05 -1.37771509e-05]
  [-4.01872635e-06 -1.37771509e-05  8.18724992e-06]]

 [[ 1.83906649e-03  1.93729194e-03 -2.57047766e-03]
  [ 1.93729194e-03  2.04076366e-03 -2.70776814e-03]
  [-2.57047766e-03 -2.70776814e-03  3.59277678e-03]]]


In [76]:
yproba= model.predict_proba(X_test)
yproba

9.703767973039057e-48
shape of x  (3,)
shape of sigma (3, 3)
shape of mu (3,)


ValueError: ignored

In [12]:
ypreds= model.predict(X_test)
ypreds

In [13]:
model.accuracy(y_test, ypreds)