# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [19]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [20]:
def split_data(x,y, train_size= 0.8):

    # shuffle the data to randomize the train/test split
    np.random.seed(0) 
    n = int(len(x)*train_size)
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    train_idx = indices[: n]
    test_idx = indices[n:]
    X_train, y_train = x[train_idx], y[train_idx]
    X_test, y_test = x[test_idx], y[test_idx]

    return X_train, y_train, X_test, y_test
    


In [22]:
X_train, y_train, X_test, y_test= split_data(x,y, train_size= 0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [56]:
def covariance(x, mu):

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  N = x.shape[1]
  sigma = np.zeros((x.shape[1],x.shape[1]))
  for i in range(N):
    for j in range(N):
      va1 = 0
      for d in x:
        va1 += (d[i] - mu[i])*(d[j] - mu[j])

      va1 = va1/(len(x)-1)
      sigma[i,j] = va1
      
  return sigma



In [57]:
covariance(X_train, X_train.mean(0))

array([[1.81780125, 0.00278495, 1.00021288],
       [0.00278495, 0.98802231, 0.04507526],
       [1.00021288, 0.04507526, 1.73006042]])

In [58]:
np.cov(X_train, rowvar=0)

array([[1.81780125, 0.00278495, 1.00021288],
       [0.00278495, 0.98802231, 0.04507526],
       [1.00021288, 0.04507526, 1.73006042]])

In [146]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None
    
  def fit(self,x,y):
    k = 2 # Number of class.
    d = x.shape[1]  # input dim
    m = x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k, d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma= np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros((d))# d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.

    for god in range(k):
      self.mu[god] = np.mean(x[god == y], axis = 0)
      self.sigma[god] = covariance(x[god == y], self.mu[god])
      self.phi[god] = np.sum(god == y)/m

    return self.mu, self.sigma, self.phi



  def predict_proba(self,x):
    # reshape or flatt x.
    #x = x.reshape(-x.shape[0],1)
    d = x.shape[1] 
    k_class =  self.mu.shape[0] # Number of classes we have in our case it's k = 2
    m = x.shape[0]

    ## START THE LEARNING: estimate mu, phi and sigma.

    score = np.zeros((x.shape[0], k_class))
    det_cov = []
    inv_cov = []
    
    for lab in range(k_class):
      #det_cov = np.linalg.det(self.sigma[lab])
      det_cov_ = np.linalg.det(self.sigma[lab])
      inv_cov_ = np.linalg.inv(self.sigma[lab])
      det_cov.append(det_cov_)
      inv_cov.append(inv_cov_)
      for i in range(m):
        score[i, lab] = (1/((2*np.pi)**(d/2))*np.sqrt(det_cov[lab]))*np.exp((-1/2)*(x[i] - self.mu[lab]).T@inv_cov[lab]@(x[i] - self.mu[lab]))*self.phi[lab]

    return score



    
  def predict(self,x):
    prob = self.predict_proba(x)
    return np.argmax(prob, axis = 1)
    pass
  
  def accuracy(self, y, ypreds):
    acc = np.mean(y==ypreds)*100
    return acc
    pass

In [141]:
model= GDA()
model.fit(X_train,y_train)

(array([[ 1.02275133,  1.04572584,  1.00413266],
        [-0.98392793,  0.97138702, -0.92252973]]),
 array([[[ 0.84003779, -0.3843979 , -0.05412644],
         [-0.3843979 ,  1.60593247,  0.08878339],
         [-0.05412644,  0.08878339,  0.03571483]],
 
        [[ 0.78132385,  0.32954533,  0.12345307],
         [ 0.32954533,  0.34452814, -0.07353813],
         [ 0.12345307, -0.07353813,  1.60018851]]]),
 array([0.51, 0.49, 0.  ]))

In [142]:
model.sigma.ndim

3

In [143]:
yproba= model.predict_proba(X_test)
yproba

array([[4.06350563e-072, 1.39138624e-002],
       [1.64246284e-022, 4.74345939e-003],
       [4.45824366e-003, 6.04113838e-005],
       [5.31903154e-094, 1.10056686e-002],
       [6.87569032e-107, 7.93892204e-003],
       [7.61917224e-055, 2.75817630e-002],
       [6.19795361e-003, 3.66996712e-003],
       [3.04717201e-014, 2.40376655e-002],
       [9.27319382e-003, 2.51512780e-006],
       [4.89007778e-006, 7.57722928e-003],
       [4.78091154e-066, 2.93154839e-003],
       [1.86026621e-017, 1.26779397e-002],
       [6.64256051e-038, 2.76490588e-003],
       [3.76235234e-018, 5.04965997e-003],
       [8.88264481e-004, 4.62236654e-009],
       [8.48567909e-072, 8.14672723e-003],
       [1.41964243e-052, 1.41398535e-004],
       [1.50702006e-058, 2.67246139e-002],
       [1.02100594e-009, 6.70899387e-003],
       [1.26466758e-062, 2.35403119e-002],
       [5.36396249e-090, 4.18822945e-003],
       [7.29714803e-003, 6.43646655e-004],
       [3.87720142e-061, 1.78792721e-002],
       [3.9

In [144]:
ypreds= model.predict(X_test)
ypreds


array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [145]:
model.accuracy(y_test, ypreds)

96.0