# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [2]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [3]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
  
  n = int(len(x)*train_size)
  indices = np.arange(len(x))
  np.random.shuffle(indices)
  train_idx = indices[: n]
  test_idx = indices[n:]
  x_train, y_train = x[train_idx], y[train_idx]
  x_test, y_test = x[test_idx], y[test_idx]

  return x_train, x_test, y_train, y_test


In [4]:
X_train, X_test, y_train, y_test= split_data(x, y) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [5]:
def covariance(x, mu):

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  sigma = np.zeros((x.shape[1], x.shape[1]))
  for i in range(x.shape[1]):
    for j in range(x.shape[1]):
      s = 0
      for d in x:
        s += (d[i] - mu[i])*(d[j] - mu[j])
      sigma[i, j] = (1/(len(x) - 1)) * s

  return sigma


In [6]:
covariance(x, x.mean(axis=0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [7]:
np.cov(x, rowvar = False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [8]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None
    
  def fit(self,x,y):
    k = len(np.unique(y)) # Number of class.
    d = x.shape[1]  # input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu = np.zeros((k, d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma = np.zeros((k, d, d)) #: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros(k) # d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.
    for i in range(k):
      self.phi[i] = np.sum(y == i) / m
      self.mu[i] = np.mean(x[y == i], axis = 0)
      self.sigma[i] = covariance(x[y == i], self.mu[i])
    


  def predict_proba(self,x):
    # reshape or flatt x.
    #x = ...
    d= x.shape[1]
    k_class= self.mu.shape[0] # Number of classes we have in our case it's k = 2
    det_cov = []
    inv_cov = []
    mat_prob = np.zeros((x.shape[0], k_class))
    for lab in range(k_class):
      detcov = np.linalg.det(self.sigma[lab])
      det_cov.append(detcov)
      invcov = np.linalg.inv(self.sigma[lab])
      inv_cov.append(invcov)
      for i in range(x.shape[0]):
        mat_prob[i, lab] = 1/((2*np.pi)**(d/2)*(det_cov[lab])**(1/2)) * np.exp(-1/2*(x[i] - self.mu[lab]).T @ inv_cov[lab] @ (x[i] - self.mu[lab]))
      
    return mat_prob 

    
    ## START THE LEARNING: estimate mu, phi and sigma.

  def predict(self,x):
    proba = self.predict_proba(x) * self.phi
    return np.argmax(proba, axis = 1)

  
  def accuracy(self, y, ypreds):
    acc = np.mean(y == ypreds)
    return acc * 100

In [9]:
model= GDA()
model.fit(X_train,y_train)

In [10]:
model.sigma.shape

(2, 3, 3)

In [11]:
yproba= model.predict_proba(X_test)
yproba

array([[2.56718491e-001, 1.00251820e-005],
       [5.69599324e-095, 1.87513024e-002],
       [9.37119054e-055, 1.01814243e-001],
       [2.24918208e-001, 2.96958029e-003],
       [2.38540835e-028, 2.39823510e-002],
       [1.71363438e-001, 1.39879290e-003],
       [2.36628908e-001, 4.38018384e-004],
       [1.46101828e-001, 7.57691612e-008],
       [3.01593231e-011, 4.37916551e-002],
       [1.96445852e-001, 1.94135510e-002],
       [1.08281350e-001, 8.37080266e-003],
       [2.56811244e-002, 2.51806064e-006],
       [2.64276360e-052, 4.41357883e-002],
       [5.68593767e-002, 7.89636040e-006],
       [5.46367213e-051, 4.22277766e-002],
       [2.09974784e-015, 6.46076100e-002],
       [3.67347073e-043, 1.11895070e-001],
       [8.57451535e-002, 9.46151846e-005],
       [2.16005110e-002, 1.83342066e-017],
       [4.89272163e-003, 1.74615854e-002],
       [2.31468570e-001, 3.72808964e-006],
       [1.34555234e-013, 5.71905316e-002],
       [3.74139269e-034, 7.05437064e-002],
       [4.8

In [12]:
ypreds= model.predict(X_test)
ypreds


array([0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0])

In [13]:
model.accuracy(y_test, ypreds)

98.0