# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm, following the steps discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [132]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [133]:
# Generate data
def generate_data():
    """Create the synthetic classification dataset used in this notebook.

    Returns:
        tuple: ``(x, y)`` exactly as returned by ``make_classification`` for
        1000 samples with 3 features (all informative, none redundant), one
        cluster per class and ``random_state=1``.
    """
    settings = dict(
        n_samples=1000,
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=1,  # fixed seed: the same dataset on every run
    )
    features, labels = make_classification(**settings)
    return features, labels

# Build the dataset once: x receives the first value returned by
# generate_data() (features) and y the second (labels).
x,y= generate_data()
# Sanity-check the array shapes.
print(x.shape, y.shape)

(1000, 3) (1000,)


In [134]:
x

array([[-0.42600458,  0.3092346 ,  1.13238592],
       [ 0.23750039,  0.85236655,  1.27032566],
       [-1.50956177,  0.57932947, -0.58204952],
       ...,
       [ 0.38796174,  1.01606996, -1.499496  ],
       [-0.74578403,  1.56454128, -1.05700466],
       [ 1.08716336, -0.29150009,  0.98548405]])

In [135]:
def split_data(x, y, train_size=0.8, seed=None):
    """Shuffle the samples and split them into train and test subsets.

    Args:
        x: Feature array indexed by sample along its first axis.
        y: Label array with one entry per row of ``x``.
        train_size: Fraction of the samples placed in the training subset;
            the training count is ``int(len(x) * train_size)``.
        seed: Optional seed for a reproducible split. When ``None`` the
            global NumPy random state is used, so ``np.random.seed`` still
            controls the result.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, or ``train_size``
            lies outside ``[0, 1]``.
    """
    # Index arrays below require ndarray inputs; this also accepts lists.
    x = np.asarray(x)
    y = np.asarray(y)

    n_samples = len(x)
    if len(y) != n_samples:
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= train_size <= 1:
        raise ValueError(f"train_size must be in [0, 1], got {train_size}")

    n_train = int(n_samples * train_size)

    # Shuffle row indices rather than the data so x and y stay aligned.
    order = np.arange(n_samples)
    rng = np.random if seed is None else np.random.default_rng(seed)
    rng.shuffle(order)

    train_idx, test_idx = order[:n_train], order[n_train:]
    return x[train_idx], x[test_idx], y[train_idx], y[test_idx]

In [136]:
# Randomly partition the dataset: 80% of the shuffled rows go to the training
# set and the remaining rows to the test set.
X_train, X_test, y_train, y_test= split_data(x,y, train_size=0.8) # returns X_train, X_test, y_train, y_test in this order
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [137]:
#class
np.unique(y)

array([0, 1])

In [165]:
# for label in range(2):
#     bool_lean = (label==y)
# bool_lean

In [224]:
# Scratch cell exploring per-class indicator arrays.
# NOTE(review): `idx` is overwritten on every pass, so after the loop it only
# holds the indicator for the last label (1 where y == 1, 0 elsewhere).
for lab in range(2):
    idx = np.where(y==lab,1,0)
# Selects the zero entries of that indicator, i.e. one 0 per sample whose label
# is not 1. NOTE(review): the recorded output (502) is a scalar, so it
# presumably came from an earlier version of this cell (a count/length) — confirm.
idx[idx==0]

502

In [138]:
#   def compute_mu(x,y,k):
#     d = x.shape[1]
#     self.mu = np.zeros((k,d))
#     for i in range(k):
#         idx = np.where(y==i)
#         x_trans = x[idx].T
#         for j in range(d):
#             self.mu[i,j]=np.mean(x_trans[j])
#     return self.mu

In [139]:
#   def compute_phi(self,x):
#         d =x.shape[1]
#         self.phi = np.zeros((d,1))
#         x_trans =x.T
#         for k in range(d):
#             self.phi[k]=np.mean(x_trans[k])
#         return self.phi

In [177]:
def covariance(x, mu):
    """Return the unbiased sample covariance matrix of ``x`` around ``mu``.

    Args:
        x: Array of shape ``(m, d)`` holding the ``m`` samples of one class.
        mu: Mean used for centring, given either as a ``(d,)`` vector or as
            a ``(1, d)`` row.

    Returns:
        ``(d, d)`` array whose ``[i, j]`` entry equals
        ``sum((x[:, i] - mu[i]) * (x[:, j] - mu[j])) / (m - 1)``.

    Raises:
        ValueError: If ``x`` has fewer than two rows, because the ``m - 1``
            denominator would be zero (or negative).
    """
    x = np.asarray(x, dtype=float)
    m, d = x.shape
    if m < 2:
        raise ValueError(
            f"covariance needs at least two samples, got {m}"
        )

    # Flatten the mean so both the (d,) and the (1, d) layout are accepted.
    centred = x - np.asarray(mu, dtype=float).reshape(d)

    # One matrix product computes every (i, j) pair that the explicit
    # double loop over features would visit.
    return centred.T @ centred / (m - 1)
                

  # Shortcut: np.cov(x, rowvar=0) returns the same matrix. Do not use it in the implementation; use it only to check your own result.


In [178]:
# def covariance1(x, mu):
#     #mu.shape =(1,3) , x.shape =(m,3) for a single class
#     m,d = x.shape
#     sigma =np.zeros((d,d))
#     for i in range(d):
#         for j in range(d):
#             sigma[i, j] = np.sum((x[:, i] - mu[i] )* (x[:, j] - mu[j])) / (m - 1)
#     return sigma

In [179]:
np.cov(x,rowvar=0 )

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [180]:
# Sanity check: the hand-written estimator should reproduce np.cov(x, rowvar=0).
# `covariance1` exists only as commented-out code, so calling it raises a
# NameError on a fresh kernel; use the live `covariance` function instead.
covariance(x, x.mean(0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [251]:
class GDA:
    """Gaussian Discriminant Analysis with one full covariance per class.

    Attributes (all ``None`` until ``fit`` is called):
        classes: Sorted unique training labels, shape ``(k,)``.
        mu: Per-class feature means, shape ``(k, d)``.
        phi: Per-class priors (fraction of training samples), shape ``(k,)``.
        sigma: Per-class covariance matrices, shape ``(k, d, d)``.
    """

    def __init__(self):
        # Parameters are estimated in fit().
        self.classes = None
        self.mu = None
        self.phi = None
        self.sigma = None

    def fit(self, x, y):
        """Estimate the prior, mean and covariance of every class.

        Args:
            x: Training features, shape ``(m, d)``.
            y: Training labels, shape ``(m,)``. Labels may be any values;
                they do not have to be ``0..k-1``.

        Prints the learned ``mu``, ``phi`` and ``sigma`` and returns ``None``.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        m, d = x.shape

        # Loop over the labels that actually occur, so no class is ever empty.
        self.classes = np.unique(y)
        k = len(self.classes)

        self.mu = np.zeros((k, d))       # row i: mean of class i
        self.phi = np.zeros(k)           # entry i: prior of class i
        self.sigma = np.zeros((k, d, d))  # slice i: covariance of class i
        for row, label in enumerate(self.classes):
            mask = y == label
            members = x[mask]
            self.mu[row] = np.mean(members, axis=0)
            self.phi[row] = np.sum(mask) / m
            self.sigma[row] = covariance(members, self.mu[row])

        print(f'mu = {self.mu}')
        print(f'phi = {self.phi}')
        print(f'sigma = {self.sigma}')
        return

    def _log_joint(self, x):
        """Return ``log(p(x | y=k) * p(y=k))`` per sample and class, shape ``(n, k)``."""
        # Accept a single sample given as a 1-D vector as well as a 2-D batch.
        x = np.atleast_2d(np.asarray(x, dtype=float))
        n, d = x.shape
        k = self.mu.shape[0]

        scores = np.zeros((n, k))
        for row in range(k):
            # Invert / factor each covariance once per class, not once per sample.
            precision = np.linalg.inv(self.sigma[row])
            _, log_det = np.linalg.slogdet(self.sigma[row])
            diff = x - self.mu[row]
            # Row-wise quadratic form (x - mu)^T Sigma^-1 (x - mu).
            maha = np.sum((diff @ precision) * diff, axis=1)
            scores[:, row] = (
                np.log(self.phi[row])
                - 0.5 * (d * np.log(2 * np.pi) + log_det + maha)
            )
        return scores

    def predict_proba(self, x):
        """Return the joint density ``p(x | y=k) * p(y=k)`` for each class.

        Args:
            x: Samples, shape ``(n, d)`` (a single ``(d,)`` vector is also
                accepted and treated as one sample).

        Returns:
            Array of shape ``(n, k)``. The values are Gaussian densities
            weighted by the class prior; they are NOT normalised, so rows do
            not sum to 1.
        """
        return np.exp(self._log_joint(x))

    def predict(self, x):
        """Return the most likely class label for every sample in ``x``."""
        # Compare in log space: raw densities underflow to 0 for points far
        # from every class mean, which would make the argmax meaningless.
        best = np.argmax(self._log_joint(x), axis=1)
        if self.classes is None:
            return best
        return self.classes[best]

    def accuracy(self, y, ypreds):
        """Return the percentage (0-100) of predictions equal to the true labels."""
        # asarray makes the comparison element-wise even for plain lists.
        return np.mean(np.asarray(y) == np.asarray(ypreds)) * 100
    

In [252]:
# Instantiate the classifier and estimate its parameters on the training split;
# fit() prints the learned mu, phi and sigma.
model= GDA()
model.fit(X_train,y_train)

mu = [[ 0.9285123   1.13174885  1.01552489]
 [-0.99842585  0.96249411 -0.93427136]]
phi = [0.50625 0.49375]
sigma = [[[ 0.87073059 -0.35599351 -0.05999768]
  [-0.35599351  1.58635648  0.08263253]
  [-0.05999768  0.08263253  0.03756345]]

 [[ 0.76206072  0.3278072   0.09112742]
  [ 0.3278072   0.35757082 -0.09710484]
  [ 0.09112742 -0.09710484  1.54362085]]]


In [253]:
# Per-class scores for the held-out samples: one row per test sample, one
# column per class.
yproba= model.predict_proba(X_test)
yproba

array([[1.80768044e-006, 1.31011251e-002],
       [2.20796026e-018, 3.60153732e-002],
       [2.95396591e-013, 2.86586800e-002],
       [7.58299211e-024, 1.18247007e-002],
       [1.76991667e-056, 3.67759511e-003],
       [7.55821241e-002, 1.17472781e-002],
       [3.38084128e-004, 1.29843005e-002],
       [1.35474794e-001, 9.90825423e-004],
       [1.38596053e-001, 3.04074165e-003],
       [1.12061804e-061, 3.00929895e-002],
       [1.90920938e-067, 3.17944408e-002],
       [6.70080820e-002, 3.29942398e-004],
       [1.03279892e-041, 3.66959032e-002],
       [7.78958281e-021, 1.07708576e-002],
       [1.97188190e-002, 4.17102265e-013],
       [4.18947220e-002, 1.08831051e-007],
       [2.00216604e-033, 5.62411644e-002],
       [3.78631077e-002, 3.54168499e-011],
       [1.47876223e-006, 1.78903952e-002],
       [8.49559473e-003, 2.65184512e-007],
       [2.02346427e-034, 1.54990133e-002],
       [1.07203920e-133, 5.99508568e-003],
       [5.54468368e-002, 1.88206185e-004],
       [1.9

In [254]:
# Predicted class for each test sample (the class with the largest score).
ypreds= model.predict(X_test)
ypreds


array([1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 0])

In [255]:
model.accuracy(y_test, ypreds)

98.0