# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [172]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [173]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [175]:
np.random.seed(0)

In [176]:
def split_data(x,y, train_size= 0.8):
  """
  split_data(x,y, train_size= 0.8) accepts x, y, and train_size
  It returns x,y splitted to train and test according to train_size
  """
  # shuffle the data to randomize the train/test split
  data_size=x.shape[0]
  idx=np.random.permutation(data_size)
  x=x[idx]
  y=y[idx]
  train_val=round(train_size*data_size)
  x_train,y_train=x[0:train_val,:],y[0:train_val]
  x_test,y_test=x[train_val:,:],y[train_val:]
  return x_train,x_test,y_train,y_test


In [177]:
X_train, X_test, y_train, y_test= split_data(x,y) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [178]:
def covariance(x, mu):
  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  m,d=x.shape
  k=mu.shape[0]
  resl_cov=np.zeros((k,d,d))
  for cla_num in range(k):
    for num_col1 in range(d):
      for num_col2 in range(d):
        resl_cov[cla_num][num_col1,num_col2]=(1/(m-1))*np.sum( (x[:,num_col1].reshape(-1,1)-mu[cla_num,num_col1])*(x[:,num_col2].reshape(-1,1)-mu[cla_num,num_col2])    )
  return resl_cov

In [179]:
mean1=np.mean(x,axis=0);mean1=mean1.reshape(1,-1)

In [180]:
np.cov(x,rowvar=False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [181]:
covariance(x,mean1)

array([[[1.84495325, 0.02790646, 1.00137533],
        [0.02790646, 1.00170721, 0.05539176],
        [1.00137533, 0.05539176, 1.74832   ]]])

In [182]:
def multiv_gauss(x,mu,sigma):
  x=x.reshape(1,-1)
  d=mu.shape[0]
  val_1=np.linalg.det(sigma)**0.5
  val_2=(2*np.pi)**(d/2)
  val_3=np.exp(-0.5*(x-mu)@np.linalg.inv(sigma)@(x-mu).T  )
  z1=(1/(val_1*val_2) )*val_3
  return z1.reshape(-1,1)

In [206]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu= None
    self.sigma= None
    self.phi= None
    
  def fit(self,x,y):
    k=np.unique(y).shape[0] # Number of class.
    d=x.shape[1]  # input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma= np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros((k,1))# k-dimension
    #label_valu=np.unique(y)

    ## START THE LEARNING: estimate mu, phi and sigma.
    for class_num in range(k):
      ##Calculating Phi
      self.phi[class_num,0]=y_train[y==class_num].shape[0]/y.shape[0]
      idx=np.where(y==class_num)
      ##Caculating mu
      self.mu[class_num]=np.mean(x[idx],axis=0)
      ##Caculating Sigma
      self.sigma[class_num]=covariance(x, self.mu[class_num].reshape(1,-1))
    
    
  def predict_proba(self,x):
    # reshape or flatt x.
    n=x.shape[0]
    d= self.mu.shape[1]
    k_class=self.mu.shape[0] # Number of classes we have in our case it's k = 2
    det_sigma=0
    y_pred=np.zeros((n,k_class))
    for i in range(k_class):
      det_sigma=np.linalg.det(self.sigma[i])**0.5
      inv_sigma=np.linalg.inv(self.sigma[i])
      for j in range(n):
        val_2=(2*np.pi)**(d/2)
        val_3=np.exp(-0.5*(x[j].reshape(1,-1)-self.mu[i].reshape(1,-1))@inv_sigma@(x[j].reshape(1,-1)-self.mu[i]).reshape(1,-1).T  )
        z1=(1/(det_sigma*val_2) )*val_3
        y_pred[j,i]=z1*self.phi[i]
    return y_pred

    ## START THE LEARNING: estimate mu, phi and sigma.

  def predict(self,x):
    y_pred=self.predict_proba(x)
    y_res=np.argmax(y_pred,axis=1)
    return y_res
  
  def accuracy(self, y, ypreds):
    acc=np.mean(y==ypreds)*100
    return acc

In [207]:
model= GDA()
model.fit(X_train,y_train)

In [208]:
model.mu

array([[ 1.02275133,  1.04572584,  1.00413266],
       [-0.98392793,  0.97138702, -0.92252973]])

In [209]:
model.phi

array([[0.51],
       [0.49]])

In [210]:
model.sigma

array([[[2.78583678, 0.03864649, 1.92964772],
        [0.03864649, 0.98935083, 0.07950682],
        [1.92964772, 0.07950682, 2.62243381]],

       [[2.86647281, 0.04163371, 2.00706837],
        [0.04163371, 0.98946149, 0.08237492],
        [2.00706837, 0.08237492, 2.69676728]]])

In [211]:
yproba= model.predict_proba(X_test)
yproba

array([[2.49750663e-03, 1.20875369e-02],
       [9.26930267e-04, 3.11908708e-03],
       [9.30518423e-03, 2.64038083e-03],
       [1.71393978e-03, 7.39199929e-03],
       [1.30131722e-03, 8.77613859e-03],
       [4.95218359e-03, 1.45493142e-02],
       [1.23035834e-02, 6.59724126e-03],
       [7.22695457e-03, 1.16909529e-02],
       [1.27122141e-02, 5.04758484e-03],
       [2.31717523e-03, 3.15927745e-03],
       [1.13657415e-03, 6.23686102e-03],
       [1.06674704e-02, 1.10726942e-02],
       [4.28448864e-03, 1.15375044e-02],
       [7.65597737e-04, 2.19145615e-03],
       [3.69590478e-04, 1.70507796e-04],
       [1.37823207e-03, 3.03719737e-03],
       [6.68637037e-05, 6.00993700e-04],
       [4.57469926e-03, 1.37957920e-02],
       [8.46249380e-04, 1.76366916e-03],
       [3.97367514e-03, 1.38652238e-02],
       [8.08404126e-04, 2.38170181e-03],
       [1.27549862e-02, 4.71680456e-03],
       [3.39172279e-03, 8.27430000e-03],
       [9.35720105e-04, 3.45558281e-03],
       [1.115477

In [212]:
ypreds= model.predict(X_test)
ypreds


array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [213]:
model.accuracy(y_test, ypreds)

94.5