# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [39]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [40]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [41]:
y=y.reshape(-1,1)
y.shape

(1000, 1)

In [42]:
def split_data(x,y, train_size= 0.8):

    # shuffle the data to randomize the train/test spliT
  np.random.seed(0)
  data=np.hstack((x,y))
  data1=np.random.permutation(len(x))
  data = data[data1]
  var=len(x)*train_size
  X_train,Y_train=data[:int(var), :-1], data[:int(var),-1]
  X_test,Y_test=data[int(var):, : -1],data[int(var): , -1]

  return X_train,Y_train,X_test,Y_test
  



In [43]:
X_train,y_train,X_test,y_test= split_data(x,y, train_size= 0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [44]:
def covariance(x, mu):
  N=x.shape[1]
  
  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  sigma=np.zeros((x.shape[1],x.shape[1]))
 
  for i in range(N):
    for j in range(N):
      covariance=0
      for d in x:
        covariance+= (d[i]-mu[i])*(d[j]-mu[j])
      sigma[i,j]=covariance/(len(x)-1)
  return sigma
      


In [45]:
a=covariance(X_train,X_train.mean(0))
a

array([[1.81780125, 0.00278495, 1.00021288],
       [0.00278495, 0.98802231, 0.04507526],
       [1.00021288, 0.04507526, 1.73006042]])

In [46]:
cov= np.cov(X_train, rowvar=0)
cov

array([[1.81780125, 0.00278495, 1.00021288],
       [0.00278495, 0.98802231, 0.04507526],
       [1.00021288, 0.04507526, 1.73006042]])

In [73]:
from IPython.lib.display import YouTubeVideo
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu=None
    self.phi=None
    self.sigma=None
    
  def fit(self,x,y):
    k=2 # Number of class.
    d=x.shape[1]  # input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma= np.zeros((k,d,d)) #: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros((d))# d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.

    for phanie in range(k):
      self.mu[phanie]=np.mean(x[phanie==y],axis=0)
      self.phi[phanie]=np.sum(phanie == y)/m
      self.sigma[phanie]=covariance((x[phanie==y]),self.mu[phanie])

    return self.mu,self.sigma,self.phi





  def predict_proba(self,x):
    # reshape or flatt x.
    #x=x.reshape(-x.shape[0],1)
    d= x.shape[1]
    k_class= self.mu.shape[0] # Number of classes we have in our case it's k = 2
    m= x.shape[0]
    
    ## START THE LEARNING: estimate mu, phi and sigma.

    det=[]
    inv_det=[]
    scores=np.zeros((x.shape[0],self.mu.shape[0]))

    for lab in range(k_class):
      a=np.linalg.det(self.sigma[lab])
      det.append(a)
      b=np.linalg.inv(self.sigma[lab])
      inv_det.append(b)
      for j in range(m):
       scores[j, lab] = (1/((2*np.pi)**(d/2))*np.sqrt(det[lab]))*np.exp((-1/2)*(x[j] - self.mu[lab]).T@inv_det[lab]@(x[j] - self.mu[lab]))*self.phi[lab]
    return scores

  def predict(self,x):
    probability = self.predict_proba(x)
    return np.argmax(probability, axis = 1)
    pass
  

  def accuracy(self,ypreds,y):
    return np.mean(y==ypreds)*100


  
 

In [74]:
model= GDA()
model.fit(X_train,y_train)

(array([[ 1.02275133,  1.04572584,  1.00413266],
        [-0.98392793,  0.97138702, -0.92252973]]),
 array([[[ 0.84003779, -0.3843979 , -0.05412644],
         [-0.3843979 ,  1.60593247,  0.08878339],
         [-0.05412644,  0.08878339,  0.03571483]],
 
        [[ 0.78132385,  0.32954533,  0.12345307],
         [ 0.32954533,  0.34452814, -0.07353813],
         [ 0.12345307, -0.07353813,  1.60018851]]]),
 array([0.51, 0.49, 0.  ]))

In [75]:
yproba= model.predict_proba(X_test)
yproba

array([[2.07238787e-072, 6.81779258e-003],
       [8.37656050e-023, 2.32429510e-003],
       [2.27370427e-003, 2.96015780e-005],
       [2.71270609e-094, 5.39277762e-003],
       [3.50660206e-107, 3.89007180e-003],
       [3.88577784e-055, 1.35150639e-002],
       [3.16095634e-003, 1.79828389e-003],
       [1.55405773e-014, 1.17784561e-002],
       [4.72932885e-003, 1.23241262e-006],
       [2.49393967e-006, 3.71284235e-003],
       [2.43826489e-066, 1.43645871e-003],
       [9.48735767e-018, 6.21219045e-003],
       [3.38770586e-038, 1.35480388e-003],
       [1.91879969e-018, 2.47433339e-003],
       [4.53014885e-004, 2.26495961e-009],
       [4.32769634e-072, 3.99189634e-003],
       [7.24017638e-053, 6.92852823e-005],
       [7.68580231e-059, 1.30950608e-002],
       [5.20713030e-010, 3.28740700e-003],
       [6.44980463e-063, 1.15347528e-002],
       [2.73562087e-090, 2.05223243e-003],
       [3.72154549e-003, 3.15386861e-004],
       [1.97737272e-061, 8.76084331e-003],
       [2.0

In [76]:
ypreds= model.predict(X_test)
ypreds


array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [77]:
model.accuracy(y_test, ypreds)

96.0