# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm, following the steps discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [18]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [19]:
# Generate data
def generate_data():
  """Create the synthetic classification dataset used throughout the notebook.

  Returns:
    The ``(features, labels)`` pair produced by ``make_classification`` with
    the fixed settings below (1000 samples, 3 informative features).
  """
  settings = dict(
      n_samples=1000,
      n_features=3,
      n_redundant=0,
      n_informative=3,
      random_state=1,  # fixed seed passed to the generator
      n_clusters_per_class=1,
  )
  features, labels = make_classification(**settings)
  return features, labels

# Build the dataset once; the module-level x and y are reused by later cells.
x,y= generate_data() # feature matrix and label vector
print(x.shape, y.shape) # quick shape check

(1000, 3) (1000,)


In [20]:
def split_data(x, y, train_size=0.8, random_state=None):
    """Shuffle the samples and split them into a train and a test part.

    Features and labels are indexed with the same permutation, so rows stay
    aligned and each array keeps its own dtype (labels are not cast to the
    feature dtype).

    Args:
        x: 2-D array of shape (n, d) holding one sample per row.
        y: Array whose first axis has length n, holding one label per sample.
        train_size: Fraction of the samples that goes into the training part.
        random_state: Optional seed for a reproducible shuffle. When ``None``
            the global NumPy random state is used.

    Returns:
        Tuple ``(xtrain, xtest, ytrain, ytest)``.
    """
    n = x.shape[0]
    if random_state is None:
        order = np.random.permutation(n)
    else:
        order = np.random.default_rng(random_state).permutation(n)

    # Number of training rows; computed once and shared by all four slices.
    cut = round(n * train_size)
    train_idx, test_idx = order[:cut], order[cut:]

    xtrain, ytrain = x[train_idx], y[train_idx]
    xtest, ytest = x[test_idx], y[test_idx]
    return xtrain, xtest, ytrain, ytest


In [21]:
# Keep 80% of the samples for training and hold out the rest for evaluation.
X_train, X_test, y_train, y_test= split_data(x,y,0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape) # quick shape check

(800, 3) (800,) (200, 3) (200,)


In [22]:
def covariance(x, mu):
  """Return the unbiased sample covariance matrix of the rows of ``x``.

  Args:
    x: 2-D array of shape (n, d), one observation per row.
    mu: Length-d vector the data is centred on (normally the column means).

  Returns:
    (d, d) array whose entry (i, j) is
    ``sum_k (x[k, i] - mu[i]) * (x[k, j] - mu[j]) / (n - 1)``.
  """
  # np.cov(x, rowvar=0) gives the same matrix; it is kept only as a reference
  # to check this hand-written version against.
  n = x.shape[0]
  centered = x - np.asarray(mu)
  # One matrix product accumulates every pairwise sum over the n samples.
  return (centered.T @ centered) / (n - 1)

In [23]:
# Sanity check: hand-written covariance of the full dataset around its column means.
covariance(x,x.mean(0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [24]:
# Reference value from NumPy (columns treated as variables); it should match the covariance() result.
np.cov(x,rowvar=0)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [25]:
class GDA:
  """Gaussian Discriminant Analysis classifier with one covariance per class.

  Class ``c`` is modelled as ``x | y=c ~ N(mu[c], sigma[c])`` with prior
  ``phi[c]``. Labels are expected to be the integers ``0 .. k-1`` because the
  class index is compared directly against ``y`` during fitting.
  """

  def __init__(self):
    # Parameters stay None until fit() is called.
    self.mu=None     # (k, d): one mean vector per class
    self.phi=None    # (k, 1): class priors
    self.sigma=None  # (k, d, d): one covariance matrix per class

  def fit(self,x,y):
    """Estimate the class priors, means and covariance matrices.

    Args:
      x: (m, d) array of training samples.
      y: Length-m array of class labels in ``0 .. k-1``.
    """
    k= len(np.unique(y)) # Number of classes.
    d= x.shape[1] # Input dimension.

    self.mu= np.zeros((k,d))
    self.sigma= np.zeros((k,d,d))
    self.phi= np.zeros((k,1))

    for cl in range(k):
      members= (y==cl)
      self.phi[cl]=np.mean(members)  # fraction of samples in class cl
      self.mu[cl]=np.mean(x[members],axis=0)
      self.sigma[cl]=covariance(x[members],self.mu[cl])

  def predict_proba(self,x):
    """Return the per-class joint density ``phi[c] * N(x; mu[c], sigma[c])``.

    The scores are not normalised across classes; dividing each row by its
    sum would give the posterior ``p(y | x)``.

    Args:
      x: (n, d) array of samples.

    Returns:
      (n, k) array with one column of scores per class.
    """
    n,d= x.shape
    k_class= len(self.mu) # Number of classes.
    priors= np.ravel(self.phi)  # scalar prior per class, whether phi is (k,1) or (k,)
    two_pi_term= (2*np.pi)**(d/2)

    probas=np.zeros((n,k_class))
    for cl in range(k_class):
      det=np.linalg.det(self.sigma[cl])
      inverse=np.linalg.inv(self.sigma[cl])
      diff= x-self.mu[cl]
      # Squared Mahalanobis distance of every row to the class mean.
      maha=np.sum((diff@inverse)*diff,axis=1)
      # The Gaussian normaliser is 1 / ((2*pi)^(d/2) * sqrt(det)): the
      # determinant term belongs in the denominator.
      density=np.exp(-0.5*maha)/(two_pi_term*np.sqrt(det))
      probas[:,cl]=priors[cl]*density
    return probas

  def predict(self,x):
    """Return, for each row of ``x``, the index of the highest-scoring class."""
    probas=self.predict_proba(x)
    ypred=np.argmax(probas,axis=1)
    return ypred

  def accuracy(self, y, ypreds):
    """Return the percentage (0-100) of predictions equal to the labels."""
    return np.mean(ypreds==y)*100

In [26]:
# Create the classifier and estimate its parameters on the training split.
model= GDA()
model.fit(X_train,y_train)

In [None]:
# Per-class scores for the held-out samples; the bare name displays the array in the notebook.
yproba= model.predict_proba(X_test)
yproba

In [None]:
# Predicted class index for each held-out sample; the bare name displays the array in the notebook.
ypreds= model.predict(X_test)
ypreds


In [None]:
# Percentage of held-out samples whose predicted class matches the true label.
model.accuracy(y_test, ypreds)