# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm, following the steps discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [128]:
# Generate data
def generate_data():
  """Create the synthetic classification dataset used in this notebook.

  Returns:
    The ``(features, labels)`` pair produced by ``make_classification`` for
    1000 samples with 3 informative features, no redundant features, one
    cluster per class and ``random_state=1``.
  """
  dataset_options = dict(
      n_samples=1000,
      n_features=3,
      n_informative=3,
      n_redundant=0,
      n_clusters_per_class=1,
      random_state=1,
  )
  features, labels = make_classification(**dataset_options)
  return features, labels

# Build the dataset once; x receives the samples and y the labels returned by generate_data().
x,y= generate_data() # get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [129]:
def split_data(x,y, train_size= 0.8):
    """Shuffle the samples and split them into a training and a test set.

    The shuffle is driven by a private generator seeded with 0, so the split
    is reproducible from call to call and the global NumPy random state is
    left untouched.

    Args:
        x: Array-like of samples, indexed along its first axis.
        y: Array-like of labels, one entry per sample in ``x``.
        train_size: Fraction of the samples assigned to the training set;
            the training set holds ``int(len(x) * train_size)`` samples.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of
            samples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n_samples = len(x)
    if len(y) != n_samples:
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )

    n_train = int(n_samples * train_size)

    # A local RandomState(0) yields the same permutation as seeding the
    # global generator with 0, without reseeding it for every other caller.
    rng = np.random.RandomState(0)
    indices = np.arange(n_samples)
    rng.shuffle(indices)

    train_idx = indices[:n_train]
    test_idx = indices[n_train:]

    return x[train_idx], x[test_idx], y[train_idx], y[test_idx]


In [130]:
# Hold out 20% of the generated data for testing; the printed shapes confirm the split sizes.
X_train, X_test, y_train, y_test= split_data(x,y,train_size=0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [118]:
def covariance(x, mu):
  """Return the sample covariance matrix of ``x`` around the centre ``mu``.

  Entry ``(i, j)`` is ``sum_n (x[n, i] - mu[i]) * (x[n, j] - mu[j]) / (N - 1)``
  where ``N`` is the number of rows of ``x``. When ``mu`` is the column mean
  of ``x`` this matches ``np.cov(x, rowvar=False)``, which is deliberately
  not used here and only serves as a reference to check the result.

  Args:
    x: Array-like of shape ``(N, d)``, one sample per row.
    mu: Array-like of shape ``(d,)``, the point the samples are centred on.

  Returns:
    A ``(d, d)`` array. With a single sample the divisor ``N - 1`` is zero
    and the entries are not finite.
  """
  x = np.asarray(x, dtype=float)
  mu = np.asarray(mu, dtype=float)
  n_samples = x.shape[0]

  # One matrix product accumulates every (i, j) sum of centred products at
  # once, replacing the per-sample, per-entry Python loops.
  centered = x - mu
  return centered.T @ centered / (n_samples - 1)

In [119]:
# Reference covariance from NumPy, used only to check the hand-written covariance().
np.cov(x,rowvar= False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [120]:
# Hand-written covariance of the full dataset around its column means, for comparison with np.cov.
covariance(x,x.mean(0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [46]:
def mu(x,k):
  """Return a zero-initialised table of class means, one row per class.

  Args:
    x: Array-like whose last axis is the feature axis: either a single
      sample of shape ``(d,)`` or a sample matrix of shape ``(N, d)``.
    k: Number of classes.

  Returns:
    A ``(k, d)`` array of zeros.
  """
  # Use the last axis so a 2-D sample matrix contributes its feature count,
  # not its number of rows.
  n_features = np.shape(x)[-1]
  return np.zeros((k, n_features))

In [47]:
def phi(y,k):
  """Return a zero-initialised vector of class priors, one entry per class.

  Args:
    y: Labels; accepted for signature compatibility and not read here.
    k: Number of classes.

  Returns:
    A ``(k,)`` array of zeros.
  """
  return np.zeros(k)


In [None]:
# for i,xi in enumerate(x):
#   print(xi)

In [235]:
class GDA:
  """Gaussian Discriminant Analysis classifier with one Gaussian per class.

  Class ``c`` is modelled as a multivariate normal ``N(mu[c], sigma[c])``
  with prior ``phi[c]``; prediction picks the class with the largest
  posterior. All parameters are ``None`` until ``fit`` is called.
  """

  def __init__(self):
    self.mu=None       # (k, d): one mean vector per class
    self.sigma=None    # (k, d, d): one covariance matrix per class
    self.phi=None      # (k,): class priors
    self.classes=None  # (k,): label values, in the row order of mu/sigma/phi

  def fit(self,x,y):
    """Estimate the prior, mean and covariance of every class.

    Args:
      x: Array-like of shape ``(m, d)``, one training sample per row.
      y: Array-like of shape ``(m,)`` holding the class labels. Labels may be
        any values; they do not have to be ``0..k-1``.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    self.classes = np.unique(y)
    k = len(self.classes)  # number of classes
    m, d = x.shape         # number of examples, input dimension

    self.mu = np.zeros((k, d))
    self.sigma = np.zeros((k, d, d))
    self.phi = np.zeros(k)

    for row, label in enumerate(self.classes):
      members = x[y == label]
      self.phi[row] = len(members) / m
      self.mu[row] = np.mean(members, axis=0)
      self.sigma[row] = covariance(members, self.mu[row])

  def _log_joint(self, x):
    """Return ``log(p(x | class) * p(class))`` as an ``(n, k)`` array."""
    x = np.asarray(x, dtype=float)
    n_samples, d = x.shape
    # The class count comes from the fitted parameters, never from globals.
    n_classes = self.mu.shape[0]
    scores = np.zeros((n_samples, n_classes))

    for c in range(n_classes):
      # log|Sigma| via slogdet avoids under/overflow of the raw determinant.
      _, log_det = np.linalg.slogdet(self.sigma[c])
      cov_inv = np.linalg.inv(self.sigma[c])
      diff = x - self.mu[c]
      # Squared Mahalanobis distance of every sample to the class mean.
      maha = np.sum((diff @ cov_inv) * diff, axis=1)
      # Log of the normal density: the normaliser is (2*pi)^(d/2) * |Sigma|^(1/2).
      # Staying in log space keeps far-away samples finite instead of log(0).
      log_density = -0.5 * (d * np.log(2 * np.pi) + log_det + maha)
      scores[:, c] = log_density + np.log(self.phi[c])

    return scores

  def predict_proba(self,x):
    """Return the posterior probability of every class for every sample.

    Args:
      x: Array-like of shape ``(n, d)``, one sample per row.

    Returns:
      An ``(n, k)`` array whose rows sum to 1; column ``c`` corresponds to
      row ``c`` of ``mu``/``sigma``/``phi``.
    """
    log_joint = self._log_joint(x)
    # Subtract the row maximum before exponentiating so the largest term is
    # exp(0) = 1 and the normalising sum can never be zero.
    log_joint = log_joint - log_joint.max(axis=1, keepdims=True)
    joint = np.exp(log_joint)
    return joint / joint.sum(axis=1, keepdims=True)

  def predict(self,x):
    """Return the most probable class label for every sample in ``x``."""
    best = np.argmax(self._log_joint(x), axis=1)
    if self.classes is None:
      # Parameters were set by hand rather than by fit(): report row indices.
      return best
    return self.classes[best]

  def accuracy(self, y, ypreds):
    """Return the percentage (0-100) of predictions equal to the true labels."""
    return np.mean(np.asarray(y) == np.asarray(ypreds)) * 100

In [236]:
# Sanity-check the training set dimensions before fitting.
y_train.shape ,X_train.shape

((800,), (800, 3))

In [237]:
# Create the classifier and estimate its parameters from the training set.
model= GDA()
model.fit(X_train,y_train)

In [None]:
# Per-class scores for the test set: one row per sample, one column per class.
yproba= model.predict_proba(X_test)
yproba

In [239]:
# Predicted label for each test sample (the class with the highest score).
ypreds= model.predict(X_test)
ypreds


array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [240]:
# Percentage of test samples whose predicted label equals the true label.
model.accuracy(y_test, ypreds)

98.0