# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [287]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [288]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data()
print(x.shape, y.shape)

(1000, 3) (1000,)


In [289]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    np.random.seed(0) # To demonstrate that if we use the same seed value twice, we will get the same random number twice
  
    n = int(len(x)*train_size)
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    train_idx = indices[: n]
    test_idx = indices[n:]
    X_train, y_train = x[train_idx], y[train_idx]
    X_test, y_test = x[test_idx], y[test_idx]

    return X_train, y_train, X_test, y_test


In [290]:
X_train, y_train, X_test, y_test= split_data(x,y, train_size= 0.8)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [291]:
def covariance(x, mu):

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  pass
  n, d = x.shape

  sigma = np.zeros((x.shape[1], x.shape[1]))
  size = x.shape[1]
  for i in range(size):
    for j in range(size):
      sigma[i,j] = 1/(x.shape[0]-1) * np.sum((x[:,i] - mu) * (x[:,j] - mu))
  return sigma


In [298]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None
    
  def fit(self,x,y):
    # k=... # Number of class.
    k= 2
    # d=...  # input dim
    examples, dim = x.shape
    d= dim
    # m= ... # Number of examples.
    m= examples
    
    ## Initialize mu, phi and sigma
    # self.mu= ...#: kxd, i.e., each row contains an individual class mu.
    self.mu= np.zeros((k,d))
    # self.sigma= ...#: kxdxd, i.e., each row contains an individual class sigma.
    self.sigma = np.zeros((k,d,d))
    # self.phi= ...# d-dimension
    self.phi= np.zeros((d,1))

    ## START THE LEARNING: estimate mu, phi and sigma.

    size = len(y)
    for i in range(k):
      self.mu[i] = np.sum(x[i==y]) * x[i] / np.sum(x[i==y])
      self.phi[i] = 1/size * np.sum(x[i==y])
      self.sigma[i] = covariance(x[i].reshape(-1,1), self.mu)


  def predict_proba(self,x):
    # reshape or flatt x.
    # x = ...
    x = x.reshape(-1,1)
    # d = ...
    d = self.mu.shape[0]
    # k_class= ... # Number of classes we have in our case it's k = 2
    k_class= 2
    pi = 3.14

    y_pred = np.zeros((x.shape[0], 1))
    
    # ## START THE LEARNING: estimate mu, phi and sigma.
    for l in range(k_class):
      y_pred[l] = ((1/(2*pi)**(d/2)*np.sqrt(np.linalg.det(self.sigma)) )*np.exp(-0.5*(x - self.mu[l])*np.linalg.inv(self.sigma)*(x-self.mu[l])))

  def predict(self,x):
    return x*self.phi
  
  def accuracy(self, y, ypreds):
    return np.sum(y==ypreds) / len(y)

In [299]:
model= GDA()
model.fit(X_train,y_train)

In [300]:
yproba= model.predict_proba(X_test)
yproba

LinAlgError: Singular matrix

In [301]:
ypreds= model.predict(X_test)
ypreds


ValueError: operands could not be broadcast together with shapes (200,3) (3,1) 

In [None]:
model.accuracy(y_test, ypreds)