# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [2]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x, y = generate_data()  # get data
print(x.shape, y.shape)



(1000, 3) (1000,)


In [3]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    idx = int(train_size * len(x))
    return x[:idx], y[:idx], x[idx:], y[idx:]

X_train, y_train, X_test, y_test = split_data(x, y)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)



(800, 3) (800,) (200, 3) (200,)


In [4]:
X_train, y_train, X_test, y_test = split_data(x, y)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


(800, 3) (800,) (200, 3) (200,)


In [10]:
def covariance(x, mu):

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  n,d=x.shape
  cov=np.zeros((d,d))
  print(cov.shape)
  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
  for i in range(d):
    for j in range(d):
      cov[i,j]=np.sum((x[:,i]-mu[i])*(x[:,j]-mu[j]))/n-1
  return cov
covariance(X_train, mu=np.mean(x, axis=0))
       

(3, 3)


array([[ 0.8488706 , -1.01768208, -0.02765711],
       [-1.01768208,  0.00582915, -0.97646158],
       [-0.02765711, -0.97646158,  0.73777718]])

In [5]:
class GDA:
    def __init__(self):
        ## set mu, phi and sigma to None
        self.mu = None
        self.phi = None
        self.sigma = None
    
    def fit(self, x, y):
        k = 2 # Number of classes
        d = x.shape[1] # input dimension
        m = x.shape[0] # Number of examples
        
        ## Initialize mu, phi and sigma
        self.mu = np.vstack([np.mean(x[y==0], axis=0), np.mean(x[y==1], axis=0)]) # kxd, each row contains an individual class mu
        self.sigma = np.zeros((k, d, d)) # kxdxd, each row contains an individual class sigma
        self.phi = np.mean(y) # scalar
        
        ## START THE LEARNING: estimate mu, phi and sigma.
        for i in range(m):
            mui = np.reshape(x[i] - self.mu[y[i]], (d, 1))
            self.sigma[y[i]] = self.sigma[y[i]] + np.dot(mui, mui.T)
        self.sigma /= m
        
    def predict_proba(self, x):
        # reshape or flatten x
        x = x.reshape(-1, x.shape[1])
        d = x.shape[1] # input dimension
        k_class = self.sigma.shape[0] # Number of classes, here it's k=2
        
        ## START THE LEARNING: estimate mu, phi and sigma.
        py0 = 1 - self.phi
        py1 = self.phi
        det_sigma = np.linalg.det(self.sigma)
        inv_sigma = np.linalg.inv(self.sigma)
        pdf0 = np.exp(-0.5 * np.sum(np.dot((x - self.mu[0]), inv_sigma[0]) * (x - self.mu[0]), axis=1)) / np.sqrt((2*np.pi)**(d/2) * det_sigma[0])
        pdf1 = np.exp(-0.5 * np.sum(np.dot((x - self.mu[1]), inv_sigma[1]) * (x - self.mu[1]), axis=1)) / np.sqrt((2*np.pi)**(d/2) * det_sigma[1])
        
        probs = np.vstack([pdf0, pdf1]).T
        
        return probs
        
    def predict(self, x):
        probs = self.predict_proba(x)
        preds = np.argmax(probs, axis=1)
        
        return preds
    
    def accuracy(self, y, ypreds):
        return np.mean(ypreds == y)

In [6]:
model= GDA()
model.fit(X_train,y_train)

In [7]:
yproba= model.predict_proba(X_test)
yproba

array([[1.96126319e+000, 1.85434263e-003],
       [6.36993720e-001, 2.76609251e-012],
       [8.65146850e-001, 2.42063655e-014],
       [1.32026850e+000, 1.01079828e-005],
       [6.13004952e-133, 6.37381485e-001],
       [1.57158192e-047, 1.22663284e-002],
       [7.47896709e-004, 8.39345749e-009],
       [7.69057431e-002, 1.04650774e-014],
       [6.30736774e-002, 6.14636309e-014],
       [3.21851120e-002, 4.90707880e-007],
       [1.03787076e-080, 1.22930826e-003],
       [3.55250141e-004, 6.38059127e-006],
       [1.07534646e-044, 3.94081306e-001],
       [2.99372008e-009, 1.98662751e-003],
       [2.53810857e-080, 6.71077576e-001],
       [2.37157701e-163, 3.03687579e-001],
       [8.65389362e-002, 7.47606691e-005],
       [3.67742707e-005, 1.08022569e-001],
       [4.10807103e-005, 8.79976876e-005],
       [2.03748706e-014, 5.47174176e-001],
       [3.43901537e-003, 3.64944225e-036],
       [2.69414815e+000, 1.19620369e-006],
       [6.45588313e-262, 1.37548124e-002],
       [3.6

In [8]:
ypreds= model.predict(X_test)
ypreds


array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 0])

In [9]:
model.accuracy(y_test, ypreds)

0.97