<a href="https://colab.research.google.com/github/BintaSOW1/GDA_Live_coding_FML23/blob/main/Binta_Sow_GDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm, following the steps discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [51]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [52]:
# Generate data
def generate_data(n_samples=1000, n_features=3, random_state=1):
  """Generate a synthetic binary classification dataset.

  Thin wrapper around ``sklearn.datasets.make_classification`` in which every
  feature is informative (no redundant features) and each class forms a
  single cluster. The default arguments reproduce the dataset used throughout
  this notebook.

  Args:
    n_samples: number of examples to generate.
    n_features: number of features; all of them are marked informative.
    random_state: seed forwarded to ``make_classification`` so that the
      generated dataset is reproducible.

  Returns:
    The tuple ``(x, y)`` returned by ``make_classification``; with the
    defaults the shapes are (1000, 3) and (1000,).
  """
  x, y = make_classification(n_samples=n_samples, n_features=n_features,
                             n_redundant=0, n_informative=n_features,
                             random_state=random_state,
                             n_clusters_per_class=1)

  return x, y



In [53]:
# Build the synthetic dataset once: x holds the features, y the class labels.
x,y= generate_data()
print(x.shape, y.shape)

(1000, 3) (1000,)


In [54]:
def split_data(x,y, train_size= 0.8):
  """Shuffle the samples and split them into a training and a test set.

  The permutation is drawn after seeding NumPy's global RNG with 0, so the
  same split is produced on every call (and the global RNG state is reset as
  a side effect).

  Args:
    x: array of samples, indexed along its first axis.
    y: array of labels aligned with ``x`` along the first axis.
    train_size: fraction of the samples assigned to the training set; the
      count is ``int(len(x) * train_size)``, the remainder goes to the test
      set.

  Returns:
    ``X_train, y_train, X_test, y_test`` (in that order).

  Raises:
    ValueError: if ``train_size`` is not within [0, 1].
  """
  if not 0 <= train_size <= 1:
    raise ValueError(f"train_size must be in [0, 1], got {train_size}")

  # Fixed seed so that the shuffle (and therefore the split) is reproducible.
  np.random.seed(0)

  # Honour the caller's train_size (it used to be overwritten with 0.8 here).
  n = int(len(x) * train_size)
  indices = np.arange(len(x))
  np.random.shuffle(indices)
  train_idx = indices[:n]
  test_idx = indices[n:]
  X_train, y_train = x[train_idx], y[train_idx]
  X_test, y_test = x[test_idx], y[test_idx]

  return X_train, y_train, X_test, y_test
  

In [55]:
X_train, y_train, X_test, y_test= split_data(x,y, train_size= 0.8) # split_data returns X_train, y_train, X_test, y_test (in that order)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [6]:
def covariance(x, mu):
  """Unbiased sample covariance matrix of ``x`` about the mean ``mu``.

  Computes ``sigma[i, j] = 1/(N-1) * sum_n (x[n, i] - mu[i]) * (x[n, j] - mu[j])``
  for all feature pairs at once as a single matrix product of the centred
  data with itself.

  Args:
    x: 2-D array of shape (N, d), one sample per row.
    mu: mean vector of length d that the samples are centred on.

  Returns:
    Array of shape (d, d) holding the covariance matrix.

  Raises:
    ZeroDivisionError: if ``x`` contains a single sample (N - 1 == 0).
  """
  N, _ = x.shape
  scale = 1 / (N - 1)

  # Centre every sample once; the (d, d) product below then yields every
  # pairwise sum of products in one pass.
  centered = x - mu
  return scale * (centered.T @ centered)


In [7]:
# Covariance of the full dataset about its feature-wise mean.
covariance(x,x.mean(axis=0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [8]:
# Reference value from NumPy, to compare against the hand-written covariance() result.
np.cov(x, rowvar=False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [9]:
from math import pi
class GDA:
  """Gaussian Discriminant Analysis with one full covariance matrix per class.

  After ``fit`` the model holds, for each of the k classes found in the
  training labels:
    * ``classes``: the sorted distinct labels, shape (k,);
    * ``phi``: class priors, shape (k,);
    * ``mu``: class means, shape (k, d);
    * ``sigma``: class covariance matrices, shape (k, d, d).
  """

  def __init__(self):
    self.mu=None
    self.phi = None
    self.sigma = None
    self.classes = None

  def fit(self,x,y):
    """Estimate the class priors, means and covariances from labelled data.

    Relies on the module-level ``covariance(x, mu)`` helper for the
    per-class covariance matrices.

    Args:
      x: array of shape (m, d), one training sample per row.
      y: array of m labels; any set of distinct values is accepted.
    """
    y = np.asarray(y)
    # Iterate over the labels actually present instead of assuming 0..k-1.
    self.classes = np.unique(y)
    k = self.classes.shape[0]  # number of classes
    m, d = x.shape             # number of examples, input dimension

    self.mu = np.zeros((k, d))        # row i: mean of class i
    self.sigma = np.zeros((k, d, d))  # sigma[i]: covariance of class i
    self.phi = np.zeros(k)            # one prior per class (not per feature)

    for idx, label in enumerate(self.classes):
      members = x[y == label]
      self.phi[idx] = members.shape[0] / m
      self.mu[idx] = np.mean(members, axis=0)
      self.sigma[idx] = covariance(members, self.mu[idx])

  def predict_proba(self,x):
    """Score every sample against every class.

    Column i holds ``p(x | class i) * phi[i]``, i.e. the Gaussian class
    density weighted by the class prior. These are unnormalised scores: rows
    are not divided by their sum, so they do not add up to 1.

    Args:
      x: array of shape (n, d).

    Returns:
      Array of shape (n, k), columns ordered like ``self.mu`` / ``self.phi``.
    """
    d = x.shape[1]
    k = self.mu.shape[0]
    norm_const = (2 * np.pi) ** (d / 2)

    score = np.zeros((x.shape[0], k))
    for label in range(k):
      sigma_det = np.linalg.det(self.sigma[label]) ** (1 / 2)
      sigma_inv = np.linalg.inv(self.sigma[label])

      # Squared Mahalanobis distance of every sample to this class mean,
      # computed for all rows at once: diff_n^T @ sigma_inv @ diff_n.
      diff = x - self.mu[label]
      maha = np.sum((diff @ sigma_inv) * diff, axis=1)

      score[:, label] = (
          1 / (norm_const * sigma_det) * np.exp(-0.5 * maha) * self.phi[label]
      )

    return score

  def predict(self,x):
    """Return, for every sample, the label of the class with the highest score.

    If the parameters were assigned by hand and ``classes`` is unset, the
    column index of the best class is returned instead.
    """
    best = np.argmax(self.predict_proba(x), axis=1)
    classes = getattr(self, "classes", None)
    if classes is None:
      return best
    return classes[best]

  def accuracy(self, y, ypreds):
    """Percentage (0-100) of entries where ``y`` and ``ypreds`` agree.

    Both inputs are flattened first so that an (n,) vector and an (n, 1)
    column are compared element by element instead of being broadcast.
    """
    accuracy = np.mean(np.ravel(y) == np.ravel(ypreds)) * 100
    return accuracy

In [10]:
# Fit the GDA model on the training split.
model= GDA()
model.fit(X_train,y_train)

In [11]:
# Per-class scores for the test samples (one row per sample, one column per class).
yproba= model.predict_proba(X_test)
yproba

array([[5.87551113e-071, 2.82297074e-002],
       [2.37487273e-021, 9.62396112e-003],
       [6.44627142e-002, 1.22568101e-004],
       [7.69090333e-093, 2.23292998e-002],
       [9.94171011e-106, 1.61072059e-002],
       [1.10167268e-053, 5.59603853e-002],
       [8.96175586e-002, 7.44596253e-003],
       [4.40597225e-013, 4.87697985e-002],
       [1.34083125e-001, 5.10291966e-006],
       [7.07066977e-005, 1.53733708e-002],
       [6.91282393e-065, 5.94779155e-003],
       [2.68979935e-016, 2.57221552e-002],
       [9.60462265e-037, 5.60969210e-003],
       [5.44006703e-017, 1.02452087e-002],
       [1.28436092e-002, 9.37827696e-009],
       [1.22696279e-070, 1.65288199e-002],
       [2.05269185e-051, 2.86882188e-004],
       [2.17903307e-057, 5.42213232e-002],
       [1.47629469e-008, 1.36118159e-002],
       [1.82861034e-061, 4.77607222e-002],
       [7.75586997e-089, 8.49746019e-003],
       [1.05511050e-001, 1.30588878e-003],
       [5.60612982e-060, 3.62750906e-002],
       [5.6

In [12]:
# Predicted class for each test sample (argmax over the per-class scores).
ypreds= model.predict(X_test)
ypreds


array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0])

In [13]:
# Test-set accuracy of the GDA model, in percent.
model.accuracy(y_test, ypreds)

98.0

# Logistic Regression

In [61]:
class LogisticRegression:
  '''
  Binary logistic-regression classifier trained with batch gradient descent.

  A bias column of ones is prepended to the inputs, so ``w`` has shape
  (d + 1, 1) with the bias in its first row. ``train_losses`` holds the
  cross-entropy loss recorded after every epoch of the most recent ``fit``.
  '''
  def __init__(self,lr,n_epochs):
    self.lr = lr                  # gradient-descent step size
    self.n_epochs = n_epochs      # number of full-batch updates per fit
    self.train_losses = []
    self.w = None
    self.weight = []              # not used by this class; kept for compatibility

  def add_ones(self, x):
    '''Return ``x`` with a leading column of ones (the bias feature).'''
    one = np.ones((x.shape[0],1))
    return np.hstack((one,x))

  def sigmoid(self, x):
    '''Return sigmoid(x @ w) for inputs that already contain the bias column.'''
    # Bound the logits so np.exp cannot overflow; at |z| = 500 the sigmoid is
    # already indistinguishable from 0 or 1 for thresholding purposes.
    z = np.clip(x @ self.w, -500, 500)
    return 1 / (1 + np.exp(-z))

  def cross_entropy(self, x, y_true):
    '''Mean binary cross-entropy of the current weights on (x, y_true).

    ``x`` must already contain the bias column. Predictions are clipped away
    from exactly 0 and 1 so that saturated outputs give a finite loss instead
    of NaN/inf.
    '''
    eps = 1e-15
    y_pred = np.clip(self.sigmoid(x), eps, 1 - eps)
    loss = -np.mean(y_true*np.log(y_pred)+(1-y_true)*np.log(1-y_pred))
    return loss

  def predict_proba(self,x):
    '''Return sigmoid(w^T [1, x]) for raw inputs (the bias column is added here).'''
    return self.sigmoid(self.add_ones(x))

  def predict(self,x):
    '''Return a list of 0/1 labels, thresholding the probabilities at 0.5.'''
    probas = np.ravel(self.predict_proba(x))
    # p < 0.5 -> 0, otherwise 1; returned as a plain Python list.
    return np.where(probas < 0.5, 0, 1).tolist()

  def fit(self,x,y):
    '''Train the weights with ``n_epochs`` steps of batch gradient descent.

    Weights and the loss history are re-initialised on every call, so
    refitting does not append to the losses of an earlier run. The loss is
    printed every 1000 epochs.

    Args:
      x: array of shape (n, d) without the bias column.
      y: array of n binary labels (0 or 1).
    '''
    # Add the bias column and turn y into a column vector matching sigmoid's output.
    x = self.add_ones(x)
    y = np.asarray(y).reshape(-1,1)

    # Start from zero weights and an empty loss history.
    self.w = np.zeros((x.shape[1],1))
    self.train_losses = []

    for epoch in range(self.n_epochs):
      ypred = self.sigmoid(x)

      # Gradient of the mean cross-entropy with respect to w.
      dl = (-1/x.shape[0])*(x.T@(y-ypred))

      self.w = self.w-self.lr*dl

      # Loss is measured after the update of this epoch.
      loss = self.cross_entropy(x,y)
      self.train_losses.append(loss)

      if epoch%1000 == 0:
        print(f'loss for epoch {epoch}  : {loss}')

  def accuracy(self,y_true, y_pred):
    '''Percentage (0-100) of entries where ``y_true`` and ``y_pred`` agree.

    Both inputs are flattened first so that an (n,) vector and an (n, 1)
    column are compared element by element instead of being broadcast to an
    (n, n) matrix.
    '''
    acc = np.mean(np.ravel(y_true) == np.ravel(y_pred))*100
    return acc
    #### END CODE ####

In [69]:
# NOTE: this rebinds `model`, replacing the GDA instance created in the earlier cells.
# Learning rate 0.1, 10000 gradient-descent epochs.
model = LogisticRegression(0.1,n_epochs=10000)
model.fit(X_train,y_train)

loss for epoch 0  : 0.6463167804519787
loss for epoch 1000  : 0.14239444677328827
loss for epoch 2000  : 0.1382027500077721
loss for epoch 3000  : 0.13709089867249624
loss for epoch 4000  : 0.13671354323231316
loss for epoch 5000  : 0.13656970131951102
loss for epoch 6000  : 0.13651127569994362
loss for epoch 7000  : 0.136486634817288
loss for epoch 8000  : 0.1364759972819431
loss for epoch 9000  : 0.13647133617478022


In [70]:
# Accuracy (in percent) of the logistic-regression model on the training split ...
ypred_train = model.predict(X_train)
acc = model.accuracy(y_train,ypred_train)
print(f"The training accuracy is: {acc}")
print(" ")

# ... and on the held-out test split (`acc` is reused for the second value).
ypred_test = model.predict(X_test)
acc = model.accuracy(y_test,ypred_test)
print(f"The test accuracy is: {acc}")

The training accuracy is: 96.0
 
The test accuracy is: 95.0
