# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use ready-made library routines that solve a task directly (for example `np.cov` for the covariance); implement each step yourself.
* Do not import additional libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [2]:
# Generate data
def generate_data(n_samples=1000, n_features=3, random_state=1):
  """Create a synthetic classification dataset with scikit-learn.

  The defaults reproduce the original hard-coded call: 1000 samples,
  3 features (all informative, none redundant), one cluster per class and
  a fixed seed so that every run yields the same data.

  Parameters
  ----------
  n_samples : int
      Number of rows to generate.
  n_features : int
      Number of feature columns; all of them are requested as informative.
  random_state : int or None
      Seed forwarded to ``make_classification``.

  Returns
  -------
  (x, y)
      The feature matrix and the label vector returned by
      ``make_classification``.
  """
  x, y = make_classification(n_samples=n_samples, n_features=n_features,
                             n_redundant=0, n_informative=n_features,
                             random_state=random_state,
                             n_clusters_per_class=1)

  return x, y

# Draw the synthetic dataset once; x receives the features and y the labels.
x,y=generate_data()# get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [3]:
def split_data(x,y, train_size= 0.8, random_state=None):
  """Shuffle the samples and split them into a train part and a test part.

  Parameters
  ----------
  x : array with the samples along axis 0.
  y : array with one label per sample, aligned with ``x`` along axis 0.
  train_size : float
      Fraction of the samples placed in the training part; the cut index is
      ``int(train_size * n)``.
  random_state : int, numpy.random.Generator or None
      When given, the permutation is drawn from
      ``np.random.default_rng(random_state)`` so the split is reproducible.
      When None (default) NumPy's global random state is used, as before.

  Returns
  -------
  (x_train, x_test, y_train, y_test)

  Raises
  ------
  ValueError
      If ``x`` and ``y`` do not contain the same number of samples.
  """
  n = x.shape[0]
  if y.shape[0] != n:
    raise ValueError(
        f"x and y must have the same number of samples, got {n} and {y.shape[0]}")

  # One permutation is applied to both arrays so rows and labels stay paired.
  if random_state is None:
    order = np.random.permutation(n)
  else:
    order = np.random.default_rng(random_state).permutation(n)

  x_shuffled = x[order]
  y_shuffled = y[order]

  cut = int(train_size * n)  # first `cut` shuffled rows form the training set

  return x_shuffled[:cut], x_shuffled[cut:], y_shuffled[:cut], y_shuffled[cut:]

In [4]:
# Shuffle and split the dataset: 80% of the rows for training, the rest for testing.
X_train, X_test, y_train, y_test=split_data(x,y, train_size= 0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [5]:
def covariance(x,mu):
  """Unbiased sample covariance matrix of the rows of ``x`` around ``mu``.

  Entry (i, j) is ``sum((x[:, i] - mu[i]) * (x[:, j] - mu[j])) / (n - 1)``.
  ``np.cov(x, rowvar=0)`` gives the same matrix when ``mu`` is the column
  mean of ``x``; it is deliberately not used here and only serves as a
  reference to check the result.

  Parameters
  ----------
  x : array of shape (n, d), one sample per row.
  mu : array with d entries, the centre to subtract from every row.

  Returns
  -------
  array of shape (d, d)

  Raises
  ------
  ValueError
      If ``x`` has fewer than two rows (the n - 1 divisor would be zero or
      negative).
  """
  n = x.shape[0]
  if n < 2:
    raise ValueError(f"covariance needs at least 2 samples, got {n}")

  centered = x - np.asarray(mu).reshape(-1)

  # centered.T @ centered sums the products of deviations for every (i, j)
  # pair at once, replacing the explicit double loop over features.
  return (centered.T @ centered) / (n - 1)


def mu_(x,y,k):
    """Mean feature vector of the samples whose label equals ``k``.

    Parameters
    ----------
    x : array of shape (n, d), one sample per row.
    y : array of n labels, either flat ``(n,)`` or a column ``(n, 1)``.
    k : label value selecting the class.

    Returns
    -------
    array of shape (d,) with the per-feature average of the selected rows.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows of ``x``, or
        if no sample carries label ``k`` (the mean would be 0/0).
    """
    n = x.shape[0]
    labels = np.asarray(y).reshape(-1)  # accept (n,) and (n, 1) label arrays
    if labels.shape[0] != n:
        raise ValueError(
            f"x and y must have the same number of samples, got {n} and {labels.shape[0]}")

    mask = labels == k
    count = np.count_nonzero(mask)
    if count == 0:
        raise ValueError(f"no sample has label {k!r}; the class mean is undefined")

    return x[mask].sum(axis=0) / count


def phi_(y,k):
    """Fraction of the labels in ``y`` that are equal to ``k``.

    ``y`` is indexed along its first axis; the count of matching entries is
    divided by the length of that axis.
    """
    total = y.shape[0]
    matches = sum(1 for idx in range(total) if y[idx] == k)
    return matches / total


def gaussien_density(x,mu,sigma):
  """Density of the multivariate normal N(mu, sigma) evaluated at one point.

  Computes ``exp(-0.5 * (x - mu)^T sigma^{-1} (x - mu))`` divided by
  ``(2*pi)^(d/2) * sqrt(det(sigma))``.

  Parameters
  ----------
  x : 1-D array of length d, the point at which to evaluate the density.
  mu : 1-D array of length d, the mean.
  sigma : array of shape (d, d), the covariance matrix.

  Returns
  -------
  The density value as a NumPy float.

  Raises
  ------
  numpy.linalg.LinAlgError
      If ``sigma`` is singular.
  """
  d = x.shape[0]

  diff = x - mu

  # Solve sigma @ z = diff instead of forming inv(sigma): it yields the same
  # quadratic form diff^T sigma^{-1} diff with less rounding error.
  quad_form = diff @ np.linalg.solve(sigma, diff)

  normaliser = ((2 * np.pi) ** (d / 2)) * np.sqrt(np.linalg.det(sigma))

  return np.exp(-quad_form / 2) / normaliser


def bernoulli(phi,k):
  """Bernoulli probability mass ``phi**k * (1 - phi)**(1 - k)``.

  For k = 1 this evaluates to ``phi`` and for k = 0 to ``1 - phi``.
  """
  success_term = phi ** k
  failure_term = (1 - phi) ** (1 - k)
  return success_term * failure_term





In [6]:
# Toy feature matrix (4 samples, 2 features) for checking the helpers by hand.
x1=np.array([[2,3],[1,2],[2,4],[0,2]])

In [7]:
# Toy labels stored as a column vector: rows 0 and 2 are class 1, rows 1 and 3 are class 0.
y1=np.array([[1],[0],[1],[0]])

In [8]:
# Mean of the class-0 rows of x1 (despite its name, mu_x is a class mean, not "the mean of x").
mu_x=mu_(x1,y1,0)
mu_x

array([0.5, 2. ])

In [9]:
# Mean of the class-1 rows of x1 (despite its name, mu_y is a feature mean, not a label mean).
mu_y=mu_(x1,y1,1)
mu_y

array([2. , 3.5])

In [10]:
# Second toy set in which every row is labelled 1, so mu_(a, b, 1) is the mean of all rows of a.
a=np.array([[2,1],[4,5],[1,2],[3,4],[7,6]])
b=np.array([[1],[1],[1],[1],[1]])
mu=mu_(a,b,1)
mu

array([3.4, 3.6])

In [11]:
# Reference covariance from NumPy (rowvar=0: columns are the variables), used only to check covariance().
np.cov(a,rowvar=0)

array([[5.3, 4.2],
       [4.2, 4.3]])

In [12]:
# Hand-written covariance on the same toy data; it should match the np.cov reference.
covariance(a,a.mean(0))

array([[5.3, 4.2],
       [4.2, 4.3]])

In [13]:
# Same reference check on the full generated dataset.
np.cov(x,rowvar=0)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [14]:
# Hand-written covariance of the full dataset; it should match the np.cov reference.
covariance(x,x.mean(0))

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [15]:
class GDA:
  """Gaussian Discriminant Analysis classifier with one covariance matrix per class.

  ``fit`` estimates, for every class label ``0 .. n_classes - 1``, the class
  prior (``phi``), the class mean (``mu``) and the class covariance
  (``sigma``) using the notebook helpers ``phi_``, ``mu_`` and ``covariance``.
  ``predict`` assigns each sample to the class with the largest
  ``gaussian density * prior``.
  """

  def __init__(self, n_classes=2):
    """Create an unfitted model.

    Parameters
    ----------
    n_classes : int
        Number of classes; labels are expected to be the integers
        ``0 .. n_classes - 1`` because they are used as row indices of the
        parameter arrays. Defaults to 2 (binary classification).
    """
    self.n_classes = n_classes
    # Parameters stay None until fit() has been called.
    self.mu=None
    self.sigma=None
    self.phi=None

  def fit(self,x,y):
    """Estimate ``phi``, ``mu`` and ``sigma`` from the training data.

    Parameters
    ----------
    x : array of shape (m, d), one training sample per row.
    y : array of m labels, flat ``(m,)`` or column ``(m, 1)``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of samples.
    """
    # Flatten so that a column vector of labels produces a 1-D row mask below.
    y = np.asarray(y).reshape(-1)
    if y.shape[0] != x.shape[0]:
      raise ValueError(
          f"x and y must have the same number of samples, got {x.shape[0]} and {y.shape[0]}")

    k = self.n_classes
    d = x.shape[1]  # input dimension

    self.mu = np.zeros((k, d))        # row `lab` holds the mean of class `lab`
    self.sigma = np.zeros((k, d, d))  # sigma[lab] holds the covariance of class `lab`
    self.phi = np.zeros(k)            # phi[lab] holds the prior of class `lab`

    for lab in range(k):
      members = (y == lab)

      self.phi[lab] = phi_(y, lab)
      self.mu[lab] = mu_(x, y, lab)
      # Covariance is computed only from the rows of this class, around its own mean.
      self.sigma[lab] = covariance(x[members], self.mu[lab])

  def predict_proba(self,x):
    """Score every sample against every class.

    Entry ``[i, lab]`` is ``gaussien_density(x[i], mu[lab], sigma[lab]) * phi[lab]``.
    The rows are NOT normalised, so they do not sum to 1; only their relative
    size within a row is used by ``predict``.

    Parameters
    ----------
    x : array of shape (n, d), one sample per row.

    Returns
    -------
    array of shape (n, number of classes)

    Raises
    ------
    RuntimeError
        If the model has not been fitted yet.
    """
    if self.mu is None or self.sigma is None or self.phi is None:
      raise RuntimeError("GDA model is not fitted yet; call fit(x, y) first")

    n_samples = x.shape[0]
    k_class = self.mu.shape[0]  # number of classes learnt by fit
    proba = np.zeros((n_samples, k_class))

    for lab in range(k_class):
      for i in range(n_samples):
        # phi[lab] already is the prior of class `lab`, so it multiplies the density directly.
        proba[i][lab] = gaussien_density(x[i], self.mu[lab], self.sigma[lab]) * self.phi[lab]

    return proba

  def predict(self,x):
    """Return, for every row of ``x``, the index of the class with the largest score."""
    proba = self.predict_proba(x)
    return np.argmax(proba, axis=1)

  def accuracy(self, y, ypreds):
    """Percentage (0-100) of entries where ``y`` and ``ypreds`` agree.

    Both inputs are flattened first so that a column vector ``(n, 1)`` and a
    flat vector ``(n,)`` are compared element by element instead of being
    broadcast into an ``(n, n)`` comparison.
    """
    y_flat = np.asarray(y).reshape(-1)
    pred_flat = np.asarray(ypreds).reshape(-1)
    return np.mean(y_flat == pred_flat) * 100



In [16]:
# Create the classifier and estimate its parameters on the training split.
model= GDA()
model.fit(X_train,y_train)

In [17]:
# Class priors estimated by fit: the fraction of training labels in each class.
model.phi

array([0.50875, 0.49125])

In [18]:
# Per-class feature means: row k is the mean of the training rows labelled k.
model.mu

array([[ 1.00123741,  1.04149184,  1.00521082],
       [-1.03088605,  0.93586606, -0.90658417]])

In [19]:
# Per-class covariance matrices: sigma[k] is computed from the training rows labelled k.
model.sigma

array([[[ 0.86635589, -0.40775594, -0.06279293],
        [-0.40775594,  1.69538858,  0.09364282],
        [-0.06279293,  0.09364282,  0.03739212]],

       [[ 0.82502947,  0.34636887,  0.14448122],
        [ 0.34636887,  0.36220622, -0.08101061],
        [ 0.14448122, -0.08101061,  1.55972549]]])

In [20]:
#gaussien_density(X_train[0],model.mu[0],model.sigma[0])

In [21]:
# Per-class scores for the test set: column k is the Gaussian density times the prior of class k
# (the rows are not normalised to sum to 1).
yproba= model.predict_proba(X_test)
yproba

array([[6.15700427e-027, 4.52697623e-002],
       [1.26970047e-001, 3.23150203e-003],
       [1.02813491e-133, 6.08766125e-003],
       [5.67265337e-004, 1.52932347e-002],
       [7.45479913e-222, 1.07425721e-005],
       [3.75403159e-002, 1.62623460e-005],
       [4.22086723e-016, 2.18838400e-002],
       [1.58525091e-001, 3.19326135e-004],
       [1.16782475e-058, 4.91958194e-002],
       [2.66073970e-036, 4.01921685e-002],
       [1.03254495e-001, 1.47712282e-004],
       [3.26855125e-002, 8.63677293e-004],
       [2.28455420e-087, 2.04008328e-002],
       [6.82023233e-027, 5.12438165e-002],
       [3.12110915e-006, 2.55113652e-003],
       [1.88655184e-008, 3.29943707e-002],
       [1.74750518e-015, 1.78349580e-002],
       [5.60585691e-002, 1.73643043e-008],
       [2.08803572e-012, 2.97301754e-002],
       [4.33110147e-004, 1.50665839e-002],
       [1.08504654e-082, 2.77267592e-002],
       [2.84454807e-002, 1.76338663e-009],
       [1.32270984e-001, 2.28364906e-003],
       [5.8

In [22]:
# Predicted label for each test row: the class with the largest score.
ypreds= model.predict(X_test)
ypreds


array([1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1])

In [23]:
# Percentage of test labels that were predicted correctly.
model.accuracy(y_test, ypreds)

98.5