# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [9]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [10]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data()
# get data
print(x.shape, y.shape)

(1000, 3) (1000,)


In [11]:
x

array([[-0.42600458,  0.3092346 ,  1.13238592],
       [ 0.23750039,  0.85236655,  1.27032566],
       [-1.50956177,  0.57932947, -0.58204952],
       ...,
       [ 0.38796174,  1.01606996, -1.499496  ],
       [-0.74578403,  1.56454128, -1.05700466],
       [ 1.08716336, -0.29150009,  0.98548405]])

In [12]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
  
    n = int(len(x)*train_size)
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    train_idx = indices[: n]
    test_idx = indices[n:]
    X_train, y_train = x[train_idx], y[train_idx]
    X_test, y_test = x[test_idx], y[test_idx]
    return X_train,X_test,y_train,y_test

In [13]:
X_train, X_test, y_train, y_test= split_data(x,y, train_size=0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [14]:
#class
np.unique(y)

array([0, 1])

In [15]:
#   def compute_mu(x,y,k):
#     d = x.shape[1]
#     self.mu = np.zeros((k,d))
#     for i in range(k):
#         idx = np.where(y==i)
#         x_trans = x[idx].T
#         for j in range(d):
#             self.mu[i,j]=np.mean(x_trans[j])
#     return self.mu

In [16]:
#   def compute_phi(self,x):
#         d =x.shape[1]
#         self.phi = np.zeros((d,1))
#         x_trans =x.T
#         for k in range(d):
#             self.phi[k]=np.mean(x_trans[k])
#         return self.phi

In [17]:
def covariance(x, mu):
    #mu.shape =(1,3) , x.shape =(m,3) for a single class
    m,d = x.shape
    sigma =np.zeros((d,d))
    for i in range(d):
        for j in range(d):
            cv =np.zeros(m)
            for k in range(m):
                cv[k]=(x[k,i]-mu[i])*(x[k,j]-mu[j])
            sigma[i,j]=np.mean(cv)
    return sigma
                

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.


In [18]:
#print('x.mean_col = ',x.mean(0))
#np.cov(x,x.mean(0))
indexs = np.where(y==0)
np.cov(x[indexs],rowvar=0)


mu = np.zeros((2,3))
for i in range(2):
    idx = np.where(y==i)
    x_trans = x[idx].T
    for j in range(3):
        mu[i,j]=np.mean(x_trans[j])
# mu

covariance(x[indexs], mu[0])

array([[ 0.87166222, -0.37375578, -0.06166966],
       [-0.37375578,  1.65431939,  0.09818823],
       [-0.06166966,  0.09818823,  0.03808229]])

In [42]:
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    self.mu = None
    self.phi = None
    self.sigma = None

  def fit(self,x,y):
    k= len(np.unique(y)) # Number of class.
    d=x.shape[1]  # input dim
    m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu=  np.zeros((k,d)) #: kxd, i.e., each row contains an individual class mu.
    self.phi = np.zeros((k,d))
    for i in range(k):
        idx = np.where(y==i)
        x_trans = x[idx].T
        for j in range(d):
            self.mu[i,j]=np.mean(x_trans[j])
            self.phi[i,j]=np.sum(x_trans[j])/len(x)
    #self.mu = compute_mu(x,y,k)
            
    self.sigma= np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    
    for lab in range(k):
        idx = np.where(y==lab)
        self.sigma[lab] = covariance(x[idx],self.mu[lab])
    
#     self.phi= np.zeros((d))# d-dimension
#     x_trans =x.T
#     for k in range(d):
#         self.phi[k]=np.mean(x_trans[k])
#     self.phi = compute_phi(x)

#     ## START THE LEARNING: estimate mu, phi and sigma.
#     self.mu = compute_mu(x,y,k)
    
#     self.phi = compute_phi(x)
    
#     for s in range(k):
#         idx = np.where(y==s)
#         self.sigma[s] = covariance(x[idx],self.mu[s])
    
#     return self.mu,self.phi,self.sigma
    print(f'mu = {self.mu}')
    print(f'phi = {self.phi}')
    print(f'sigma = {self.sigma}')
    return
    

  def predict_proba(self,x):
    # reshape or flatt x.
    n,d= x.shape
    k_class= 2 # Number of classes we have in our case it's k = 2
    

    ## START THE LEARNING: estimate mu, phi and sigma.
    p = np.zeros((n,k_class))
    for nn in range(n):
        for i in range(k_class):
#             p[i] = self.phi[i]
            p[nn,i]=(1/((2*np.pi)**d/2)*(np.linalg.det(self.sigma[i])**0.5))*np.exp(-0.5*(x[nn]-self.mu[i]).T@np.linalg.inv(self.sigma[i])@(x[nn]-self.mu[i]))
#     proba_xy = prob_x_y[0]*p[0]+prob_x_y[1]*p[1]

    return p
  
  def predict(self,ypred):
#     ypred = self.predict_proba(x)
    return ypred.argmax(axis=1)
    
  
  def accuracy(self, y, ypreds):
        accur = np.mean(y==ypreds)*100
        return accur
    

In [43]:
model= GDA()
model.fit(X_train,y_train)
print(model.predict_proba(X_test))

mu = [[ 1.00788754  1.08264488  1.00336162]
 [-0.96821477  0.96936658 -0.94888478]]
phi = [[ 0.51150293  0.54944228  0.50920602]
 [-0.47684577  0.47741304 -0.46732575]]
sigma = [[[ 0.85571916 -0.36378955 -0.06289291]
  [-0.36378955  1.52434025  0.09815525]
  [-0.06289291  0.09815525  0.03958737]]

 [[ 0.80893271  0.34415912  0.11229443]
  [ 0.34415912  0.35816188 -0.09239971]
  [ 0.11229443 -0.09239971  1.5871251 ]]]
[[4.51158856e-004 7.97260104e-009]
 [1.01504775e-003 3.02704269e-004]
 [1.07205165e-003 1.18190110e-006]
 [4.61795982e-004 4.79629336e-009]
 [6.00934498e-028 1.93436485e-003]
 [4.47145239e-036 9.89891576e-004]
 [3.87582620e-021 2.58741792e-003]
 [1.07586568e-033 1.59678389e-003]
 [7.03276508e-004 5.44274190e-004]
 [7.98822478e-033 3.71875760e-003]
 [7.13999990e-004 7.79295834e-007]
 [5.61049741e-004 4.84487991e-008]
 [1.24510237e-003 1.85578771e-004]
 [1.09990862e-004 1.91332961e-008]
 [2.13572418e-005 6.12216383e-004]
 [4.06412322e-035 3.50847261e-003]
 [1.78341462e-004 3

In [44]:
yproba= model.predict_proba(X_test)
yproba
pp = model.predict(yproba)
print(pp)

[0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 0 0 0 1 1 1
 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 0 0 1 1 1 1 0 0 0 0 1 1 0 1 0 1 0 1
 1 1 0 1 1 0 1 0 1 1 1 0 0 0 1 0 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1
 1 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 1
 1 0 1 0 1 1 0 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 1 0 0 1 1
 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0]


In [45]:
ypreds= model.predict(X_test)
ypreds


array([0, 1, 0, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2, 2, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1,
       1, 1, 2, 1, 0, 2, 0, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 0, 0, 1,
       2, 0, 1, 1, 1, 0, 2, 1, 1, 0, 1, 1, 2, 0, 2, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1, 2, 0, 2, 0, 2, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 1, 1, 2, 1, 0, 2, 1,
       1, 0, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 2, 0, 1, 2,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 0, 2, 2, 2, 1,
       1, 0])

In [46]:
model.accuracy(y_test, ypreds)

67.5