# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [11]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data()
print(x.shape, y.shape)

(1000, 3) (1000,)


In [12]:
d=np.column_stack((y,x))
d

array([[ 0.        , -0.42600458,  0.3092346 ,  1.13238592],
       [ 0.        ,  0.23750039,  0.85236655,  1.27032566],
       [ 1.        , -1.50956177,  0.57932947, -0.58204952],
       ...,
       [ 1.        ,  0.38796174,  1.01606996, -1.499496  ],
       [ 1.        , -0.74578403,  1.56454128, -1.05700466],
       [ 0.        ,  1.08716336, -0.29150009,  0.98548405]])

In [13]:
df1 = np.random.permutation(d.shape[0])
d=d[df1]
d


array([[ 1.        ,  0.17117267,  1.37742284, -1.31788001],
       [ 0.        ,  2.11454193, -0.44903598,  1.00678715],
       [ 1.        ,  0.80417989,  1.75396714, -1.12951411],
       ...,
       [ 1.        , -2.76534636, -0.55921191,  0.10615199],
       [ 1.        , -2.36301831,  0.98456191, -0.30421328],
       [ 1.        , -0.51412783,  1.03221339, -0.0761302 ]])

In [14]:
def split_data(x,y, train_size= 0.8):
    
    X_train=d[0:int(train_size*x.shape[0]),1:4]
    y_train=d[0:int(train_size*x.shape[0]),0].reshape(-1,1)
    X_test=d[int(train_size*x.shape[0]):,1:4]
    y_test=d[int(train_size*x.shape[0]):,0].reshape(-1,1)
    return X_train,y_train,X_test,y_test

In [15]:
X_train,  y_train,X_test, y_test= split_data(x,y, train_size= 0.8)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800, 1) (200, 3) (200, 1)


In [16]:
mu = np.zeros((x.shape[1]))
def covariance(x,mu):
    D, N = x.shape
    covariance = np.zeros((N, N))
    #mu = np.mean(x, axis=0)
    for i in range(N):
        for j in range(N):
            covariance[i,j]=(1/(D-1)*np.sum(x[:,i]*x[:,j]))-(D*mu[i]*mu[j])/(D-1)

    return covariance


In [18]:
covariance(x,mu)

array([[1.84512203, 0.01491822, 1.0009886 ],
       [0.01491822, 2.00121557, 0.08515274],
       [1.0009886 , 0.08515274, 1.74920615]])

In [19]:
np.cov(x,rowvar=0)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [20]:
phi=np.zeros(2)
for i in range(2):
    phi[i]=np.mean(y==i)
phi

array([0.498, 0.502])

In [21]:
m=np.zeros((2,3))
for i in range(2):
    for j in range(3):
        ko=np.where(y==i)
        b=x[ko]
        m[i,j]=np.mean(b[:,j].T)
m


array([[ 0.99515416,  1.04188282,  0.99941748],
       [-1.01309105,  0.95696514, -0.93218425]])

In [23]:
np.mean(x[:,0])

-0.012984932798671576

In [33]:
class GDA:
    def __init__(self):
    ## set mu, phi and sigma to None
        self.mu=None
        self.phi=None
        self.sigma=None
    
    def fit(self,x,y):
        k=2#Number of class.
        d=x.shape[1] # input dim
        m= x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
        self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
        self.mu=self.muy(x,y,2,3)
        self.sigma= np.zeros((2,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
        self.phi=np.zeros(k)# d-dimension
    

    ## START THE LEARNING: estimate mu, phi and sigma.
    def muy(self,x,y,k,d):
        self.mu= np.zeros((k,d))
        self.phi= np.zeros((k,d))
        for i in range(k):
            for j in range(d):
                ko=np.where(y==i)
                b=x[ko]
                self.mu[i,j]=np.mean(b[:,j])
                self.phi[i,j]=np.sum(b[:,j])/len(x)
        return self.mu,self.phi

            
            
            


    
    ## phi
    def sigma(x,mu):
        n,d=x.shape
        self.sigma=np.zeros((d,d))
        for k in range(d):
            for j in range(d):
                values=np.zeros(n)
                for i in range(n):
                    values[i]= (x[i][k]-mu[k])*(x[i][j]-mu[j])
                    self.sigma[k,j]=np.mean(values)
        return self.sigma
                    
                 
    def predict_proba(self,x):
    # reshape or flatt x.
        n,d= x.shape
        k_class= 2 # Number of classes we have in our case it's k = 2
    

    ## START THE LEARNING: estimate mu, phi and sigma.
        p = np.zeros((n,k_class))
        for nn in range(n):
            for i in range(k_class):
#             p[i] = self.phi[i]
                p[nn,i]=(1/((2*np.pi)**d/2)*(np.linalg.det(self.sigma[i])**0.5))*np.exp(-0.5*(x[nn]-self.mu[i]).T@np.linalg.inv(self.sigma[i])@(x[nn]-self.mu[i]))
#     
        return p
  
        def predict(self,ypred):
#     ypred = self.predict_proba(x)
            return ypred.argmax(axis=1)

        
        
        
    
        
        


    

In [30]:
model= GDA()
model.fit(X_train,y_train)
model.mu
print(model.sigma)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [31]:
yproba= model.predict_proba(X_test)
yproba

TypeError: 'NoneType' object is not subscriptable

In [32]:
ypreds= model.predict(X_test)
ypreds


AttributeError: 'GDA' object has no attribute 'predict'

In [28]:
model.accuracy(y_test, ypreds)

AttributeError: 'GDA' object has no attribute 'accuracy'