In [1]:
import numpy as np
import pandas as pd

In [13]:
class GDA(object):
    
    def __init__(self, features, target):
        self.features=features
        self.target=target
        self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==0.0]].mean()
        self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==1.0]].mean()
        self.phi=(1.0/self.target.shape[0])*self.target[self.target==1.0].count()

    def P_y(self, y, phi):
        return phi**y * (1-phi)**(1-y)
    
    def P_x_y(self, sigma, x, mu):
        comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(np.linalg.det(sigma)))
        comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))
        return comp1*comp2

        
    def Sigma(self):
        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))
        for i in range(self.target.shape[0]):
            if self.target.iloc[i]==0:
                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, np.matrix(self.features.iloc[i, :]-self.mu0))
            
            else:
                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T, np.matrix(self.features.iloc[i, :]-self.mu1))
                
        return (1.0/self.target.shape[0])*sigma
        
    def predict(self, X):
        predictions=[]
        for i in range(X.shape[0]):
            Prob0=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)
            Prob1=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)
            if Prob0>Prob1:
                predictions.append(0.0)
            else:
                predictions.append(1.0)
        return np.array(predictions)
            

In [18]:
mu1 = -1
mu2 = 3
sig1 = 0.5
sig2 = 1
N = 150
np.random.seed(10)
x11=np.random.randn(N,1)*sig1 + mu1
x12=np.random.randn(N,1)*sig1 + mu1+3
x21=np.random.randn(N,1)*sig2 + mu2
x22=np.random.randn(N,1)*sig2 + mu2+3
c = np.vstack((np.zeros((N,1)), np.ones((N,1))))
x1 = np.hstack((x11,x12))
x2 = np.hstack((x21,x21))

X = np.hstack( (np.vstack( (x1,x2) ),c) )
np.random.shuffle(X)
dataset = pd.DataFrame(data=X, columns=['x','y','c'])


In [19]:
Data_xy=dataset.drop('c', axis=1)
target=dataset['c']

In [20]:
mu1 = -1
mu2 = 3
sig1 = 0.5
sig2 = 1
N1 = 100
np.random.seed(10)
x11=np.random.randn(N1,1)*sig1 + mu1
x12=np.random.randn(N1,1)*sig1 + mu1+3
x21=np.random.randn(N1,1)*sig2 + mu2
x22=np.random.randn(N1,1)*sig2 + mu2+3
c = np.vstack((np.zeros((N1,1)), np.ones((N1,1))))
x1 = np.hstack((x11,x12))
x2 = np.hstack((x21,x22))

X = np.hstack( (np.vstack( (x1,x2) ),c) )
np.random.shuffle(X)
dataset1 = pd.DataFrame(data=X, columns=['x','y','c'])

In [21]:
Gaussian=GDA(Data_xy, target)

Features_test=dataset1[['x', 'y']]
Target_test=dataset1[['c']]

predictions=Gaussian.predict(Features_test)
predictions=predictions.reshape(-1,1)
print("Accuracy: ", ((predictions==np.array(Target_test)).sum()/Target_test.shape[0])*100, "%")

Accuracy:  66.5 %


In [23]:
from sklearn import datasets

Iris=datasets.load_iris()
data=Iris['data']
Data=np.hstack([data, Iris['target'].reshape(-1,1)])
Random=list(range(data.shape[0]))
np.random.shuffle(Random)
Data=Data[Random]
Data
Col=Iris['feature_names']
Col.insert(len(Col), 'target')
Data=pd.DataFrame(Data, columns=Col)
Data.head()
Data=Data[(Data['target']==0) | (Data['target']==1)]

Train_data=Data.iloc[0:10]
Test_data=Data.iloc[10:]
print("Train data is 10% of the data ",Train_data.shape)
print("Test data is 90% of the data ",Test_data.shape)

Train data is 10% of the data  (10, 5)
Test data is 90% of the data  (90, 5)


In [24]:
GaussianIris=GDA(Train_data.iloc[:,:-1], Train_data.iloc[:,-1])

#GaussianIris.Sigma()

pred=GaussianIris.predict(Test_data.iloc[:,:-1])
pred=pred.reshape(-1,1)
print("Accuracy: ", (pred==np.array(Test_data.iloc[:,-1]).reshape(-1,1)).sum()/Test_data.iloc[:,-1].shape[0]*100, "%")

Accuracy:  100.0 %
