In [15]:
import numpy as np
import pandas as pd
import os
import sys
import scipy
from scipy.linalg import logm
import math
import time
import matplotlib.pyplot as plt
from sklearn import datasets
import pickle
%matplotlib inline

In [5]:
# The project directory is specific to the original author's machine. Only
# switch into it when it actually exists, so the notebook still runs (from the
# current working directory) on any other checkout instead of raising
# FileNotFoundError.
PROJECT_DIR = "C:\\Users\\MoeAn\\Documents\\GitHub\\LinearDiscriminantAnalysis"
if os.path.isdir(PROJECT_DIR):
    os.chdir(PROJECT_DIR)

In [6]:
# Load the iris dataset and assemble one frame: feature columns followed by a
# "target" column. Stacking the integer labels next to the float features
# yields a float-valued target, exactly as before.
iris = datasets.load_iris()
column_names = iris['feature_names'] + ['target']
stacked_values = np.column_stack((iris['data'], iris['target']))
data1 = pd.DataFrame(data=stacked_values, columns=column_names)

# Split into the feature matrix (X) and the class-label column (Y).
Y = data1["target"]
X = data1.drop(["target"], axis=1)

In [7]:
class qdaClassifier:
    """Quadratic discriminant analysis (QDA) classifier on pandas data.

    The classifier estimates, for every class, a covariance matrix, its
    pseudo-inverse, a mean vector and a prior proportion (``trainQDA``), and
    then assigns each stored observation to the class with the largest
    quadratic discriminant score (``predictQDA``).

    Attributes:
        X, Y: the feature frame and label series as supplied by the caller.
        x, y: copies of ``X``/``Y`` with a fresh 0..n-1 index; these are what
            training and prediction actually read.
        sigmaMatrices, invertedSigmaMatrices, muVectors, muTransposeVectors,
        piVector: per-class parameters, filled by ``trainQDA`` in the order
            of ``classes``.
        classes: class labels in the order used during training
            (``None`` until ``trainQDA`` has run).

    The module using this class must have ``numpy as np``, ``pandas as pd``
    and ``math`` imported at module level.
    """

    def __init__(self, x, y):
        """Store the data and report whether the required packages import.

        Args:
            x: pandas DataFrame of features (one row per observation).
            y: pandas Series of class labels, positionally matching ``x``.
        """
        self.X = x
        self.Y = y
        self.x = self.X.reset_index(drop=True)
        self.y = self.Y.reset_index(drop=True)
        self.sigmaMatrices = []
        self.invertedSigmaMatrices = []
        self.muVectors = []
        self.muTransposeVectors = []
        self.piVector = []
        self.classes = None

        print("Checking for dependencies....")
        if self._dependenciesAvailable():
            print("Dependencies Loaded")

    def _dependenciesAvailable(self):
        """Return True when all required packages import; otherwise explain and return False."""
        try:
            import numpy  # noqa: F401
            import pandas  # noqa: F401
            import scipy  # noqa: F401
            import math  # noqa: F401
            import time  # noqa: F401
        except ImportError:
            # Only a failed import is a dependency problem; anything else
            # should surface normally rather than be swallowed.
            print("Missing Dependencies!")
            print("All of the following packages must be installed:")
            print("pandas as pd ,numpy as np , scipy, math , time")
            return False
        return True

    @staticmethod
    def _rowsOfClass(x, y, k):
        """Return the rows of ``x`` whose positionally matching label in ``y`` equals ``k``."""
        # A positional boolean mask avoids looking the label column up by
        # name, so unnamed label Series work as well.
        return x.loc[(y == k).values]

    def setX(self, x):
        """Replace the feature data, including the re-indexed working copy."""
        self.X = x
        self.x = x.reset_index(drop=True)

    def setY(self, Y):
        """Replace the label data, including the re-indexed working copy."""
        self.Y = Y
        self.y = Y.reset_index(drop=True)

    def getCovMatrix(self, x):
        """Return the covariance matrix of the columns of DataFrame ``x``."""
        return x.cov()

    def getPiK(self, k, classColumn):
        """Return the fraction of entries in ``classColumn`` equal to ``k``.

        Raises:
            ZeroDivisionError: if ``classColumn`` is empty.
        """
        matchingRowsCount = int((classColumn == k).sum())
        return matchingRowsCount / len(classColumn)

    def getVectorMu(self, x, y, k):
        """Return the per-column means of the rows of ``x`` labelled ``k``.

        Args:
            x: feature DataFrame.
            y: label Series, positionally matching ``x``.
            k: class label to select.

        Returns:
            1-D numpy array with one mean per column of ``x``.
        """
        # Filter once instead of once per column.
        rowsThatMatchK = self._rowsOfClass(x, y, k)
        return np.asarray([rowsThatMatchK[column].mean() for column in x])

    def getDeltaK(self, row, k, X, Y, sigmaInverse, sigma, mu, muTranspose, pi):
        """Return the quadratic discriminant score of one observation for one class.

        The score is
        ``-0.5 x'S^-1 x + x'S^-1 mu - 0.5 mu'S^-1 mu - 0.5 log|S| + log(pi)``.

        Args:
            row: the observation (feature vector).
            k, X, Y: accepted for interface compatibility; not used here.
            sigmaInverse: (pseudo-)inverse of the class covariance matrix.
            sigma: class covariance matrix (used for the determinant term).
            mu: class mean vector.
            muTranspose: transpose of ``mu``.
            pi: class prior proportion.
        """
        rowTranspose = np.transpose(row)
        quadraticTerm = -0.5 * rowTranspose.dot(sigmaInverse).dot(row)
        linearTerm = rowTranspose.dot(sigmaInverse).dot(mu)
        meanTerm = 0.5 * muTranspose.dot(sigmaInverse).dot(mu)
        logDetTerm = 0.5 * np.log(np.linalg.det(sigma))
        return quadraticTerm + linearTerm - meanTerm - logDetTerm + math.log(pi)

    def trainQDA(self):
        """Estimate per-class covariance matrices, mean vectors and priors.

        Results are stored on the instance, one entry per class in the order
        of ``self.classes``.

        Raises:
            ImportError: if a required package cannot be imported.
        """
        x = self.x
        y = self.y
        print("Attempting to load dependencies..")
        if not self._dependenciesAvailable():
            # Training cannot proceed without the numeric stack; fail clearly
            # instead of reporting success and crashing later.
            raise ImportError("qdaClassifier requires pandas, numpy, scipy, math and time")

        print("Dependencies Loaded")
        print("Initializing Classifier...")

        classes = y.unique()
        # Remember the class order so prediction indexes the per-class
        # parameters consistently even if the labels are replaced later.
        self.classes = classes

        print("Generating Covariance Matrices..")
        self.sigmaMatrices = []
        self.invertedSigmaMatrices = []
        for currentClass in classes:
            currentClassX = self._rowsOfClass(x, y, currentClass)
            thisSigma = self.getCovMatrix(currentClassX)
            # Pseudo-inverse so a singular covariance does not abort training.
            thisInvertedSigma = np.linalg.pinv(thisSigma)
            self.invertedSigmaMatrices.append(thisInvertedSigma)
            self.sigmaMatrices.append(thisSigma)
            print(thisSigma.shape)
            print("Sigmas Generated for "+str(currentClass))

        print("Calculating Class-Specific Mean Vectors..")
        self.muVectors = []
        self.muTransposeVectors = []
        for currentClass in classes:
            thismu = self.getVectorMu(x, y, currentClass)
            thisTransposeMu = np.transpose(thismu)
            print("Mu for class "+str(currentClass)+" is "+str(thismu))
            self.muVectors.append(thismu)
            self.muTransposeVectors.append(thisTransposeMu)

        print("Calculating pi proportions....")
        self.piVector = []
        for currentClass in classes:
            thispi = self.getPiK(currentClass, y)
            print("Pi for class "+str(currentClass)+" is "+str(thispi))
            self.piVector.append(thispi)

    def predictQDA(self):
        """Classify every stored observation and return the predicted labels.

        ``trainQDA`` must have been called first so the per-class parameters
        exist.

        Returns:
            list with one predicted class label per row of the stored data.
        """
        x = self.x
        y = self.y
        classes = getattr(self, "classes", None)
        if classes is None:
            classes = y.unique()

        predictions = []

        print("Classifying...")
        totalRows = len(x)
        # round(n / 20) is 0 for small inputs; clamp so the modulo below
        # never divides by zero.
        notificationInterval = max(1, int(round(totalRows / 20)))
        for row in range(totalRows):
            if row % notificationInterval == 0:
                print(str(row)+" Out Of "+str(totalRows)+" Observations Classified")
            observation = x.iloc[row]
            deltasForThisX = [
                self.getDeltaK(observation, currentClass, x, y,
                               self.invertedSigmaMatrices[classIndex],
                               self.sigmaMatrices[classIndex],
                               self.muVectors[classIndex],
                               self.muTransposeVectors[classIndex],
                               self.piVector[classIndex])
                for classIndex, currentClass in enumerate(classes)
            ]
            predictions.append(classes[np.argmax(deltasForThisX)])

        return predictions

    def getUniqueClasses(self):
        """Return the distinct class labels of ``Y`` as a list of strings."""
        return [str(label) for label in self.Y.unique()]


In [8]:
# Build the classifier on the iris feature frame (X) and label series (Y).
testqda = qdaClassifier(X,Y)

Checking for dependencies....
Dependencies Loaded


In [9]:
# Estimate the per-class covariance matrices, mean vectors and priors.
testqda.trainQDA()

Attempting to load dependencies..
Dependencies Loaded
Initializing Classifier...
Generating Covariance Matrices..
(4, 4)
Sigmas Generated for 0.0
(4, 4)
Sigmas Generated for 1.0
(4, 4)
Sigmas Generated for 2.0
Calculating Class-Specific Mean Vectors..
Mu for class 0.0 is [ 5.006  3.418  1.464  0.244]
Mu for class 1.0 is [ 5.936  2.77   4.26   1.326]
Mu for class 2.0 is [ 6.588  2.974  5.552  2.026]
Calculating pi proportions....
Pi for class 0.0 is 0.333333333333
Pi for class 1.0 is 0.333333333333
Pi for class 2.0 is 0.333333333333


In [10]:
# Classify the same rows the model was trained on (predictQDA reads the data
# stored on the instance), so the accuracy computed from pred3 is a
# training-set accuracy.
pred3 =testqda.predictQDA()

Classifying...
0 Out Of 150 Observations Classified
8 Out Of 150 Observations Classified
16 Out Of 150 Observations Classified
24 Out Of 150 Observations Classified
32 Out Of 150 Observations Classified
40 Out Of 150 Observations Classified
48 Out Of 150 Observations Classified
56 Out Of 150 Observations Classified
64 Out Of 150 Observations Classified
72 Out Of 150 Observations Classified
80 Out Of 150 Observations Classified
88 Out Of 150 Observations Classified
96 Out Of 150 Observations Classified
104 Out Of 150 Observations Classified
112 Out Of 150 Observations Classified
120 Out Of 150 Observations Classified
128 Out Of 150 Observations Classified
136 Out Of 150 Observations Classified
144 Out Of 150 Observations Classified


In [11]:
# Fraction of observations whose predicted class matches the true label.
(Y == pred3).mean()

0.97999999999999998

In [12]:
# Reference check: fit scikit-learn's QDA on the same data and compute its
# training-set accuracy for comparison with the hand-written classifier.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf = QuadraticDiscriminantAnalysis().fit(X, Y)
pred9 = clf.predict(X)
(pred9 == Y).mean()


0.97999999999999998

In [16]:
def save_object(obj, filename):
    """Pickle ``obj`` into the file at ``filename`` using the highest protocol.

    The file is opened in binary write mode, so an existing file is
    overwritten.
    """
    with open(filename, mode='wb') as handle:
        writer = pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL)
        writer.dump(obj)



In [17]:
# Sample usage: persist the list of QDA predictions (pred3) to disk.
# NOTE(review): despite the file name, this stores the predictions, not a
# trained LDA model -- confirm whether the name or the saved object is intended.
save_object(pred3, 'LDATrained.pkl')