# Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from numpy.linalg import inv

# Defining Discriminant Functions

In [None]:
def g5(InputVector,CovarianceMatrix,MeanVector,ProbabilityOfClass5):
    """Evaluate the Gaussian log-discriminant of class 5 for one sample.

    The score is ``ln(P) - 0.5 * d^T Sigma^-1 d - 0.5 * ln|Sigma|`` with
    ``d = InputVector - MeanVector``; the class with the larger score wins.

    Parameters
    ----------
    InputVector : array_like
        A single sample, either 1-D or shaped ``(1, n_features)``.
    CovarianceMatrix : array_like
        Square covariance matrix of the class.
    MeanVector : array_like
        Mean of the class, broadcastable against ``InputVector``.
    ProbabilityOfClass5 : float
        Prior probability of the class.

    Returns
    -------
    float
        The discriminant value. It is NaN when the determinant is negative.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``CovarianceMatrix`` is exactly singular.
    """
    covariance = np.asarray(CovarianceMatrix, dtype=float)
    deviation = np.asarray(np.subtract(InputVector, MeanVector), dtype=float).ravel()

    # slogdet works in log space: det() of a high-dimensional covariance can
    # underflow to 0, which turned the old log(det) into -inf and the score
    # into +inf.
    sign, log_abs_det = np.linalg.slogdet(covariance)
    # A negative determinant has no real logarithm; keep reporting NaN for it.
    log_det = log_abs_det if sign >= 0 else float("nan")

    # Solve Sigma * y = d instead of forming the explicit inverse.
    mahalanobis = np.dot(deviation, np.linalg.solve(covariance, deviation))

    return np.log(ProbabilityOfClass5) - mahalanobis / 2.0 - log_det / 2.0


def g6(InputVector,CovarianceMatrix,MeanVector,ProbabilityOfClass5):
    """Evaluate the Gaussian log-discriminant of class 6 for one sample.

    The score is ``ln(P) - 0.5 * d^T Sigma^-1 d - 0.5 * ln|Sigma|`` with
    ``d = InputVector - MeanVector``; the class with the larger score wins.

    Parameters
    ----------
    InputVector : array_like
        A single sample, either 1-D or shaped ``(1, n_features)``.
    CovarianceMatrix : array_like
        Square covariance matrix of the class.
    MeanVector : array_like
        Mean of the class, broadcastable against ``InputVector``.
    ProbabilityOfClass5 : float
        Prior probability of the class being scored. Despite the name, this
        is whatever prior the caller passes in; the name is kept only so
        existing keyword callers do not break.

    Returns
    -------
    float
        The discriminant value. It is NaN when the determinant is negative.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``CovarianceMatrix`` is exactly singular.
    """
    covariance = np.asarray(CovarianceMatrix, dtype=float)
    deviation = np.asarray(np.subtract(InputVector, MeanVector), dtype=float).ravel()

    # slogdet works in log space: det() of a high-dimensional covariance can
    # underflow to 0, which turned the old log(det) into -inf and the score
    # into +inf.
    sign, log_abs_det = np.linalg.slogdet(covariance)
    # A negative determinant has no real logarithm; keep reporting NaN for it.
    log_det = log_abs_det if sign >= 0 else float("nan")

    # Solve Sigma * y = d instead of forming the explicit inverse.
    mahalanobis = np.dot(deviation, np.linalg.solve(covariance, deviation))

    return np.log(ProbabilityOfClass5) - mahalanobis / 2.0 - log_det / 2.0

# Reading the data

In [None]:
# The four .csv files are looked up relative to the working directory;
# edit the names below to full paths if they live somewhere else.
# Order: training labels, training data, test labels, test data.
_CSV_FILES = (
    "P1_labels_train.csv",
    "P1_data_train.csv",
    "P1_labels_test.csv",
    "P1_data_test.csv",
)

# None of the files has a header row, so every line is read as data.
df1, df2, df3, df4 = (pd.read_csv(csv_path, header=None) for csv_path in _CSV_FILES)

# The first column of each label file holds the class labels.
label_train = np.asarray(df1.iloc[:, 0])                  #Labels of training set
label_test = np.asarray(df3.iloc[:, 0])                   #Labels of test set

# Function for calculating misclassifications

In [None]:
def missclass(label_test,MeanClass5,CovarianceClass5,MeanClass6,CovarianceClass6,ProbailityOf5,data_test=None):
    """Classify every test sample as 5 or 6 and tally the outcomes.

    A sample is assigned to class 5 when ``g5`` scores strictly higher than
    ``g6``; ties and everything else go to class 6.

    Parameters
    ----------
    label_test : sequence
        True labels; one sample is classified per label.
    MeanClass5, CovarianceClass5 : array_like
        Mean and covariance passed to ``g5``.
    MeanClass6, CovarianceClass6 : array_like
        Mean and covariance passed to ``g6``.
    ProbailityOf5 : float
        Prior of class 5; ``g6`` receives ``1 - ProbailityOf5``.
    data_test : array_like or DataFrame, optional
        Samples to classify, one per row, aligned with ``label_test``.
        Defaults to the notebook-level ``df4`` so existing calls keep working.

    Returns
    -------
    tuple of int
        ``(correct5, correct6, wrong5, wrong6)``, where ``wrong5`` counts
        samples predicted as 5 whose label is not 5, and ``wrong6`` counts
        samples predicted as 6 whose label is not 6.
    """
    if data_test is None:
        data_test = df4
    # Convert once instead of slicing the DataFrame twice per sample.
    samples = np.asarray(data_test)
    prior6 = 1 - ProbailityOf5

    correct5 = 0  # predicted 5, label 5
    correct6 = 0  # predicted 6, label 6
    wrong5 = 0    # predicted 5, label is something else
    wrong6 = 0    # predicted 6, label is something else
    for i, actual in enumerate(label_test):
        # One-row 2-D slice, the same shape the discriminants were always given.
        sample = samples[i:i+1]
        score5 = g5(sample, CovarianceClass5, MeanClass5, ProbailityOf5)
        score6 = g6(sample, CovarianceClass6, MeanClass6, prior6)
        if score5 > score6:
            if actual == 5:
                correct5 += 1
            else:
                wrong5 += 1
        else:
            if actual == 6:
                correct6 += 1
            else:
                wrong6 += 1
    return correct5,correct6,wrong5,wrong6

# Calculating Mean Vectors

In [None]:
# Running sums of the samples of each class; dividing by the class counts
# afterwards turns them into the sample means.
# NOTE(review): the 64-entry initialisers assume df2 has 64 feature columns.
total5 = 0                                      # number of training samples labelled 5
total6 = 0                                      # number of training samples not labelled 5
MeanClass5 = [0] * 64
MeanClass6 = [0] * 64
for i, train_label in enumerate(label_train):
    sample_row = np.asarray(df2.iloc[i:i+1])    # one-row 2-D slice of the training data
    if train_label != 5:
        # every label other than 5 is counted as class 6
        total6 += 1
        MeanClass6 = np.add(MeanClass6, sample_row)
    else:
        total5 += 1
        MeanClass5 = np.add(MeanClass5, sample_row)
MeanClass5 = MeanClass5 / float(total5)         # estimate of the mean vector of class 5
MeanClass6 = MeanClass6 / float(total6)         # estimate of the mean vector of class 6

# Calculating probability of Class 5

In [None]:
# Prior of class 5 = fraction of training samples labelled 5; class 6 gets the rest.
p = float(total5) / len(label_train)
# Single-argument print(...) calls give the same output under the Python 2
# print statement and the Python 3 print function.
print("Estimated aprior probability of class 5 : %s" % p)
print("Estimated aprior probability of class 6 : %s" % (1 - p))

# Calculating Covariance Matrices

In [None]:
# Per-class covariance: accumulate the outer product of each centred sample
# with itself, then divide by the class count.
# NOTE(review): the hard-coded 64 assumes df2 has 64 feature columns.
S_5 = [0] * 64
S_6 = [0] * 64
CovarianceClass5 = np.zeros((64, 64))
CovarianceClass6 = np.zeros((64, 64))
for i, train_label in enumerate(label_train):
    sample_row = np.asarray(df2.iloc[i:i+1])    # one-row 2-D slice of the training data
    if train_label != 5:
        # every label other than 5 is counted as class 6
        S_6 = np.subtract(sample_row, MeanClass6)
        CovarianceClass6 += np.matmul(S_6.T, S_6)
    else:
        S_5 = np.subtract(sample_row, MeanClass5)
        CovarianceClass5 += np.matmul(S_5.T, S_5)
CovarianceClass5 /= float(total5)               # estimate of covariance matrix for class 5
CovarianceClass6 /= float(total6)               # estimate of covariance matrix for class 6

# CASE 1: Original Covariance matrices

### Misclassification for CASE 1

In [None]:
# Classify the test set with each class's own covariance matrix and report
# the counts. Single-argument print(...) calls give the same output under the
# Python 2 print statement and the Python 3 print function.
print("CASE 1: Original Covariance matrices")
correct5,correct6,wrong5,wrong6=missclass(label_test,MeanClass5,CovarianceClass5,MeanClass6,CovarianceClass6,p)
print("------------------------------------")
# wrong6 = predicted 6 but not labelled 6; wrong5 = predicted 5 but not labelled 5.
print("Number of data points with label 5 and classified as label 5 : %s" % correct5)
print("Number of data points with label 5 and classified as label 6 : %s" % wrong6)
print("Number of data points with label 6 and classified as label 6 : %s" % correct6)
print("Number of data points with label 6 and classified as label 5 : %s" % wrong5)
print("------------------------------------")
# Per-class error rate in percent: misses of a class over all samples of that class.
missclassification5=(float(wrong6)/(wrong6+correct5))*100
missclassification6=(float(wrong5)/(wrong5+correct6))*100
print('The misclassification rate for class 5 is %f %%' % missclassification5)
print('The misclassification rate for class 6 is %f %%' % missclassification6)

### Confusion Matrix for CASE 1

In [None]:
# Rows are the actual labels, columns the predicted labels.
# Built directly as an ndarray; the former np.matrix round trip added nothing.
ConfusionMatrix = np.array([[correct5,wrong6], [wrong5,correct6]])

# Printing the Confusion Matrix

print('The Confusion Matrix for CASE 1 is as follows :\n')
UpperTitle = ["  Predicted Label 5", "  Predicted Label 6"]
LeftTitle = ["Actual Label 5", "Actual Label 6"]
# One right-aligned 15-character field for the row title plus one per column.
row_format ="{:>15}" * (len(UpperTitle) + 1)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(row_format.format("", *UpperTitle))
for x, row in zip(LeftTitle, ConfusionMatrix):
    print(row_format.format(x, *row))

# CASE 2: Shared covariance matrix (Weighted Mean)

In [None]:
# Pool the two class covariances, each weighted by its class prior.
# Both names are bound to the same array object, so the classes share one matrix.
CovarianceClass5new = CovarianceClass6new = np.add(
    np.multiply(p, CovarianceClass5),
    np.multiply(1 - p, CovarianceClass6),
)

### Misclassification for CASE 2

In [None]:
# Classify the test set with the prior-weighted shared covariance and report
# the counts. Single-argument print(...) calls give the same output under the
# Python 2 print statement and the Python 3 print function.
print("CASE 2: Shared covariance matrix (Weighted Mean)")
correct5,correct6,wrong5,wrong6=missclass(label_test,MeanClass5,CovarianceClass5new,MeanClass6,CovarianceClass6new,p)
print("------------------------------------")
# wrong6 = predicted 6 but not labelled 6; wrong5 = predicted 5 but not labelled 5.
print("Number of data points with label 5 and classified as label 5 : %s" % correct5)
print("Number of data points with label 5 and classified as label 6 : %s" % wrong6)
print("Number of data points with label 6 and classified as label 6 : %s" % correct6)
print("Number of data points with label 6 and classified as label 5 : %s" % wrong5)
print("------------------------------------")
# Per-class error rate in percent: misses of a class over all samples of that class.
missclassification5=(float(wrong6)/(wrong6+correct5))*100
missclassification6=(float(wrong5)/(wrong5+correct6))*100
print('The misclassification rate for class 5 is %f %%' % missclassification5)
print('The misclassification rate for class 6 is %f %%' % missclassification6)

### Confusion Matrix for CASE 2

In [None]:
# Rows are the actual labels, columns the predicted labels.
# Built directly as an ndarray; the former np.matrix round trip added nothing.
ConfusionMatrix = np.array([[correct5,wrong6], [wrong5,correct6]])

# Printing the Confusion Matrix

print('The Confusion Matrix for CASE 2 is as follows :\n')
UpperTitle = ["  Predicted Label 5", "  Predicted Label 6"]
LeftTitle = ["Actual Label 5", "Actual Label 6"]
# One right-aligned 15-character field for the row title plus one per column.
row_format ="{:>15}" * (len(UpperTitle) + 1)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(row_format.format("", *UpperTitle))
for x, row in zip(LeftTitle, ConfusionMatrix):
    print(row_format.format(x, *row))

# CASE 3: Shared covariance matrix (Arithmetic Mean)

In [None]:
# Pool the two class covariances with equal weight (plain average).
# Both names are bound to the same array object, so the classes share one matrix.
CovarianceClass5new = CovarianceClass6new = np.add(CovarianceClass5, CovarianceClass6) / 2.0

### Misclassification for CASE 3

In [None]:
# Classify the test set with the equally-weighted shared covariance and report
# the counts. Single-argument print(...) calls give the same output under the
# Python 2 print statement and the Python 3 print function.
print("CASE 3: Shared covariance matrix (Arithmetic Mean)")
correct5,correct6,wrong5,wrong6=missclass(label_test,MeanClass5,CovarianceClass5new,MeanClass6,CovarianceClass6new,p)
print("------------------------------------")
# wrong6 = predicted 6 but not labelled 6; wrong5 = predicted 5 but not labelled 5.
print("Number of data points with label 5 and classified as label 5 : %s" % correct5)
print("Number of data points with label 5 and classified as label 6 : %s" % wrong6)
print("Number of data points with label 6 and classified as label 6 : %s" % correct6)
print("Number of data points with label 6 and classified as label 5 : %s" % wrong5)
print("------------------------------------")
# Per-class error rate in percent: misses of a class over all samples of that class.
missclassification5=(float(wrong6)/(wrong6+correct5))*100
missclassification6=(float(wrong5)/(wrong5+correct6))*100
print('The misclassification rate for class 5 is %f %%' % missclassification5)
print('The misclassification rate for class 6 is %f %%' % missclassification6)

### Confusion Matrix for CASE 3

In [None]:
# Rows are the actual labels, columns the predicted labels.
# Built directly as an ndarray; the former np.matrix round trip added nothing.
ConfusionMatrix = np.array([[correct5,wrong6], [wrong5,correct6]])

# Printing the Confusion Matrix

print('The Confusion Matrix for CASE 3 is as follows :\n')
UpperTitle = ["  Predicted Label 5", "  Predicted Label 6"]
LeftTitle = ["Actual Label 5", "Actual Label 6"]
# One right-aligned 15-character field for the row title plus one per column.
row_format ="{:>15}" * (len(UpperTitle) + 1)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(row_format.format("", *UpperTitle))
for x, row in zip(LeftTitle, ConfusionMatrix):
    print(row_format.format(x, *row))