# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
from random import choices
from collections import Counter

# Necessary Functions 

In [2]:
def Boro(num1, num2, num3):
    if (num1 > num2) and (num1 > num3):
        boro = 1
    elif (num2 > num1) and (num2 > num3):
        boro = 2
    else:
        boro = 3
    return boro

In [3]:
def Bayesclassification(number, act_miu1, act_miu2, act_sigma1, act_sigma2, act_prior1, act_prior2):
    random_seed=0
    dist1=multivariate_normal(cov = act_sigma1, mean = act_miu1, seed = random_seed)
    dist2=multivariate_normal(cov = act_sigma2, mean = act_miu2, seed = random_seed)
    # Generate a series of 1s & 2s with above probability.
    x=choices(population=[1,2], weights=[act_prior1,act_prior2], k=number)
    y=Counter(x)
    mid1=y[1]//2
    mid2=y[2]//2
    # Sampling from the distribution of the FIRST class
    data1 = dist2.rvs(size = y[1])
    data1_train=data1[:mid1]
    data1_test=data1[mid1:]
    
    # Sampling from the distribution of the SECOND class
    data2 = dist1.rvs(size = y[2])
    data2_train=data2[:mid2]
    data2_test=data2[mid2:]
    
    # Get the total number of data for train
    total_train_records=len(data1_train)+len(data2_train)
    
    # Estimate the values of mean and cov matrices
    miu1_hat=np.mean(data1_train,0)
    sigma1_hat=np.cov(data1_train,rowvar=False)
    miu2_hat=np.mean(data2_train,0)
    sigma2_hat=np.cov(data2_train,rowvar=False)
    
    # Estimate the prior probability
    prior1_hat=len(data1_train)/total_train_records
    prior2_hat=len(data2_train)/total_train_records
    
    #Actual class data
    actual1=np.repeat(1,len(data1_test))
    actual2=np.repeat(2,len(data2_test))
    actual_classes=np.concatenate((actual1,actual2), axis=0)
    
    # Concatinate the data that is coming from both the classes for final classification
    final_test_data=np.concatenate((data1_test,data2_test), axis=0)
    
    # ESTIMATED the mis-classification rate
    # Calculate the components of the posteriori probability
    denom1=multivariate_normal.pdf(final_test_data, mean=miu1_hat, cov=sigma1_hat)*prior1_hat
    denom2=multivariate_normal.pdf(final_test_data, mean=miu2_hat, cov=sigma2_hat)*prior2_hat
    denominatior=denom1+denom2
    
    # Conpute the posteriori probability for both the classes
    class1_prob=denom1/denominatior
    class2_prob=denom2/denominatior
    
    class_prob_diff=class1_prob-class2_prob
    predicted_classes=[1 if x>0 else 2 for x in class_prob_diff]
    classification_diff=actual_classes-predicted_classes    
    estimated_mis_class_rate=len([1 for x in classification_diff if x!=0])/len(actual_classes)
    #estimated_mis_class_rate
    print("Misclassification Rate for ESTIMATED Parameters: ",estimated_mis_class_rate)
    
    # ACTUAL the mis-classification rate
    # Calculate the components of the posteriori probability
    denom1=multivariate_normal.pdf(final_test_data, mean=miu1, cov=sigma1)*act_prior1
    denom2=multivariate_normal.pdf(final_test_data, mean=miu2, cov=sigma2)*act_prior2
    denominatior=denom1+denom2
    
    # Conpute the posteriori probability for both the classes
    class1_prob=denom1/denominatior
    class2_prob=denom2/denominatior
    
    class_prob_diff=class1_prob-class2_prob
    predicted_classes=[1 if x>0 else 2 for x in class_prob_diff]
    classification_diff=actual_classes-predicted_classes    
    actual_mis_class_rate=len([1 for x in classification_diff if x!=0])/len(actual_classes)
    
    print("Misclassification Rate for ACTUAL Parameters: ",actual_mis_class_rate)
    
    return(actual_mis_class_rate-estimated_mis_class_rate)

In [4]:
def BayesclassificationTHREE(number, act_miu1, act_miu2, act_miu3, act_sigma1, act_sigma2, act_sigma3, act_prior1, act_prior2, act_prior3):
    random_seed=0
    dist1=multivariate_normal(cov = act_sigma1, mean = act_miu1, seed = random_seed)
    dist2=multivariate_normal(cov = act_sigma2, mean = act_miu2, seed = random_seed)
    dist3=multivariate_normal(cov = act_sigma3, mean = act_miu3, seed = random_seed)
    # Generate a series of 1s & 2s with above probability. It will be used to invoke the sampling function
    x=choices(population=[1,2,3], weights=[act_prior1,act_prior2,act_prior3], k=number)
    y=Counter(x)
    mid1=y[1]//2
    mid2=y[2]//2
    mid3=y[3]//2

    # Sampling from the distribution of the FIRST class
    data1 = dist1.rvs(size = y[1])
    data1_train=data1[:mid1]
    data1_test=data1[mid1:]
    
    # Sampling from the distribution of the SECOND class
    data2 = dist2.rvs(size = y[2])
    data2_train=data2[:mid2]
    data2_test=data2[mid2:]
    
    # Sampling from the distribution of the THIRD class
    data3 = dist3.rvs(size = y[3])
    data3_train=data3[:mid3]
    data3_test=data3[mid3:]

    # Get the total number of data for train
    total_train_records=len(data1_train)+len(data2_train)+len(data3_train)
    
    # Estimate the values of mean and cov matrices
    miu1_hat=np.mean(data1_train,0)
    sigma1_hat=np.cov(data1_train,rowvar=False)
    miu2_hat=np.mean(data2_train,0)
    sigma2_hat=np.cov(data2_train,rowvar=False)
    miu3_hat=np.mean(data3_train,0)
    sigma3_hat=np.cov(data3_train,rowvar=False)

    
    # Estimate the prior probability
    prior1_hat=len(data1_train)/total_train_records
    prior2_hat=len(data2_train)/total_train_records
    prior3_hat=len(data3_train)/total_train_records
    
    #Actual class data
    actual1=np.repeat(1,len(data1_test))
    actual2=np.repeat(2,len(data2_test))
    actual3=np.repeat(3,len(data3_test))
    actual_classes=np.concatenate((actual1,actual2,actual3), axis=0)
    
    # Concatinate the data that is coming from both the classes for final classification
    final_test_data=np.concatenate((data1_test,data2_test,data3_test), axis=0)
    
    # ESTIMATED the mis-classification rate
    # Calculate the components of the posteriori probability
    denom1=multivariate_normal.pdf(final_test_data, mean=miu1_hat, cov=sigma1_hat)*prior1_hat
    denom2=multivariate_normal.pdf(final_test_data, mean=miu2_hat, cov=sigma2_hat)*prior2_hat
    denom3=multivariate_normal.pdf(final_test_data, mean=miu3_hat, cov=sigma3_hat)*prior3_hat
    denominatior=denom1+denom2+denom3
    
    # Conpute the posteriori probability for both the classes
    class1_prob=denom1/denominatior
    class2_prob=denom2/denominatior
    class3_prob=denom3/denominatior
        
    #predicted_classes=largest(class1_prob,class2_prob,class3_prob)
    #print(predicted_classes)
    predicted_classes=np.repeat(0,len(class1_prob))
    for i in range(len(class1_prob)):
        predicted_classes[i]=Boro(class1_prob[i],class2_prob[i],class3_prob[i])
    
    classification_diff=actual_classes-predicted_classes    
    estimated_mis_class_rate=len([1 for x in classification_diff if x!=0])/len(actual_classes)
    #estimated_mis_class_rate
    print("Misclassification Rate for ESTIMATED Parameters: ",estimated_mis_class_rate)
    
    # ACTUAL the mis-classification rate
    # Calculate the components of the posteriori probability
    denom1=multivariate_normal.pdf(final_test_data, mean=miu1, cov=sigma1)*act_prior1
    denom2=multivariate_normal.pdf(final_test_data, mean=miu2, cov=sigma2)*act_prior2
    denom3=multivariate_normal.pdf(final_test_data, mean=miu3, cov=sigma3)*act_prior3
    denominatior=denom1+denom2+denom3
    
    # Conpute the posteriori probability for both the classes
    class1_prob=denom1/denominatior
    class2_prob=denom2/denominatior
    class3_prob=denom3/denominatior
    
    predicted_classes=np.repeat(0,len(class1_prob))
    for i in range(len(class1_prob)):
        predicted_classes[i]=Boro(class1_prob[i],class2_prob[i],class3_prob[i])

    
    #class_prob_diff=class1_prob-class2_prob
    #predicted_classes=[1 if x>0 else 2 for x in class_prob_diff]
    classification_diff=actual_classes-predicted_classes    
    actual_mis_class_rate=len([1 for x in classification_diff if x!=0])/len(actual_classes)
    
    print("Misclassification Rate for ACTUAL Parameters: ",actual_mis_class_rate)
    
    return(actual_mis_class_rate-estimated_mis_class_rate)


In [5]:
miu1=np.array([0,0])
sigma1=np.array([[1, 0], [0, 1]])

miu2=np.array([0,1])
sigma2=np.array([[1, -0.5], [-0.5, 2]])

miu3=np.array([-1,0.5])
sigma3=np.array([[0.75, 0.2], [0.2, 0.8]])

miu4=np.array([-2,1.3])
sigma4=np.array([[2, 0.8], [0.8, 1.6]])

In [6]:
#Case 1
misclass_rate_diff_n2000=Bayesclassification(2000,miu1,miu2,sigma1,sigma2,0.75,0.25)
misclass_rate_diff_n5000=Bayesclassification(5000,miu1,miu2,sigma1,sigma2,0.75,0.25)
misclass_rate_diff_n10000=Bayesclassification(10000,miu1,miu2,sigma1,sigma2,0.75,0.25)
misclass_rate_diff_n20000=Bayesclassification(20000,miu1,miu2,sigma1,sigma2,0.75,0.25)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.22377622377622378
Misclassification Rate for ACTUAL Parameters:  0.4355644355644356
Misclassification Rate for ESTIMATED Parameters:  0.2412
Misclassification Rate for ACTUAL Parameters:  0.4764
Misclassification Rate for ESTIMATED Parameters:  0.2332
Misclassification Rate for ACTUAL Parameters:  0.4572
Misclassification Rate for ESTIMATED Parameters:  0.24067593240675933
Misclassification Rate for ACTUAL Parameters:  0.46765323467653236
Misclass Rate when N=2000:  0.2117882117882118
Misclass Rate when N=5000:  0.2352
Misclass Rate when N=10000:  0.224
Misclass Rate when N=20000:  0.22697730226977303


In [7]:
#Case 2
misclass_rate_diff_n2000=Bayesclassification(2000,miu1,miu2,sigma1,sigma2,0.5,0.5)
misclass_rate_diff_n5000=Bayesclassification(5000,miu1,miu2,sigma1,sigma2,0.5,0.5)
misclass_rate_diff_n10000=Bayesclassification(10000,miu1,miu2,sigma1,sigma2,0.5,0.5)
misclass_rate_diff_n20000=Bayesclassification(20000,miu1,miu2,sigma1,sigma2,0.5,0.5)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.3156843156843157
Misclassification Rate for ACTUAL Parameters:  0.6863136863136863
Misclassification Rate for ESTIMATED Parameters:  0.3314674130347861
Misclassification Rate for ACTUAL Parameters:  0.6713314674130347
Misclassification Rate for ESTIMATED Parameters:  0.32993401319736054
Misclassification Rate for ACTUAL Parameters:  0.6706658668266346
Misclassification Rate for ESTIMATED Parameters:  0.3210678932106789
Misclassification Rate for ACTUAL Parameters:  0.677032296770323
Misclass Rate when N=2000:  0.37062937062937057
Misclass Rate when N=5000:  0.3398640543782486
Misclass Rate when N=10000:  0.3407318536292741
Misclass Rate when N=20000:  0.35596440355964404


In [8]:
#Case 3
misclass_rate_diff_n2000=Bayesclassification(2000,miu1,miu2,sigma1,sigma2,0.6,0.4)
misclass_rate_diff_n5000=Bayesclassification(5000,miu1,miu2,sigma1,sigma2,0.6,0.4)
misclass_rate_diff_n10000=Bayesclassification(10000,miu1,miu2,sigma1,sigma2,0.6,0.4)
misclass_rate_diff_n20000=Bayesclassification(20000,miu1,miu2,sigma1,sigma2,0.6,0.4)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.3386613386613387
Misclassification Rate for ACTUAL Parameters:  0.6183816183816184
Misclassification Rate for ESTIMATED Parameters:  0.32267093162734906
Misclassification Rate for ACTUAL Parameters:  0.6077568972411036
Misclassification Rate for ESTIMATED Parameters:  0.3268
Misclassification Rate for ACTUAL Parameters:  0.6174
Misclassification Rate for ESTIMATED Parameters:  0.32086791320867913
Misclassification Rate for ACTUAL Parameters:  0.6243375662433757
Misclass Rate when N=2000:  0.27972027972027974
Misclass Rate when N=5000:  0.28508596561375454
Misclass Rate when N=10000:  0.29059999999999997
Misclass Rate when N=20000:  0.30346965303469653


In [9]:
#Case 4
misclass_rate_diff_n2000=Bayesclassification(2000,miu1,miu3,sigma1,sigma3,0.7,0.3)
misclass_rate_diff_n5000=Bayesclassification(5000,miu1,miu3,sigma1,sigma3,0.7,0.3)
misclass_rate_diff_n10000=Bayesclassification(10000,miu1,miu3,sigma1,sigma3,0.7,0.3)
misclass_rate_diff_n20000=Bayesclassification(20000,miu1,miu3,sigma1,sigma3,0.7,0.3)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.192
Misclassification Rate for ACTUAL Parameters:  0.363
Misclassification Rate for ESTIMATED Parameters:  0.19592163134746102
Misclassification Rate for ACTUAL Parameters:  0.3626549380247901
Misclassification Rate for ESTIMATED Parameters:  0.19976004799040192
Misclassification Rate for ACTUAL Parameters:  0.3669266146770646
Misclassification Rate for ESTIMATED Parameters:  0.1972
Misclassification Rate for ACTUAL Parameters:  0.3735
Misclass Rate when N=2000:  0.17099999999999999
Misclass Rate when N=5000:  0.16673330667732908
Misclass Rate when N=10000:  0.1671665666866627
Misclass Rate when N=20000:  0.1763


In [10]:
#Case 5
misclass_rate_diff_n2000=Bayesclassification(2000,miu2,miu3,sigma2,sigma3,0.3,0.7)
misclass_rate_diff_n5000=Bayesclassification(5000,miu2,miu3,sigma2,sigma3,0.3,0.7)
misclass_rate_diff_n10000=Bayesclassification(10000,miu2,miu3,sigma2,sigma3,0.3,0.7)
misclass_rate_diff_n20000=Bayesclassification(20000,miu2,miu3,sigma2,sigma3,0.3,0.7)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.191
Misclassification Rate for ACTUAL Parameters:  0.254
Misclassification Rate for ESTIMATED Parameters:  0.2088
Misclassification Rate for ACTUAL Parameters:  0.256
Misclassification Rate for ESTIMATED Parameters:  0.207
Misclassification Rate for ACTUAL Parameters:  0.253
Misclassification Rate for ESTIMATED Parameters:  0.2
Misclassification Rate for ACTUAL Parameters:  0.255
Misclass Rate when N=2000:  0.063
Misclass Rate when N=5000:  0.04719999999999999
Misclass Rate when N=10000:  0.04600000000000001
Misclass Rate when N=20000:  0.05499999999999999


In [11]:
#Case 6
misclass_rate_diff_n2000=BayesclassificationTHREE(2000,miu1,miu2,miu3,sigma1,sigma2,sigma3,0.3,0.4,0.3)
misclass_rate_diff_n5000=BayesclassificationTHREE(5000,miu1,miu2,miu3,sigma1,sigma2,sigma3,0.3,0.4,0.3)
misclass_rate_diff_n10000=BayesclassificationTHREE(10000,miu1,miu2,miu3,sigma1,sigma2,sigma3,0.3,0.4,0.3)
misclass_rate_diff_n20000=BayesclassificationTHREE(20000,miu1,miu2,miu3,sigma1,sigma2,sigma3,0.3,0.4,0.3)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.4005994005994006
Misclassification Rate for ACTUAL Parameters:  0.3906093906093906
Misclassification Rate for ESTIMATED Parameters:  0.43542582966813276
Misclassification Rate for ACTUAL Parameters:  0.4382247101159536
Misclassification Rate for ESTIMATED Parameters:  0.4261147770445911
Misclassification Rate for ACTUAL Parameters:  0.4241151769646071
Misclassification Rate for ESTIMATED Parameters:  0.43015698430156984
Misclassification Rate for ACTUAL Parameters:  0.43145685431456854
Misclass Rate when N=2000:  -0.009990009990009985
Misclass Rate when N=5000:  0.0027988804478208396
Misclass Rate when N=10000:  -0.0019996000799840097
Misclass Rate when N=20000:  0.001299870012998694


In [12]:
#Case 7
misclass_rate_diff_n2000=BayesclassificationTHREE(2000,miu2,miu3,miu4,sigma2,sigma3,sigma4,0.2,0.7,0.1)
misclass_rate_diff_n5000=BayesclassificationTHREE(5000,miu2,miu3,miu4,sigma2,sigma3,sigma4,0.2,0.7,0.1)
misclass_rate_diff_n10000=BayesclassificationTHREE(10000,miu2,miu3,miu4,sigma2,sigma3,sigma4,0.2,0.7,0.1)
misclass_rate_diff_n20000=BayesclassificationTHREE(20000,miu2,miu3,miu4,sigma2,sigma3,sigma4,0.2,0.7,0.1)

print("Misclass Rate when N=2000: ",misclass_rate_diff_n2000)
print("Misclass Rate when N=5000: ",misclass_rate_diff_n5000)
print("Misclass Rate when N=10000: ",misclass_rate_diff_n10000)
print("Misclass Rate when N=20000: ",misclass_rate_diff_n20000)

Misclassification Rate for ESTIMATED Parameters:  0.232
Misclassification Rate for ACTUAL Parameters:  0.465
Misclassification Rate for ESTIMATED Parameters:  0.22790883646541382
Misclassification Rate for ACTUAL Parameters:  0.4886045581767293
Misclassification Rate for ESTIMATED Parameters:  0.2168
Misclassification Rate for ACTUAL Parameters:  0.4652
Misclassification Rate for ESTIMATED Parameters:  0.21997800219978003
Misclassification Rate for ACTUAL Parameters:  0.47585241475852413
Misclass Rate when N=2000:  0.233
Misclass Rate when N=5000:  0.2606957217113155
Misclass Rate when N=10000:  0.2484
Misclass Rate when N=20000:  0.2558744125587441


# Taken help from sankar narayan misra for this code.