In [32]:
# Imports
import scipy.io 
import numpy as np
import math

# Load the four image stacks (train/test x digit 0/1), in that order.
# Every .mat file keeps its images under the 'target_img' key.
train_0_data, train_1_data, test_0_data, test_1_data = (
    scipy.io.loadmat(mat_file)['target_img']
    for mat_file in (
        'train_0_img.mat',
        'train_1_img.mat',
        'test_0_img.mat',
        'test_1_img.mat',
    )
)

# Size of each stack along its third axis -- the axis the feature helpers
# iterate over as data[:, :, i] (presumably one image per index).
train_0_len, train_1_len, test_0_len, test_1_len = (
    stack.shape[2]
    for stack in (train_0_data, train_1_data, test_0_data, test_1_data)
)

# Predicted labels for the digit-0 and digit-1 test sets (filled in later)
predicted_0 = []
predicted_1 = []


In [33]:
# Generic functions

def get_avg_brightness(data, l):
    """Return the mean value of each of the first `l` slices of `data`.

    Slice ``i`` is ``data[:, :, i]``. The result is a list holding one
    ``np.mean`` value per slice, in index order.
    """
    return [np.mean(data[:, :, idx]) for idx in range(l)]
        
def get_variance(data, l):
    """Return the variance feature of each of the first `l` slices of `data`.

    For slice ``data[:, :, i]`` the feature is the *mean of the per-row
    variances* (``np.var`` along axis 1, then ``np.mean``) -- not the variance
    of the slice as a whole. The result is a list with one value per slice.
    """
    return [np.mean(np.var(data[:, :, idx], axis=1)) for idx in range(l)]

def get_probability_density(x, mu, sigma2):
    """Evaluate the normal density with mean `mu` and variance `sigma2` at `x`.

    Computes ``1 / (sqrt(2*pi) * sqrt(sigma2)) * exp(-(x - mu)**2 / (2*sigma2))``.
    Note that `sigma2` is the variance, not the standard deviation.
    """
    exponent = -(math.pow(x - mu, 2) / (2 * sigma2))
    scale = 1 / (math.sqrt(2 * math.pi) * math.sqrt(sigma2))
    return scale * math.exp(exponent)

def get_accuracy(actual, predicted_list):
    """Print a summary line and return the accuracy as a percentage.

    Counts how many entries of `predicted_list` equal `actual`, prints
    '<hits> predicted out of <total> for digit <actual>', and returns
    ``hits / total * 100``.
    """
    total = len(predicted_list)
    hits = predicted_list.count(actual)
    print('{0} predicted out of {1} for digit {2}'.format(hits, total, actual))
    return hits / total * 100


In [34]:
# Build the two feature lists for every dataset:
#   *_b -> average brightness, *_v -> variance feature

# Training data, digit 0
train_0_b, train_0_v = (
    get_avg_brightness(train_0_data, train_0_len),
    get_variance(train_0_data, train_0_len),
)

# Training data, digit 1
train_1_b, train_1_v = (
    get_avg_brightness(train_1_data, train_1_len),
    get_variance(train_1_data, train_1_len),
)

# Test data, digit 0
test_0_b, test_0_v = (
    get_avg_brightness(test_0_data, test_0_len),
    get_variance(test_0_data, test_0_len),
)

# Test data, digit 1
test_1_b, test_1_v = (
    get_avg_brightness(test_1_data, test_1_len),
    get_variance(test_1_data, test_1_len),
)


In [37]:
# Estimate the normal-distribution parameters of each feature, per digit,
# by maximum likelihood.
# Naming: mle_<digit>_<feature>_<param>, where x1 = average brightness and
# x2 = variance feature.

def _fit_gaussian(samples):
    """Return the maximum-likelihood (mean, variance) of `samples`.

    The variance divides by N rather than N - 1.
    """
    count = len(samples)
    mean = sum(samples) / count
    variance = sum([math.pow(value - mean, 2) for value in samples]) / count
    return mean, variance


# 1. Average brightness (x1), digit 0
mle_0_x1_mu, mle_0_x1_sigma2 = _fit_gaussian(train_0_b)
print('mle_0_x1_mu,mle_0_x1_sigma2',mle_0_x1_mu,mle_0_x1_sigma2)


# 2. Variance feature (x2), digit 0
mle_0_x2_mu, mle_0_x2_sigma2 = _fit_gaussian(train_0_v)
print('mle_0_x2_mu, mle_0_x2_sigma2',mle_0_x2_mu,mle_0_x2_sigma2)


# 3. Average brightness (x1), digit 1
mle_1_x1_mu, mle_1_x1_sigma2 = _fit_gaussian(train_1_b)
print('mle_1_x1_mu,mle_1_x1_sigma2',mle_1_x1_mu,mle_1_x1_sigma2)


# 4. Variance feature (x2), digit 1
mle_1_x2_mu, mle_1_x2_sigma2 = _fit_gaussian(train_1_v)
print('mle_1_x2_mu, mle_1_x2_sigma2',mle_1_x2_mu,mle_1_x2_sigma2)


mle_0_x1_mu,mle_0_x1_sigma2 44.21682790539819 115.26578827239825
mle_0_x2_mu, mle_0_x2_sigma2 6557.526231029935 1574031.7326931036
mle_1_x1_mu,mle_1_x1_sigma2 19.379653852789975 31.44685978618372
mle_1_x2_mu, mle_1_x2_sigma2 3658.6940541248864 1040099.4705590858


In [36]:
# Naive Bayes classification of the test images.
#
# For each class y the score is  p(x1|y) * p(x2|y) * p(y), where x1 is the
# average brightness, x2 is the variance feature, and every p(x_k|y) is the
# normal density evaluated with the MLE parameters of that class. An image is
# labelled 1 only when the class-1 score is strictly larger; ties go to 0.

CLASS_PRIOR = 0.5  # p(y=0) = p(y=1): both digits are weighted equally

prob_0=[] # Class-0 score of every test image (digit-0 test set first, then digit-1)
prob_1=[] # Class-1 score of every test image, same order as prob_0


def _class_scores(brightness, variance):
    """Return (score_0, score_1) = p(x1|y) * p(x2|y) * p(y) for y = 0 and y = 1."""
    score_0 = (
        get_probability_density(brightness, mle_0_x1_mu, mle_0_x1_sigma2)
        * get_probability_density(variance, mle_0_x2_mu, mle_0_x2_sigma2)
        * CLASS_PRIOR
    )
    score_1 = (
        get_probability_density(brightness, mle_1_x1_mu, mle_1_x1_sigma2)
        * get_probability_density(variance, mle_1_x2_mu, mle_1_x2_sigma2)
        * CLASS_PRIOR
    )
    return score_0, score_1


def _classify(brightness_values, variance_values):
    """Score every (brightness, variance) pair and return the predicted labels.

    Appends the per-class scores to the module-level prob_0 / prob_1 lists as
    a side effect.
    """
    labels = []
    for brightness, variance in zip(brightness_values, variance_values):
        score_0, score_1 = _class_scores(brightness, variance)
        prob_0.append(score_0)
        prob_1.append(score_1)
        labels.append(1 if score_1 > score_0 else 0)
    return labels


# Build the prediction lists from scratch here instead of appending to lists
# created in another cell: appending made this cell non-idempotent, so running
# it a second time doubled the number of predictions being scored.

# 1. Test data of digit 0
predicted_0 = _classify(test_0_b, test_0_v)

# 2. Test data of digit 1
predicted_1 = _classify(test_1_b, test_1_v)


print('******************************************************************')
print('Accuracy for 0: ',get_accuracy(0,predicted_0))
print('Accuracy for 1: ',get_accuracy(1,predicted_1))
print('******************************************************************')

******************************************************************
894 predicted out of 980 for digit 0
Accuracy for 0:  91.22448979591836
1047 predicted out of 1135 for digit 1
Accuracy for 1:  92.2466960352423
******************************************************************
