In [None]:
#Importing the Libraries
import numpy as np 
import pandas as pd 
from scipy.io import loadmat

In [None]:
#Loading the dataset from the MATLAB-format file with scipy's loadmat
DATASET_FILE = 'fashion_mnist.mat'
fashion_data = loadmat(DATASET_FILE)

In [None]:
#Partitioning the dataset into Train Features, Train Labels, Test Features & Test Labels
#The four entries are looked up in fashion_data under the keys 'trX', 'trY', 'tsX' and 'tsY'
(fashion_train_X,
 fashion_train_y,
 fashion_test_X,
 fashion_test_y) = (fashion_data[key] for key in ('trX', 'trY', 'tsX', 'tsY'))

In [None]:
#Calculating two summary features for every row (presumably one image per row - confirm
#against the dataset): the mean of the values along axis 1 and their standard deviation.
#Both statistics are accumulated in float64.
#Train data: X1 = mean, X2 = standard deviation
X1, X2 = (stat(fashion_train_X, axis=1, dtype=np.float64) for stat in (np.mean, np.std))
#Test data: x1 = mean, x2 = standard deviation
x1, x2 = (stat(fashion_test_X, axis=1, dtype=np.float64) for stat in (np.mean, np.std))

In [None]:
#Creating the train and test DataFrames from the computed features:
#Feature1 holds the mean and Feature2 the standard deviation
train_data = pd.DataFrame({'Feature1': X1, 'Feature2': X2})
test_data = pd.DataFrame({'Feature1': x1, 'Feature2': x2})

In [None]:
#Attaching the training labels (transposed) as the 'class' column of train_data
train_data['class'] = fashion_train_y.T

In [None]:
#Estimating Parameters-Mean
#For each class label, keep only that class's rows, drop the label column and
#average the remaining feature columns
mean = {
  label: train_data[train_data['class'] == label].drop(columns=['class']).mean()
  for label in (0, 1)
}

In [None]:
#Displaying the estimated mean values (a dict keyed by the class labels 0 and 1,
#each value being the per-feature mean of that class)
mean

{0: Feature1    0.325608
 Feature2    0.320036
 dtype: float64, 1: Feature1    0.222905
 Feature2    0.333942
 dtype: float64}

In [None]:
#Estimating Parameters-Covariance Matrix
#For each class the estimate is (X - mean)^T (X - mean) / (n - 1), where X holds
#the feature rows of that class and n is the number of rows in that class.
covariance_matrix=dict()
#Class 0: select the rows, drop the label column and centre each feature on its mean
data=train_data[train_data['class']==0].drop(columns=['class'])
data_1=data.sub(data.mean())
datat_1=data_1.transpose()
#Class 1: same centring on the class-1 rows
data2=train_data[train_data['class']==1].drop(columns=['class'])
data_2=data2.sub(data2.mean())
datat_2=data_2.transpose()
covariance_matrix[0]=datat_1.dot(data_1).div(data.shape[0]-1)
#Normalise by the class-1 row count (data2); using the class-0 count here would
#only be correct when both classes happen to have the same number of rows
covariance_matrix[1]=datat_2.dot(data_2).div(data2.shape[0]-1)
covariance_matrix

{0:           Feature1  Feature2
 Feature1  0.012856  0.008979
 Feature2  0.008979  0.007742, 1:           Feature1  Feature2
 Feature1  0.003244  0.002530
 Feature2  0.002530  0.003253}

In [None]:
#Estimating Parameters-Standard Deviation
#For each class, the standard deviation of a feature is the square root of the
#matching diagonal entry of that class's covariance matrix
import math
standard_deviation = {
  label: pd.Series(
    [math.sqrt(covariance_matrix[label][feature][feature]) for feature in ('Feature1', 'Feature2')],
    index=['Feature1', 'Feature2'])
  for label in (0, 1)
}

In [None]:
#Displaying the estimated standard deviation values (a dict keyed by the class
#labels 0 and 1, each value being a Series indexed by Feature1 and Feature2)
standard_deviation

{0: Feature1    0.113384
 Feature2    0.087990
 dtype: float64, 1: Feature1    0.056956
 Feature2    0.057037
 dtype: float64}

In [None]:
#Naive Bayes Classifier

#To compute the Normal Distribution density
def gaussian_distribution(x,mean,sd):
  """Return the value of the normal probability density at x.

  x    -- point at which the density is evaluated
  mean -- mean of the distribution
  sd   -- standard deviation of the distribution (a zero value divides by zero)
  """
  deviation = x - mean
  exponent = -(deviation**2) / (2 * sd**2)
  normaliser = math.sqrt(2 * math.pi) * sd
  return math.exp(exponent) / normaliser

#Calculating Prior Probability
def prior_probability(y_labels):
  """Return the relative frequency of every label in the 'class' column.

  y_labels -- DataFrame with a 'class' column

  Returns a dict mapping int(label) to (rows with that label) / (total rows
  in y_labels). An empty frame yields an empty dict.
  """
  total_rows = y_labels.shape[0]
  #value_counts() is computed once up front instead of on every loop pass
  class_counts = y_labels['class'].value_counts()
  return {int(label): class_counts.loc[label] / total_rows for label in class_counts.index}

#Calculating Posterior Probability
def posterior_probability(data_d):
  """Return the unnormalised posterior scores of one sample for class 0 and class 1.

  data_d -- one sample as a Series; its index labels are used to look up the
            per-class parameters

  Reads the module-level dicts mean, standard_deviation and prior_of_train.
  Each score is (density of first feature) * (density of second feature) *
  (class prior); the result is the tuple (score for class 0, score for class 1).
  """
  scores = []
  for label in (0, 1):
    likelihoods = [
      gaussian_distribution(data_d[feature], mean[label][feature], standard_deviation[label][feature])
      for feature in data_d.index
    ]
    #Only the first two densities enter the product
    scores.append(likelihoods[0] * likelihoods[1] * prior_of_train[label])
  return tuple(scores)

#Naive Bayes Classifier
def naive_bayes(test_samples):
  """Score every row of test_samples and pick the class with the larger score.

  Returns a DataFrame with one column per class score (columns 0 and 1, as
  expanded from posterior_probability's tuple) plus a 'predictions' column
  holding the label of the larger score in each row.
  """
  class_scores = test_samples.apply(posterior_probability, axis=1, result_type='expand')
  #idxmax(axis=1) gives, per row, the column label of the highest score
  best_class = class_scores.idxmax(axis=1)
  class_scores['predictions'] = best_class
  return class_scores


In [None]:
#Calculating Accuracy
def accuracy(actual_class,predicted_class):
  """Return the percentage of rows whose prediction equals the actual label.

  actual_class    -- DataFrame whose column 0 holds the true labels
  predicted_class -- DataFrame with a 'predictions' column

  Mismatches are counted by comparing labels for inequality rather than by
  summing |actual - predicted|, so the count stays correct for unsigned
  dtypes (where subtraction wraps around) and for labels other than 0/1.
  """
  error=actual_class[0].ne(predicted_class['predictions']).sum()
  return (1-(error/float(actual_class.shape[0])))*100

#Accuracy for each class
def class_acc(datac):
  """Return (accuracy for class 0, accuracy for class 1) as percentages.

  datac -- DataFrame with an 'actual' column (true labels) and a 'Predict'
           column (predicted labels)

  Rows are grouped by their actual label. Within each group the wrong
  predictions are counted by inequality rather than by |actual - Predict|,
  which would wrap around for unsigned dtypes.
  """
  def _percent_correct(rows):
    #Share of rows in this group whose prediction matches the actual label
    wrong=rows['actual'].ne(rows['Predict']).sum()
    return (1-(wrong/rows.shape[0]))*100

  d0=datac[datac['actual']==0]
  d1=datac[datac['actual']==1]
  return (_percent_correct(d0),_percent_correct(d1))

In [None]:
#Naive Bayes Classifier
#prior_of_train is read as a module-level name inside posterior_probability, so it
#has to be assigned before naive_bayes is called. A plain assignment in a cell
#already creates a module-level name; a `global` statement placed after the
#assignment is a SyntaxError on Python 3.6+ and is therefore not used here.
prior_of_train=prior_probability(train_data)
predict=naive_bayes(test_data)
acc=accuracy(pd.DataFrame(fashion_test_y.transpose()),pd.DataFrame(predict))
#Printing the total accuracy of the built Naive Bayes Model
print("Accuracy of the Naive Bayes:"+str(acc)+"%")
#Pairing each prediction with its actual label for the per-class breakdown
c2=pd.DataFrame()
c2['Predict'] = predict['predictions']
c2['actual']=fashion_test_y[0].transpose()
res1=class_acc(c2)
#Printing the accuracy for each class
print("Accuracy of the Naive Bayes for class 0:"+str(res1[0])+"%")
print("Accuracy of the Naive Bayes for class 1:"+str(res1[1])+"%")



Accuracy of the Naive Bayes:83.15%
Accuracy of the Naive Bayes for class 0:78.4%
Accuracy of the Naive Bayes for class 1:87.9%


In [None]:
#Logistic Regression

#Computing the Sigmoid Function for every row
def sigmoid(weight,x):
  """Return sig(row . weight) for every row of x.

  weight -- weights, combined with the rows of x through x.dot
  x      -- DataFrame of samples, one row per sample

  The dot product is taken first and sig is then applied to each resulting value.
  """
  return x.dot(weight.T).apply(sig)

def sig(x):
  """Return the logistic function 1/(1+e^-x) of a scalar x.

  The negative branch uses the algebraically equal form e^x/(1+e^x) so that
  math.exp is never called with a large positive argument; evaluating
  1/(1+math.exp(-x)) directly raises OverflowError for very negative x.
  """
  if x>=0:
    return 1/(1+math.exp(-x))
  e=math.exp(x)
  return e/(1+e)

#Calculating the Gradient Ascent step for the Logistic Regression
def gradient(w, y, z, x, learning):
  """Apply one gradient-ascent step to the weights and return them.

  w        -- weights, indexed by the column labels of x; updated in place
  y        -- actual labels
  z        -- current predictions for the rows of x
  x        -- DataFrame of samples
  learning -- learning rate that scales the step

  For every column of x the weight grows by
  learning * sum((y - z) * column).
  """
  residual = y - z
  for column in x.columns:
    step = residual.mul(x[column]).sum()
    w[column] = w[column] + learning * step
  return w

#Logistic Regression Model
def logistic_regression(train_features, train_class, test, learning, epoch):
  """Fit a logistic regression by gradient ascent and predict labels for test.

  train_features -- DataFrame of training features
  train_class    -- training labels
  test           -- DataFrame of test features with the same columns
  learning       -- learning rate passed to gradient
  epoch          -- number of gradient steps

  Returns the result of predicts on the test rows.

  The initial weights are drawn with np.random.randn, i.e. from NumPy's global
  random state, so results vary between runs unless that state is seeded.
  """
  #The weight labels follow the design-matrix columns (bias column '1' first), so
  #the dot product and the per-column update line up for any set of feature columns
  weight_labels=['1']+list(train_features.columns)
  weights=pd.Series(np.random.randn(len(weight_labels)), index=weight_labels)
  #Work on copies so the caller's frames do not gain the constant bias column
  train_double=train_features.copy()
  train_double.insert(0,'1',1)
  for i in range(epoch):
    z=sigmoid(weights,train_double)
    weights=gradient(weights,train_class, z, train_double,learning)
  test_double=test.copy()
  test_double.insert(0,'1',1)
  return predicts(weights,test_double)

#Predictions for the test data
def predicts(weights,test):
  """Return the rounded sigmoid output for every row of test.

  Each value returned by sigmoid is passed through Python's built-in round.
  """
  return sigmoid(weights,test).apply(round)

In [None]:
#Calculating the accuracy of the Logistic Regression on the test data
#(learning rate 0.02, 300 gradient steps)
lr_train_features = train_data.drop(columns=['class'])
lr_train_labels = train_data['class']
pred = logistic_regression(lr_train_features, lr_train_labels, test_data, 0.02, 300)
#Overall accuracy
lr_actual = pd.DataFrame(fashion_test_y.transpose())
lr_predicted = pd.DataFrame(pred, columns=['predictions'])
acc1 = accuracy(lr_actual, lr_predicted)
print("Accuracy of the Logistic Regression:"+str(acc1)+"%")
#Per-class accuracy: pair each prediction with its actual label
c1 = pd.DataFrame(pred, columns=['Predict'])
c1['actual'] = fashion_test_y.transpose()
res = class_acc(c1)
print("Accuracy of the Logistic Regression for class 0:"+str(res[0])+"%")
print("Accuracy of the Logistic Regression for class 1:"+str(res[1])+"%")


Accuracy of the Logistic Regression:91.95%
Accuracy of the Logistic Regression for class 0:92.7%
Accuracy of the Logistic Regression for class 1:91.2%
