In [1]:
import pandas as pd
import numpy as np
from sklearn import tree, ensemble, naive_bayes, svm, linear_model, metrics, preprocessing
from sklearn.preprocessing import StandardScaler

In [2]:
def loadData(filename):
  return pd.read_csv(filename)

In [3]:
def dataPreprocessing(data):
  return preprocessing.StandardScaler().fit_transform(data)

In [4]:
def randomForest(trainingX, trainingY):
  classifier = ensemble.RandomForestClassifier(max_depth=2, random_state=0)
  classifier.fit(trainingX, trainingY)
  return classifier


In [5]:
def decisionTree(trainingX, trainingY):
  classifier = tree.DecisionTreeClassifier(random_state=0)
  classifier.fit(trainingX , trainingY)
  return classifier

In [6]:
def supportVector(trainingX, trainingY):
  classifier = svm.SVC()
  classifier.fit(trainingX, trainingY)
  return classifier

In [7]:
def guassionNB(trainingX, trainingY):
  classifier = naive_bayes.GaussianNB()
  classifier.fit(trainingX, trainingY)
  return classifier

In [8]:
def predict(classifier , testX):
   return classifier.predict(testX)


In [9]:
def evaluate(actual_class, predicted_class):
    #Your code to evaluate the model will go here. The code will print overall model's accuracy and precision 
    #and recall for each class label.        
    accuracy = metrics.accuracy_score(actual_class, predicted_class)*100
    print("The confusion matrix is :\n", metrics.confusion_matrix(actual_class, predicted_class))
    classes = actual_class.unique()
    metric = metrics.precision_recall_fscore_support(actual_class, predicted_class, average= None, labels = classes, zero_division = 0)
    print('\n \n Recall Score:')
    for i, recall in np.ndenumerate(metric[1]):
      print('Recall for class '+ str(classes[i[0]]) +' is', recall)
    print('\n \n Precision Scores:')
    for i, precision in np.ndenumerate(metric[0]):
      print('Precision for class '+ str(classes[i[0]]) +' is', precision)
    print('\n \n F-Scores:')
    for i, fscore in np.ndenumerate(metric[2]):
      print('Fscore for class '+ str(classes[i[0]]) +' is', fscore)
    print('\n \n Accuracy Score:')
    print("The accuracy score is :", accuracy)

In [10]:
print('loading data...')
dataset = loadData('fashion-mnist_train.csv')
trainingY = dataset['label']
dataset.drop(['label'], axis=1)
dataset = dataset.fillna(0)
print('preprocessing data...')
trainingX = dataPreprocessing(dataset)

print('Applying Support Vector classifier...')
model1 = supportVector(trainingX , trainingY)

print('Applying Random Forest classifier...')
model2 = randomForest(trainingX, trainingY)

print('Applying DecisionTree classifier....')
model3 = decisionTree(trainingX, trainingY)

print('Applying Guassion Naive Bayes classifier....')
model4 = guassionNB(trainingX, trainingY)



loading data...
preprocessing data...
Applying Support Vector classifier...
Applying Random Forest classifier...
Applying DecisionTree classifier....
Applying Guassion Naive Bayes classifier....


In [12]:
print('Loading the test data...')
dataset1 = loadData('fashion-mnist_test.csv')
testY = dataset1['label']
dataset1 = dataset1.fillna(0)
dataset1.drop(['label'], axis=1)
testX = dataPreprocessing(dataset1)

print('Predicting on support vector classifier model...')
predicted_class1 = predict(model1, testX)

print('Predicting on random forest classifier model...')
predicted_class2 = predict(model2, testX)

print('Predicting on decision tree classifier model...')
predicted_class3 = predict(model3, testX)

print('Predicting on Naive Bayes classifier model...')
predicted_class4 = predict(model4, testX)

print('\n \n Evaluation of Support Vector classifier')
evaluate(testY, predicted_class1)

print('\n \n Evaluation of Random Forest classifier')
evaluate(testY, predicted_class2)

print('\n \n Evaluation of Decision Tree Classifier')
evaluate(testY, predicted_class3)

print('\n \n Evaluation of Naive Bayes Classifier')
evaluate(testY, predicted_class4)

Loading the test data...
Predicting on support vector classifier model...
Predicting on random forest classifier model...
Predicting on decision tree classifier model...
Predicting on Naive Bayes classifier model...

 
 Evaluation of Support Vector classifier
The confusion matrix is :
 [[873   0  12  61   2   4  25   0  23   0]
 [  8 964   4  21   1   0   1   0   1   0]
 [ 26   0 788  12 114   0  48   0  12   0]
 [ 26   7  10 912  22   0  21   0   2   0]
 [  1   0  52  34 829   0  79   0   5   0]
 [  1   0   0   1   0 920   0  43   8  27]
 [ 52   0  60  50  78   0 731   0  29   0]
 [  0   0   0   0   0  30   0 909   0  61]
 [  1   0   0   4   1   5  19   2 967   1]
 [  0   0   0   0   0  15   0  48   9 928]]

 
 Recall Score:
Recall for class 0 is 0.873
Recall for class 1 is 0.964
Recall for class 2 is 0.788
Recall for class 3 is 0.912
Recall for class 8 is 0.967
Recall for class 6 is 0.731
Recall for class 5 is 0.92
Recall for class 4 is 0.829
Recall for class 7 is 0.909
Recall for cl