In [161]:
import csv
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB #gaussian naive Bayes classifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score #calculating accuracy

In [162]:
def load_file(fileName):
    """Parse a comma-separated file into a pandas DataFrame.

    Parameters
    ----------
    fileName : str or path-like
        Location of the file to read. Its first row supplies the column names.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # The delimiter is spelled out so the parse is explicitly comma-based;
    # the file is decoded with the "unicode_escape" codec.
    return pd.read_csv(fileName, sep=",", header=0, encoding="unicode_escape")

In [163]:
def preprocess(train,test):
    """Split the train and test frames into feature columns and a label column.

    The label is read from the "label" column. The feature columns are named
    "pixel1" .. "pixelK", where K is ``test.shape[1] - 1``; the same names are
    used to index both frames.

    Parameters
    ----------
    train : pandas.DataFrame
        Training table containing "label" and the pixel columns.
    test : pandas.DataFrame
        Test table containing "label" and the pixel columns. Its column count
        determines how many pixel names are generated.

    Returns
    -------
    tuple
        ``(trainingX, trainingY, testX, testY)``.
    """
    label_name = "label"
    # One "pixelN" name per column of `test` beyond the first.
    pixel_names = ["pixel{}".format(n) for n in range(1, test.shape[1])]

    trainingY = train[label_name]
    trainingX = train[pixel_names]
    testY = test[label_name]
    testX = test[pixel_names]
    return trainingX, trainingY, testX, testY

In [164]:
def learn_model(data,target, random_state=None):
    """Fit four classifiers on the same training data.

    The estimators are Gaussian naive Bayes, a decision tree, linear
    discriminant analysis and a random forest (150 trees, max depth 50).

    Parameters
    ----------
    data : array-like
        Training features passed to each estimator's ``fit``.
    target : array-like
        Training labels passed to each estimator's ``fit``.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to the decision tree and the random forest, the two
        estimators here that accept one. The default ``None`` leaves them
        unseeded, so their fitted models (and downstream accuracies) can
        differ from run to run; pass an int for repeatable results.

    Returns
    -------
    tuple
        ``(gnb_classifier, dtc_classifier, lda_classifier, rfr_classifier)``,
        the values returned by the four ``fit`` calls, in that order.
    """
    gNB = GaussianNB()
    dtc = DecisionTreeClassifier(random_state=random_state)
    lda = LinearDiscriminantAnalysis()
    # NOTE(review): max_features=1 is an int, which scikit-learn documents as
    # "consider this many features per split" -- confirm that a single feature
    # per split is intended rather than the fraction 1.0.
    rfr = RandomForestClassifier(
        max_depth=50,
        n_estimators=150,
        max_features=1,
        random_state=random_state,
    )

    gnb_classifier = gNB.fit(data,target)
    dtc_classifier = dtc.fit(data,target)
    lda_classifier = lda.fit(data,target)
    rfr_classifier = rfr.fit(data,target)

    return gnb_classifier, dtc_classifier, lda_classifier, rfr_classifier

In [165]:
def classify(classifier, testdata):
    """Run the four fitted models on the same test data.

    Parameters
    ----------
    classifier : sequence
        Indexable collection whose first four items each expose ``predict``;
        the items at positions 0-3 are used in that order.
    testdata : array-like
        Feature rows handed unchanged to every ``predict`` call.

    Returns
    -------
    tuple
        Four prediction results, ordered like the first four items of
        ``classifier``.
    """
    # Index explicitly so a sequence with fewer than four items fails up front.
    fitted = (classifier[0], classifier[1], classifier[2], classifier[3])
    return tuple(model.predict(testdata) for model in fitted)

In [166]:
def evaluate(actual_class, predicted_class):
    """Print the accuracy of each of the four classifiers' predictions.

    Parameters
    ----------
    actual_class : array-like
        True labels, passed as the first argument to ``accuracy_score``.
    predicted_class : sequence
        Indexable collection whose items 0-3 are the predictions of the
        Gaussian naive Bayes, decision tree, linear discriminant analysis and
        random forest models, in that order.

    Returns
    -------
    None
        The scores are only printed, one line per classifier.
    """
    captions = (
        "The accuracy score of Gaussian Naive Bayes is :",
        "The accuracy score of Decision Tree Classifier is :",
        "The accuracy score of Linear Discriminant Analysis is :",
        "The accuracy score of Random Forest Classifier is :",
    )
    predictions = (
        predicted_class[0],
        predicted_class[1],
        predicted_class[2],
        predicted_class[3],
    )

    # Score everything first so nothing is printed if any computation fails.
    scores = [accuracy_score(actual_class, predicted) for predicted in predictions]

    for caption, score in zip(captions, scores):
        print(caption, score)

In [167]:
# Read the training and test CSVs; both paths are relative, so the files must
# sit in the directory the notebook is run from.
print("Loading data.....")
train = load_file("fashion-mnist_train.csv")
test = load_file("fashion-mnist_test.csv")

Loading data.....


In [168]:
# Separate each frame into its pixel columns (X) and its label column (Y).
print("preprocessing data.....")
data = preprocess(train,test)
# preprocess returns (trainingX, trainingY, testX, testY) in this order.
trainingX, trainingY, testX, testY = data

preprocessing data.....


In [169]:
# Fit all four classifiers on the training split; `model` is the tuple
# returned by learn_model.
print("Learning model.....")
model = learn_model(trainingX,trainingY)

Learning model.....


In [170]:
# Predict the test split with every fitted classifier; `predictedY` holds one
# prediction result per classifier, in the same order as `model`.
print("Classifying test data......")      
predictedY = classify(model, testX)

Classifying test data......


In [171]:
# Compare each classifier's predictions with the true test labels and print
# the four accuracy scores.
print("Evaluating results.....")
evaluate(testY,predictedY)

Evaluating results.....
The accuracy score of Gaussian Naive Bayes is : 0.5914
The accuracy score of Decision Tree Classifier is : 0.7988
The accuracy score of Linear Discriminant Analysis is : 0.8256
The accuracy score of Random Forest Classifier is : 0.862
