In [2]:
%matplotlib notebook
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.preprocessing import Binarizer
from sklearn.naive_bayes import CategoricalNB, ComplementNB,BernoulliNB,GaussianNB,MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier


import pandas as pd 
import matplotlib.image as matimage

# Load the Fashion-MNIST CSV files (relative paths)
train = pd.read_csv('fashion-mnist/train.csv')
test = pd.read_csv('fashion-mnist/test.csv')

# Column 0 is the target value; every remaining column is a feature
y_train, X_train = np.array(train.iloc[:, 0]), np.array(train.iloc[:, 1:])
y_test, X_test = np.array(test.iloc[:, 0]), np.array(test.iloc[:, 1:])

#print("\n---- ----\n")

# Class names keyed by their integer label (0-9), in label order
label_dictionnary = {
    index: name
    for index, name in enumerate((
        'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
        'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
    ))
}


def true_label(x):
    """Return the class name stored in ``label_dictionnary`` for label ``x``.

    Raises ``KeyError`` when ``x`` is not one of the dictionary's keys.
    """
    return label_dictionnary[x]
def plotPredict(pred, n_row=1, n_col=10):
    """Show the first test images titled with predicted (PD) and true (TR) labels.

    Reads the module-level ``X_test`` and ``y_test`` arrays and reshapes each
    row of ``X_test`` to 28 x 28 before drawing it.

    Parameters
    ----------
    pred : sequence of int
        Predicted labels; ``pred[i]`` is shown next to ``X_test[i]`` and
        ``y_test[i]``.
    n_row, n_col : int, optional
        Shape of the subplot grid. The defaults give the original 1 x 10 row.

    Raises
    ------
    KeyError
        If a predicted or true label is not a key of ``label_dictionnary``.
    """
    # Never index past the predictions or test samples that actually exist
    n_shown = min(n_row * n_col, len(pred), len(X_test), len(y_test))
    plt.figure(figsize=(10, 8))
    for i in range(n_shown):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(X_test[i, :].reshape(28, 28))
        title_text = 'PD: %s\nTR: %s' % (true_label(pred[i]), true_label(y_test[i]))
        plt.title(title_text, size=6.5)

    plt.show()




In [3]:
# Binarize the feature values with scikit-learn's Binarizer (default settings)
binarizer = Binarizer()
train_binary = binarizer.fit_transform(X_train)
# The transformer is set up on the training data only; held-out data goes
# through transform() and is never used for fitting.
test_binary = binarizer.transform(X_test)
print("\n----Data Binarized----\n")


----Data Binarized----



In [None]:
# Sweep the forest size from 10 to 100 trees in steps of 10
for n in range(10, 101, 10):
    # Fixed seed so that re-running the cell reproduces the same forests and scores
    clf = RandomForestClassifier(n_estimators=n, random_state=0)
    clf.fit(X_train, y_train)
    clfpredict = clf.predict(X_test)
    print("Random Forest n estimators: {} ".format(n))
    print(metrics.classification_report(y_test, clfpredict))
    # accuracy is the fraction of matching labels; reuse it for the percentage
    accuracy = metrics.accuracy_score(y_test, clfpredict)
    average_accuracy = accuracy * 100
    print("The average_accuracy is {0:.1f}%".format(average_accuracy))

In [None]:
# CategoricalNB, ComplementNB,BernoulliNB,GaussianNB,MultinomialNB
nbdict={0:CategoricalNB(),1:ComplementNB(), 2:BernoulliNB(),3:MultinomialNB(), 4:GaussianNB()}
# Only entries 0-3 are swept: GaussianNB (entry 4) has no `alpha` parameter,
# so set_params(alpha=...) cannot be applied to it.
for i in range(4):
    # NOTE(review): the sweep starts at alpha=0, i.e. no additive smoothing;
    # depending on the scikit-learn version this warns or gets clipped.
    # Confirm that 0 is intended rather than 1..10.
    for j in range(10):
        cNB = nbdict[i]
        # Configure alpha BEFORE printing, otherwise the banner shows the
        # alpha left over from the previous iteration.
        cNB.set_params(alpha=j)
        print("\n----Learning {}----\n".format(cNB))
        cNB.fit(train_binary, y_train)
        print("\n----Learnt----\n")
        cNB_predict = cNB.predict(test_binary)
        print("\n----Predicted----\n")
        # Print alpha explicitly: the estimator repr omits parameters that
        # equal their defaults.
        print("\n----{} Results alpha {} ----\n".format(cNB, j))
        print(metrics.classification_report(y_test, cNB_predict))
        # accuracy is the fraction of matching labels; reuse it for the percentage
        accuracy = metrics.accuracy_score(y_test, cNB_predict)
        average_accuracy = accuracy * 100
        print("The average_accuracy is {0:.1f}%".format(average_accuracy))

In [None]:
# cNB_predict holds the predictions from the last iteration of the Naive Bayes
# sweep cell (it is overwritten on every iteration of that loop).
plotPredict(cNB_predict)

In [None]:
# cNB / cNB_predict are whatever the sweep cell left behind (its final
# estimator and alpha), which is not CategoricalNB. Label the report with the
# estimator itself so the header cannot disagree with the numbers.
print("\n----{} Results ----\n".format(cNB))
print(metrics.classification_report(y_test, cNB_predict))
# accuracy is the fraction of matching labels; reuse it for the percentage
accuracy = metrics.accuracy_score(y_test, cNB_predict)
average_accuracy = accuracy * 100
print("The average_accuracy is {0:.1f}%".format(average_accuracy))

In [9]:

# Presumably candidate C values for the binarized / raw runs (see the trailing
# comment on the SVC line); neither mapping is read in this cell.
# The first one is named dict1 rather than `dict` so that the builtin dict type
# stays usable in the rest of the notebook.
dict1 = {0:0.5,1:2.75,2:3,3:5,4:10,5:20}
dict2 = {0:3,1:5,2:10,3:20}

# Fit the same SVC (C=0.1, default kernel) twice: first on the binarized
# features, then on the raw ones. After the loop, svc / svc_predict refer to
# the second (raw-feature) run.
for banner, features_train, features_test in (
    ("\n----Learning SVC RBF bin {}----\n", train_binary, test_binary),
    ("\n----Learning SVC RBF norm{}----\n", X_train, X_test),
):
    svc = SVC(C=0.1) #0.5 2.75 3 5,3 5 for normal
    print(banner.format(svc))
    svc.fit(features_train, y_train)
    print("\n----Learnt----\n")
    svc_predict = svc.predict(features_test)
    print("\n----Predicted----\n")

    print(metrics.classification_report(y_test, svc_predict))
    # accuracy is the fraction of matching labels; reuse it for the percentage
    accuracy = metrics.accuracy_score(y_test, svc_predict)
    average_accuracy = accuracy * 100
    print("The average_accuracy is {0:.1f}%".format(average_accuracy))
    


----Learning SVC RBF bin SVC(C=0.1)----


----Learnt----


----Predicted----

              precision    recall  f1-score   support

           0       0.77      0.83      0.80      1000
           1       0.99      0.95      0.97      1000
           2       0.76      0.74      0.75      1000
           3       0.84      0.91      0.88      1000
           4       0.74      0.80      0.77      1000
           5       0.99      0.91      0.95      1000
           6       0.67      0.53      0.59      1000
           7       0.88      0.93      0.90      1000
           8       0.94      0.96      0.95      1000
           9       0.91      0.93      0.92      1000

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000

The average_accuracy is 84.9%

----Learning SVC RBF normSVC(C=0.1)----


----Learnt----


----Predicted----

              precision    recall  f1-score   support

In [None]:
# svc_predict holds the output of the last SVC fitted in the preceding cell
# (the one trained on X_train); the binarized run's predictions were overwritten.
plotPredict(svc_predict)

In [None]:
print("\n----SVC (RBF) Results----\n")

# Per-class report for the predictions currently held in svc_predict
print(metrics.classification_report(y_true=y_test, y_pred=svc_predict))
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=svc_predict)
# Share of test samples whose prediction matches the true label, in percent
average_accuracy = (y_test == svc_predict).mean() * 100
print("The average_accuracy is {0:.1f}%".format(average_accuracy))

In [10]:
# Fit a polynomial-kernel SVC (C=0.1) on the raw features and report its
# test-set scores. The former single-iteration loop wrapper was dead code.
svcPoly = SVC(kernel='poly',C=0.1)
print("\n----Learning SVC poly {}----\n".format(svcPoly))
svcPoly.fit(X_train,y_train)
print("\n----Learnt----\n")
Poly_predict = svcPoly.predict(X_test)
print("\n----Predicted----\n")
print("\n----SVC (Poly) Results----\n")

print(metrics.classification_report(y_test, Poly_predict))
# accuracy is the fraction of matching labels; reuse it for the percentage
accuracy = metrics.accuracy_score(y_test, Poly_predict)
average_accuracy = accuracy * 100
print("The average_accuracy is {0:.1f}%".format(average_accuracy))


----Learning SVC poly SVC(C=0.1, kernel='poly')----


----Learnt----


----Predicted----


----SVC (Poly) Results----

              precision    recall  f1-score   support

           0       0.79      0.81      0.80      1000
           1       0.99      0.95      0.97      1000
           2       0.83      0.70      0.76      1000
           3       0.89      0.85      0.87      1000
           4       0.85      0.76      0.80      1000
           5       0.67      0.94      0.78      1000
           6       0.61      0.65      0.63      1000
           7       0.90      0.82      0.86      1000
           8       0.96      0.92      0.94      1000
           9       0.94      0.91      0.93      1000

    accuracy                           0.83     10000
   macro avg       0.84      0.83      0.83     10000
weighted avg       0.84      0.83      0.83     10000

The average_accuracy is 83.2%


In [None]:
print("\n----SVC (Poly) Results----\n")

# Per-class report for the predictions currently held in Poly_predict
print(metrics.classification_report(y_true=y_test, y_pred=Poly_predict))
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=Poly_predict)
# Share of test samples whose prediction matches the true label, in percent
average_accuracy = (y_test == Poly_predict).mean() * 100
print("The average_accuracy is {0:.1f}%".format(average_accuracy))