In [40]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
#import sklearn as skl
#from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
#from sklearn import metrics


In [41]:
# Fashion-MNIST training split; every column (label + pixels) is parsed as an integer.
df = pd.read_csv(
    'DF/FMnist/fashion-mnist_train.csv',
    dtype=int,
)

In [42]:
# Summarise the loaded frame: number of rows and columns, dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB


In [43]:
# Separate the target column from the features: `y` is the class label,
# `x` is every remaining (pixel) column.
y = df["label"]
x = df.drop(columns="label")



In [44]:
def random_forest(x_train, x_test, y_train, y_test, random_state=0):
    """Train a random forest on the training split and print its test metrics.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices used for fitting and for evaluation respectively.
    y_train, y_test : array-like
        Labels matching ``x_train`` and ``x_test``.
    random_state : int or None, default 0
        Seed forwarded to ``RandomForestClassifier``. A fixed seed makes
        repeated runs (e.g. the raw-pixel vs. PCA comparison) reproducible.
        Pass ``None`` to get the previous unseeded behaviour.

    Returns
    -------
    None
        The classification report and then the confusion matrix for the
        predictions on ``x_test`` are printed.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report, confusion_matrix

    # fit
    model = RandomForestClassifier(
        n_estimators=500,
        max_leaf_nodes=16,
        n_jobs=-1,  # use every available core
        random_state=random_state,
    )
    model.fit(x_train, y_train)

    # test
    predict = model.predict(x_test)
    print(classification_report(y_test, predict))
    print(confusion_matrix(y_test, predict))

In [46]:
def naive(x_train, x_test, y_train, y_test):
    """Fit Gaussian naive Bayes on the training split and print test metrics.

    The classification report is printed first, followed by the confusion
    matrix, both computed from the predictions on ``x_test`` against
    ``y_test``. Nothing is returned.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.naive_bayes import GaussianNB

    classifier = GaussianNB().fit(x_train, y_train)
    predicted = classifier.predict(x_test)

    # Report first, matrix second.
    for metric in (classification_report, confusion_matrix):
        print(metric(y_test, predicted))

In [47]:
def log_reg(x_train, x_test, y_train, y_test):
    """Fit logistic regression, plot the confusion matrix and print the report.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices used for fitting and for evaluation respectively.
    y_train, y_test : array-like
        Labels matching ``x_train`` and ``x_test``.

    Returns
    -------
    None
        The confusion matrix is drawn as an annotated seaborn heatmap and the
        classification report is printed.

    Notes
    -----
    Relies on the notebook-level ``sns`` (seaborn) import.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.linear_model import LogisticRegression

    # fit
    model = LogisticRegression()
    model.fit(x_train, y_train)

    # test
    predict = model.predict(x_test)
    con_mat = confusion_matrix(y_test, predict)
    # fmt='d' prints the integer counts in full; seaborn's default '.2g'
    # would abbreviate large cells to scientific notation (e.g. 3.8e+03).
    ax = sns.heatmap(con_mat, annot=True, fmt='d')
    # confusion_matrix puts true labels on rows and predictions on columns.
    ax.set(xlabel='Predicted label', ylabel='True label')
    print(classification_report(y_test, predict))

In [48]:
def svm(x_train, x_test, y_train, y_test, random_state=0):
    """Train a linear SVM via stochastic gradient descent and print test metrics.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices used for fitting and for evaluation respectively.
    y_train, y_test : array-like
        Labels matching ``x_train`` and ``x_test``.
    random_state : int or None, default 0
        Seed forwarded to ``SGDClassifier``. A fixed seed makes repeated runs
        reproducible. Pass ``None`` to get the previous unseeded behaviour.

    Returns
    -------
    None
        The confusion matrix, the classification report and the accuracy
        (three decimals) are printed, in that order.
    """
    from sklearn.linear_model import SGDClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report, confusion_matrix

    # SGDClassifier with hinge loss acts as the SVM classifier. The local is
    # called `classifier` (not `svm`) so it does not shadow this function.
    classifier = SGDClassifier(loss='hinge', random_state=random_state)

    # fit
    classifier.fit(x_train, y_train)

    # test
    predict = classifier.predict(x_test)
    print(confusion_matrix(y_test, predict))
    print(classification_report(y_test, predict))
    print('Accuracy: %.3f' % accuracy_score(y_test, predict))

In [49]:
def knn(x_train, x_test, y_train, y_test):
    """Standardise the features, fit a 5-nearest-neighbour classifier and print metrics.

    The scaler is fitted on ``x_train`` only and then applied to both splits.
    The confusion matrix is printed first, followed by the classification
    report, both for the predictions on the scaled ``x_test``. Nothing is
    returned.
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import classification_report, confusion_matrix

    # Scaling statistics come from the training split and are reused for the test split.
    scaler = StandardScaler().fit(x_train)
    train_scaled, test_scaled = (scaler.transform(part) for part in (x_train, x_test))

    neighbours = KNeighborsClassifier(n_neighbors=5).fit(train_scaled, y_train)

    predicted = neighbours.predict(test_scaled)
    for metric in (confusion_matrix, classification_report):
        print(metric(y_test, predicted))

In [50]:
def decision_tree(x_train, x_test, y_train, y_test, random_state=0):
    """Train a decision tree on the training split and print its test metrics.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices used for fitting and for evaluation respectively.
    y_train, y_test : array-like
        Labels matching ``x_train`` and ``x_test``.
    random_state : int or None, default 0
        Seed forwarded to ``DecisionTreeClassifier``. A fixed seed makes
        repeated runs reproducible. Pass ``None`` to get the previous
        unseeded behaviour.

    Returns
    -------
    None
        The confusion matrix and then the classification report for the
        predictions on ``x_test`` are printed.
    """
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import classification_report, confusion_matrix

    # fit
    classifier = DecisionTreeClassifier(random_state=random_state)
    classifier.fit(x_train, y_train)

    # test
    predict = classifier.predict(x_test)
    print(confusion_matrix(y_test, predict))
    print(classification_report(y_test, predict))

In [51]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)


              precision    recall  f1-score   support

           0       0.74      0.80      0.77      4811
           1       0.97      0.87      0.92      4811
           2       0.60      0.70      0.65      4796
           3       0.65      0.89      0.75      4740
           4       0.51      0.74      0.60      4792
           5       0.96      0.84      0.90      4812
           6       0.63      0.01      0.03      4805
           7       0.86      0.90      0.88      4840
           8       0.90      0.93      0.91      4790
           9       0.87      0.93      0.90      4803

    accuracy                           0.76     48000
   macro avg       0.77      0.76      0.73     48000
weighted avg       0.77      0.76      0.73     48000

[[3839   20  206  605   23    2   13    0  103    0]
 [   9 4193   95  503    5    1    1    0    4    0]
 [  59    7 3373   54 1225    5    4    0   69    0]
 [ 130   44  201 4206  120    1   12    0   26    0]
 [  17   12  617  579 3529   

In [None]:
from sklearn.decomposition import PCA

# A fractional n_components asks PCA to keep enough components to explain
# that share of the variance (here 95%).
pca = PCA(n_components=0.95)

# Learn the projection on the training split, then reuse it for the test split.
x_pca_train, x_pca_test = pca.fit_transform(x_train), pca.transform(x_test)

In [None]:
# Random forest on the original pixel features (no PCA).
random_forest(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

In [None]:
# Random forest on the PCA-projected features.
random_forest(x_train=x_pca_train, x_test=x_pca_test, y_train=y_train, y_test=y_test)

In [None]:
# Gaussian naive Bayes on the original pixel features (no PCA).
naive(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

In [None]:
# Gaussian naive Bayes on the PCA-projected features.
naive(x_train=x_pca_train, x_test=x_pca_test, y_train=y_train, y_test=y_test)

In [None]:
#without pca
#log_reg(x_train, x_test, y_train, y_test)

In [None]:
# SGD-trained SVM on the original pixel features (no PCA).
svm(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

In [None]:
# SGD-trained SVM on the PCA-projected features.
svm(x_train=x_pca_train, x_test=x_pca_test, y_train=y_train, y_test=y_test)

In [None]:
# k-nearest neighbours on the original pixel features (no PCA).
knn(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

In [None]:
# with pca: k-nearest neighbours on the PCA-projected features.
# (Previously this cell passed x_train/x_test, so it repeated the non-PCA run.)
knn(x_pca_train, x_pca_test, y_train, y_test)

In [None]:
# Decision tree on the original pixel features (no PCA).
decision_tree(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

In [None]:
# with pca: decision tree on the PCA-projected features.
# (Previously this cell passed x_train/x_test, so it repeated the non-PCA run.)
decision_tree(x_pca_train, x_pca_test, y_train, y_test)