In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

from mlxtend.plotting import plot_decision_regions

In [None]:
# Read Data
# Load the Fashion-MNIST CSV exports (paths are relative to the notebook's
# working directory).
train = pd.read_csv('fashionmnist/fashion-mnist_train.csv')
test = pd.read_csv('fashionmnist/fashion-mnist_test.csv')

# Column 0 is taken as the label vector; every remaining column is a feature.
X_train, Y_train = train.iloc[:, 1:].values, train.iloc[:, 0].values
X_test, Y_test = test.iloc[:, 1:].values, test.iloc[:, 0].values

# Placeholder arrays for PCA-projected data (two independent objects).
X_train_pca, X_test_pca = np.ones(1), np.ones(1)

In [None]:
# PCA Components & Training Samples
# Tunable settings for the experiment; edit here and re-run the cells below.
comp_num = 400          # passed as n_components to the PCA step of every pipeline
train_sample = 60_000   # number of leading training rows handed to select_samples

In [None]:
# Select Training Samples
def select_samples(x, y, train_sample=None):
    """Return the first ``train_sample`` samples of ``x`` and their labels.

    Parameters
    ----------
    x : array-like
        Feature rows. Only the first axis is sliced, so 1-D, 2-D or
        higher-dimensional inputs are all accepted.
    y : array-like
        Labels, one entry per row of ``x``.
    train_sample : int or None, optional
        Number of leading samples to keep. ``None`` keeps every sample; a
        value larger than the data returns all rows.

    Returns
    -------
    tuple
        ``(X_out, Y_out)``, the sliced features and labels.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of samples.
    """
    # Slicing would otherwise hide a feature/label mismatch by truncating both
    # to the same length, leaving rows paired with the wrong labels.
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same number of samples "
            "(got %d and %d)" % (len(x), len(y))
        )
    X_out = x[:train_sample]
    Y_out = y[:train_sample]
    return X_out, Y_out

In [None]:

def test_accuracy(model, X_test, Y_test):
    model_predict = model.predict(X_test)
    model_predict = np.array(model_predict)
    test_set_accuracy = np.mean(Y_test == model_predict)*100
    print("Test Set Accuracy: ", test_set_accuracy, "%")    

def cross_validate(model, X_train, Y_train, cv_i):
    """Cross-validate ``model`` and print the per-fold and mean accuracy.

    ``cv_i`` is forwarded to ``cross_val_score`` as its ``cv`` argument, and
    folds are evaluated in parallel on all cores (``n_jobs=-1``). Nothing is
    returned; the scores and their mean (as a percentage) are printed.
    """
    fold_scores = cross_val_score(
        estimator=model,
        X=X_train,
        y=Y_train,
        cv=cv_i,
        n_jobs=-1,
    )
    print("CV Accuracy Scores: ", fold_scores)
    mean_percent = np.mean(fold_scores)*100
    print("CV Accuracy: ", mean_percent, "%")

In [None]:
#Select Partial Samples
# Always slice from the full data held in `train`, not from the current
# X_train/Y_train. Re-slicing its own output made this cell order-dependent:
# after one run with a small `train_sample`, raising the value later silently
# kept the truncated set until the data-loading cell was re-run.
# (This builds a temporary full-size copy of the feature matrix on each run.)
X_train, Y_train = select_samples(train.iloc[:, 1:].values, train.iloc[:, 0].values, train_sample)

# Classifiers
# NOTE(review): the result logs recorded further down this notebook report
# ~11% accuracy for this SVM configuration, i.e. near chance for 10 classes.
# gamma=0.45 is probably far too large for standardized 400-component inputs;
# consider gamma='scale' — confirm before relying on this model.
svm = SVC(kernel='rbf', C=1.0, gamma=0.45, random_state=1)

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')

rnf = RandomForestClassifier(criterion='gini', n_estimators=25, random_state=1)

dct = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=1)

gnb = GaussianNB(priors=None)

# Hidden-layer widths for the MLP; alternative layouts are kept commented out.
# nn_nodes = (400, 350, 300, 250, 200, 150, 100, 100, 100, 100)
nn_nodes = (400, 500, 600, 700, 800, 700, 600, 500, 400, 200, 100, 50)
# nn_nodes = (400, 350, 300, 250, 200, 150, 100)
mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=nn_nodes, random_state=1)

# One-vs-rest wrapper around the MLP (only referenced by a commented-out pipeline).
ovr = OneVsRestClassifier(mlp, n_jobs=-1)

# Soft voting averages predicted class probabilities, so every member
# estimator must implement predict_proba.
clfs = [('mlp', mlp), ('rnf', rnf), ('knn', knn), ('dct', dct)]
vc = VotingClassifier(estimators=clfs, voting='soft')

# bgcRnf = BaggingClassifier(base_estimator=rnf, n_estimators=3, max_samples=1.0, max_features=1.0, bootstrap=True, bootstrap_features=False, n_jobs=-1, random_state=1)

# bgcKnn = BaggingClassifier(base_estimator=knn, n_estimators=3, max_samples=1.0, max_features=1.0, bootstrap=True, bootstrap_features=False, n_jobs=-1, random_state=1)


In [None]:
# Pack Above Classifier into Pipeline

def _pca_pipeline(clf):
    """Return ``clf`` behind the shared StandardScaler -> PCA preprocessing.

    PCA keeps ``comp_num`` components. ``random_state=1`` matches the seed
    used by the other estimators; it only matters when PCA selects a
    randomized SVD solver, where it makes repeated runs reproducible.
    """
    return make_pipeline(StandardScaler(), PCA(n_components=comp_num, random_state=1), clf)

# pipe_svm = _pca_pipeline(svm)
pipe_knn = _pca_pipeline(knn)
pipe_rnf = _pca_pipeline(rnf)
pipe_dct = _pca_pipeline(dct)
pipe_gnb = _pca_pipeline(gnb)
pipe_mlp = _pca_pipeline(mlp)
# pipe_bgcRnf = _pca_pipeline(bgcRnf)
# pipe_bgcKnn = _pca_pipeline(bgcKnn)
# pipe_ovrMlp = _pca_pipeline(ovr)
# The original line was cut off mid-expression (a syntax error); the voting
# classifier `vc` is the estimator this pipeline is meant to wrap.
pipe_vc = _pca_pipeline(vc)

In [None]:
# Pipeline under evaluation; assign a different pipe_* object to test another model.
model = pipe_vc

# Optional cross-validation on the training data with cv=10 (disabled; uncomment to run).
# cross_validate(model, X_train, Y_train, 10)

# Fit on the selected training samples, then print accuracy on the test set.
model.fit(X_train, Y_train)
test_accuracy(model, X_test, Y_test)

In [None]:
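# Test Results (log pasted from an earlier run)
# NOTE: the log lines below mention Gaussian filtering, 5-fold CV with a +/- spread,
# and model saving/loading, none of which appear in the code cells of this notebook;
# they appear to come from a different revision of the experiment.
# 12000 Samples
# 400 PCA Components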
# svm = SVC(kernel='rbf', C=1.0, gamma=0.45, random_state=1)
# knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# rnf = RandomForestClassifier(criterion='gini', n_estimators=25, random_state=1)
# dct = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=1)
# gnb = GaussianNB(priors=None)
# nn_nodes = (400, 500, 600, 700, 800, 700, 600, 500, 400, 200, 100, 50)
# mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=nn_nodes, random_state=1)
# clfs = [('mlp', mlp), ('rnf', rnf), ('knn', knn), ('dct', dct)]
# vc = VotingClassifier(estimators=clfs, voting='soft')

Saved Below
Model: clf_knn2019-11-24_14-34-16
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.82438618 0.81681932 0.81916667 0.81825761 0.82429048]
CV Accuracy:  82.05840518631346  +/-  0.3155579194904498  %
Test Set Accuracy:  82.6 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  82.6 %


Saved Below
Model: clf_rnf2019-11-24_14-35-40
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.80565959 0.80932556 0.80166667 0.80700292 0.79549249]
CV Accuracy:  80.38294452472672  +/-  0.48540733837157235  %
Test Set Accuracy:  80.2 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  80.2 %


Saved Below
Model: clf_dct2019-11-24_14-36-33
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.73782772 0.73022481 0.72791667 0.73197165 0.75      ]
CV Accuracy:  73.55881699069563  +/-  0.7917236317383891  %
Test Set Accuracy:  74.33 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  74.33 %


Saved Below
Model: clf_mlp2019-11-24_14-37-23
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.88056596 0.87801832 0.86458333 0.85368904 0.86018364]
CV Accuracy:  86.74080574234088  +/-  1.033528723570401  %
Test Set Accuracy:  87.2 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  87.2 %


Saved Below
Model: clf_vc2019-11-24_15-36-01
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.87224303 0.86011657 0.85666667 0.85452272 0.85559265]
CV Accuracy:  85.98283275923173  +/-  0.6485282265191098  %
Test Set Accuracy:  86.19 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  86.19 %


Saved Below
Model: clf_svm2019-11-24_16-31-19
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.11485643 0.11532057 0.1175     0.11379742 0.11602671]
CV Accuracy:  11.550022448662933  +/-  0.12347143492180315  %
Test Set Accuracy:  11.75 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  11.75 %


Saved Below
Model: clf_gnb2019-11-24_16-41-59
Test Condition: 
- 12000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.60757387 0.5970025  0.59125    0.58857857 0.60267112]
CV Accuracy:  59.74152113712241  +/-  0.7029781510911495  %
Test Set Accuracy:  60.64000000000001 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  60.64000000000001 %


In [None]:
#Test Result
# 1000 Samples
# 400 PCA Components
# svm = SVC(kernel='rbf', C=1.0, gamma=0.45, random_state=1)
# knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# rnf = RandomForestClassifier(criterion='gini', n_estimators=25, random_state=1)
# dct = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=1)
# gnb = GaussianNB(priors=None)
# nn_nodes = (400, 500, 600, 700, 800, 700, 600, 500, 400, 200, 100, 50)
# mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=nn_nodes, random_state=1)
# clfs = [('mlp', mlp), ('rnf', rnf), ('knn', knn), ('dct', dct)]
# vc = VotingClassifier(estimators=clfs, voting='soft')
# vcw = VotingClassifier(estimators=clfs, voting='soft', weights=[2, 1, 1.5, 1])

Saved Below
Model: clf_knn2019-11-24_17-50-55
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.73170732 0.73762376 0.715      0.76767677 0.72307692]
CV Accuracy:  73.50169540406199  +/-  1.804247966207075  %
Test Set Accuracy:  75.35 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  75.35 %


Saved Below
Model: clf_rnf2019-11-24_17-51-09
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.69756098 0.63366337 0.72       0.71212121 0.65128205]
CV Accuracy:  68.29255210699306  +/-  3.42612637322941  %
Test Set Accuracy:  69.32000000000001 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  69.32000000000001 %


Saved Below
Model: clf_dct2019-11-24_17-51-16
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.56585366 0.55445545 0.59       0.64141414 0.55897436]
CV Accuracy:  58.213952089392805  +/-  3.2073446453489383  %
Test Set Accuracy:  59.18 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  59.18 %


Saved Below
Model: clf_mlp2019-11-24_17-51-23
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.73170732 0.75742574 0.81       0.71212121 0.76923077]
CV Accuracy:  75.60970081998819  +/-  3.3491853973367394  %
Test Set Accuracy:  79.45 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  79.45 %


Saved Below
Model: clf_vc2019-11-24_17-57-21
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.73658537 0.72772277 0.785      0.81313131 0.76410256]
CV Accuracy:  76.53084030729526  +/-  3.1349437867443015  %
Test Set Accuracy:  77.9 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  77.9 %


Saved Below
Model: clf_vcw2019-11-24_18-03-47
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.75121951 0.73762376 0.82       0.82323232 0.78974359]
CV Accuracy:  78.43638375094544  +/-  3.49079300948743  %
Test Set Accuracy:  80.13 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  80.13 %


Saved Below
Model: clf_svm2019-11-24_18-09-41
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.11219512 0.11386139 0.115      0.11616162 0.11282051]
CV Accuracy:  11.40077274143925  +/-  0.14380733253632702  %
Test Set Accuracy:  10.040000000000001 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  10.040000000000001 %


Saved Below
Model: clf_gnb2019-11-24_18-10-00
Test Condition: 
- 1000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.55609756 0.56930693 0.595      0.59090909 0.54358974]
CV Accuracy:  57.098066523350276  +/-  1.974153672179657  %
Test Set Accuracy:  58.47 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  58.47 %

In [None]:
#Test Result
# 60000 Samples
# 400 PCA Components
# svm = SVC(kernel='rbf', C=1.0, gamma=0.45, random_state=1)
# knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# rnf = RandomForestClassifier(criterion='gini', n_estimators=25, random_state=1)
# dct = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=1)
# gnb = GaussianNB(priors=None)
# nn_nodes = (400, 500, 600, 700, 800, 700, 600, 500, 400, 200, 100, 50)
# mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=nn_nodes, random_state=1)
# clfs = [('mlp', mlp), ('rnf', rnf), ('knn', knn), ('dct', dct)]
# vc = VotingClassifier(estimators=clfs, voting='soft')
# vcw = VotingClassifier(estimators=clfs, voting='soft', weights=[2, 1, 1.5, 1])

Saved Below
Model: clf_knn2019-11-24_14-37-43
Test Condition: 
- 60000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.84925    0.84825    0.85041667 0.84733333 0.84908333]
CV Accuracy:  84.88666666666667  +/-  0.10322575044801341  %
Test Set Accuracy:  85.15 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  85.15 %


Saved Below
Model: clf_rnf2019-11-24_14-45-40
Test Condition: 
- 60000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.83683333 0.83133333 0.83725    0.82916667 0.82833333]
CV Accuracy:  83.25833333333334  +/-  0.37719726639754025  %
Test Set Accuracy:  83.67999999999999 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  83.67999999999999 %


Saved Below
Model: clf_dct2019-11-24_14-50-54
Test Condition: 
- 60000 Samples
- Gaussian Filter Convolution Sigma = 1.4 
- StandardScalar Normalize
- 400 PCA Components
CV Accuracy Scores:  [0.76466667 0.75758333 0.76558333 0.76625    0.75783333]
CV Accuracy:  76.23833333333334  +/-  0.38509017702928044  %
Test Set Accuracy:  77.49000000000001 %
Saving Model...
Loading Model...
Test Saved Model
Test Set Accuracy:  77.49000000000001 %