In [None]:
import numpy as np
from keras.datasets import cifar10
from sklearn.svm import SVC
from sklearn import tree
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, learning_curve, ShuffleSplit
import matplotlib.pyplot as plt
import warnings 
warnings.filterwarnings('ignore')

In [None]:
def pca(train, test, num_components):
    """Reduce `train` and `test` with a PCA basis learned from `train` only.

    Parameters
    ----------
    train : array-like
        Training samples; the principal components are fitted on these.
    test : array-like
        Held-out samples; projected onto the components learned from `train`.
    num_components : int
        Value forwarded to ``PCA(n_components=...)``.

    Returns
    -------
    tuple
        ``(train_reduced, test_reduced)``.
    """
    reducer = PCA(n_components=num_components)
    # Fit on the argument (not on a notebook global) and only on training data,
    # so both splits are expressed in the same basis.
    train_reduced = reducer.fit_transform(train)
    test_reduced = reducer.transform(test)
    return train_reduced, test_reduced

In [None]:
def lda(train_X, train_Y, test):
    """Fit LDA on the labelled training data and project both splits with it.

    Returns a ``(train_projected, test_projected)`` tuple; the model is fitted
    on ``train_X`` / ``train_Y`` only and then applied to ``test``.
    """
    projector = LDA().fit(train_X, train_Y)
    train_projected = projector.transform(train_X)
    test_projected = projector.transform(test)
    return train_projected, test_projected

In [None]:
def LR(train_X_pca, train_X_lda, train_X_lpca, train_Y, test_X_pca, test_X_lda, test_X_lpca, test_Y, t, C, penalty):
    """Grid-search logistic regression on the PCA, LDA and PCA + LDA features.

    `C` and `penalty` are the candidate lists for the grid; `t` is handed to
    `hyperparam` unchanged. Each feature set is searched and reported in turn.
    """
    hyp_param = {"C": C, "penalty": penalty}
    print("Hyperparameters: ", hyp_param)
    clf = LogisticRegression()

    feature_sets = (
        (train_X_pca, test_X_pca, "Logistic Regression PCA"),
        (train_X_lda, test_X_lda, "Logistic Regression LDA"),
        (train_X_lpca, test_X_lpca, "Logistic Regression PCA + LDA"),
    )
    for train_feats, test_feats, label in feature_sets:
        hyperparam(train_feats, train_Y, clf, hyp_param, test_feats, test_Y, t, label)

In [None]:
def SVM_lin(train_X_pca, train_X_lda, train_X_lpca, train_Y, test_X_pca, test_X_lda, test_X_lpca, test_Y, t, C, kernel, max_iter):
    """Grid-search an SVC on the PCA, LDA and PCA + LDA features (2-fold CV).

    `C`, `kernel` and `max_iter` are the candidate lists for the grid; `t` is
    handed to `hyperparam` unchanged.
    """
    hyp_param = {"C": C, "kernel": kernel, "max_iter": max_iter}
    clf = SVC()

    feature_sets = (
        (train_X_pca, test_X_pca, "SVM PCA"),
        (train_X_lda, test_X_lda, "SVM LDA"),
        (train_X_lpca, test_X_lpca, "SVM PCA + LDA"),
    )
    for train_feats, test_feats, label in feature_sets:
        hyperparam(train_feats, train_Y, clf, hyp_param, test_feats, test_Y, t, label, cv=2)

In [None]:
def dec_tree(train_X_pca, train_X_lda, train_X_lpca, train_Y, test_X_pca, test_X_lda, test_X_lpca, test_Y, t, max_features, min_samples_split, min_samples_leaf):
    """Grid-search a decision tree on the PCA, LDA and PCA + LDA features (3-fold CV).

    `max_features`, `min_samples_split` and `min_samples_leaf` are candidate
    lists supplied by the caller. The `max_depth` candidates are built here:
    five evenly spaced depths from 10 to 110, plus ``None``. `t` is handed to
    `hyperparam` unchanged.
    """
    # The unused `n_estimators` list was dropped: it was never placed in the
    # grid and is not a DecisionTreeClassifier parameter.
    max_depth = [int(depth) for depth in np.linspace(10, 110, num=5)] + [None]
    hyp_param = dict(max_features=max_features, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)
    print(hyp_param)
    clf = tree.DecisionTreeClassifier()

    hyperparam(train_X_pca, train_Y, clf, hyp_param, test_X_pca, test_Y, t, "Decision Tree PCA", cv=3)
    hyperparam(train_X_lda, train_Y, clf, hyp_param, test_X_lda, test_Y, t, "Decision Tree LDA", cv=3)
    hyperparam(train_X_lpca, train_Y, clf, hyp_param, test_X_lpca, test_Y, t, "Decision Tree PCA + LDA", cv=3)

In [None]:
def mlp(train_X_pca, train_X_lda, train_X_lpca, train_Y, test_X_pca, test_X_lda, test_X_lpca, test_Y, t, hidden_layer_sizes, max_iter, alpha, learning_rate_init):
    """Grid-search an MLP classifier on the PCA, LDA and PCA + LDA features (3-fold CV).

    `hidden_layer_sizes`, `max_iter`, `alpha` and `learning_rate_init` are the
    candidate lists for the grid and are used exactly as passed in. `t` is
    handed to `hyperparam` unchanged.
    """
    # The grid is built from the arguments; previously they were overwritten
    # by hard-coded lists, so the caller's values were silently ignored.
    hyp_param = dict(hidden_layer_sizes=hidden_layer_sizes, max_iter=max_iter, alpha=alpha, learning_rate_init=learning_rate_init)
    print(hyp_param)
    clf = MLPClassifier()

    hyperparam(train_X_pca, train_Y, clf, hyp_param, test_X_pca, test_Y, t, "MLP PCA", cv=3)
    hyperparam(train_X_lda, train_Y, clf, hyp_param, test_X_lda, test_Y, t, "MLP LDA", cv=3)
    hyperparam(train_X_lpca, train_Y, clf, hyp_param, test_X_lpca, test_Y, t, "MLP PCA + LDA", cv=3)

In [None]:
def graph(estimator, title, X, y, cv=None, n_jobs=None, train_size=np.linspace(.1, 1.0, 5)):
    """Plot a learning curve for `estimator` and save it to a file named `title`.

    Training and cross-validation scores from `learning_curve` are drawn as
    mean lines with a +/- one standard deviation band around each. `train_size`
    is forwarded as the `train_sizes` argument of `learning_curve`.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.set_xlabel("Trained Examples")
    ax.set_ylabel("Points")

    sizes, train_scr, test_scr = learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_size)

    # One (mean, std, colour, label) record per curve, averaged across folds.
    curves = [
        (np.mean(train_scr, axis=1), np.std(train_scr, axis=1), "b", "Training Points"),
        (np.mean(test_scr, axis=1), np.std(test_scr, axis=1), "y", "Cross-validation score"),
    ]

    ax.grid()
    # Bands first, then lines, so artists are added in the same order as before.
    for mean, std, colour, _ in curves:
        ax.fill_between(sizes, (mean - std), (mean + std), alpha=0.1, color=colour)
    for mean, _, colour, label in curves:
        ax.plot(sizes, mean, 'o-', color=colour, label=label)
    # ax.legend(loc="best")
    fig.savefig(title)

In [None]:
def hyperparam(train_X, train_Y, estimator, params, test_X, test_Y, t, title, cv=3):
    """Run a grid search, report the best model's scores, and plot a learning curve.

    Parameters
    ----------
    train_X, train_Y : training features and labels used for the grid search.
    estimator : unfitted estimator handed to `GridSearchCV`.
    params : dict mapping parameter names to lists of candidate values.
    test_X, test_Y : held-out features and labels, used only for scoring.
    t : accepted for call compatibility; not used in this function.
    title : printed as a header and passed to `graph` (plot title and file name).
    cv : cross-validation setting forwarded to `GridSearchCV` and `graph`.
    """
    search = GridSearchCV(estimator, params, cv=cv, verbose=0)
    model = search.fit(train_X, train_Y)
    train_score = model.score(train_X, train_Y)
    test_score = model.score(test_X, test_Y)

    print("*" * 100)
    print(title)
    print("Best Parameters")
    print(model.best_estimator_.get_params())
    # The training score was computed but never shown; report it next to the
    # test score so the two can be compared.
    print("Training Score: ", train_score)
    print("Testing Score: ", test_score)
    # NOTE(review): `model` is the whole GridSearchCV object, so the learning
    # curve presumably repeats the full grid search for every train size and
    # fold. Consider passing `model.best_estimator_` if that is not intended.
    graph(model, title, train_X, train_Y, cv=cv)

In [None]:
k = 3072  # features per flattened image (presumably 32 * 32 * 3 for CIFAR-10)
t = -1    # reshape wildcard: let NumPy infer the number of samples

(train_X, train_Y), (test_X, test_Y) = cifar10.load_data()

# Flatten every image to one row of k values and every label array to 1-D.
train_X = train_X.reshape(t, k)
test_X = test_X.reshape(t, k)
train_Y = train_Y.reshape(t)
test_Y = test_Y.reshape(t)

# Per-feature standardisation statistics come from the training split only.
mean_train_X = np.mean(train_X, axis=0)
n_train = np.std(train_X, axis=0)
# A constant feature has zero std; divide by 1 instead of producing NaN/inf.
n_train = np.where(n_train == 0, 1.0, n_train)

# The test split is scaled with the *training* statistics so that both splits
# share one feature space and no test information leaks into preprocessing.
train_X = (train_X - mean_train_X) / n_train
test_X = (test_X - mean_train_X) / n_train

In [None]:
# Two PCA reductions of the same data: a 13-component one and a larger
# 285-component one.
train_X_pca, test_X_pca = pca(train_X, test_X, 13)
train_X_pca_lrg, test_X_pca_lrg = pca(train_X, test_X, 285)

In [None]:
# LDA applied to the standardised pixels, and to the larger PCA projection.
train_X_lda, test_X_lda = lda(train_X=train_X, train_Y=train_Y, test=test_X)
train_X_lpca, test_X_lpca = lda(train_X=train_X_pca_lrg, train_Y=train_Y, test=test_X_pca_lrg)

In [None]:
# Logistic regression: C from 1e-2 to 1e4 (one value per decade), L2 penalty.
LR(
    train_X_pca, train_X_lda, train_X_lpca, train_Y,
    test_X_pca, test_X_lda, test_X_lpca, test_Y,
    t,
    C=np.logspace(-2, 4, 7),
    penalty=['l2'],
)

In [None]:
# Linear-kernel SVC. max_iter is given as an int: recent scikit-learn releases
# validate this parameter and reject the float 1e4.
SVM_lin(
    train_X_pca, train_X_lda, train_X_lpca, train_Y,
    test_X_pca, test_X_lda, test_X_lpca, test_Y,
    t,
    [0.001, 0.01, 0.1, 10, 100, 1000],
    ['linear'],
    [10000],
)

In [None]:
# Decision tree. max_features='auto' was an alias of 'sqrt' for classifiers and
# has been removed from recent scikit-learn releases, so only 'sqrt' is searched.
dec_tree(
    train_X_pca, train_X_lda, train_X_lpca, train_Y,
    test_X_pca, test_X_lda, test_X_lpca, test_Y,
    t,
    ['sqrt'],
    [2, 5, 10],
    [1, 2, 4, 7],
)

In [None]:
# MLP: one or two hidden layers, alpha from 1e-5 to 1e-1, three learning rates.
mlp(
    train_X_pca, train_X_lda, train_X_lpca, train_Y,
    test_X_pca, test_X_lda, test_X_lpca, test_Y,
    t,
    hidden_layer_sizes=[(256,), (256, 128)],
    max_iter=[1000],
    alpha=np.logspace(-5, -1, 5),
    learning_rate_init=[1e-3, 1e-4, 1e-5],
)