In [1]:
# load libraries
from loader import Loader
from sklearn import neural_network
# from sklearn.model_selection import train_test_split
# from sklearn.model_selection import cross_val_score
import numpy as np

In [2]:
def evaluate(received, ppredict):
    """Return the fraction of positions at which two label sequences differ.

    Args:
        received: Indexable sequence of reference labels (list, tuple or
            1-D array).
        ppredict: Indexable sequence of predicted labels; must have the same
            length as ``received``.

    Returns:
        The misclassification rate as a float in ``[0, 1]``: the number of
        indices ``i`` with ``received[i] != ppredict[i]`` divided by the
        number of labels.

    Raises:
        ValueError: If the two sequences differ in length, or if they are
            empty (the error rate is undefined in that case).
    """
    n_labels = len(received)

    # Previously a shorter prediction list was silently accepted (yielding a
    # rate computed over the wrong denominator) and a longer one surfaced as
    # an IndexError; both indicate a caller bug, so reject them explicitly.
    if len(ppredict) != n_labels:
        raise ValueError(
            "received and ppredict must have the same length, got "
            + str(n_labels) + " and " + str(len(ppredict))
        )
    if n_labels == 0:
        raise ValueError("cannot compute an error rate for empty sequences")

    mismatch = sum(1 for i in range(n_labels) if received[i] != ppredict[i])
    return mismatch / n_labels


In [3]:
# Optional: add data augmentation using rotation
def augment_data_rotation(dataset, intensity=0.1, n_classes=10, n_per_class=100, scale=255):
    """Collect one augmented, flattened training image per (class, index) pair.

    The augmentation itself is performed by ``dataset.getImage(..., aug=True)``;
    this function only iterates over the classes and image indices, rescales
    each returned image and stacks the results.

    Args:
        dataset: Object exposing ``getImage(im_class, im_index, aug=...,
            set=..., flat=...)``. Presumably the project's ``Loader`` --
            confirm against the caller.
        intensity: Currently unused; it is not forwarded to ``getImage``.
            Kept so existing calls that pass it keep working.
        n_classes: Number of classes to iterate over (class ids
            ``0 .. n_classes - 1``). Defaults to the previously hard-coded 10.
        n_per_class: Number of images requested per class (indices
            ``0 .. n_per_class - 1``). Defaults to the previously hard-coded 100.
        scale: Divisor applied to every image. Defaults to the previously
            hard-coded 255.
            NOTE(review): the data-loading cell divides the original images
            by 6, not 255 -- confirm that both sets end up on the same scale
            before mixing them in one training set.

    Returns:
        A tuple ``(X_aug, y_aug)`` of NumPy arrays: the rescaled images in
        class-major order and the matching class ids.
    """
    X_aug = []
    y_aug = []
    for im_class in range(n_classes):
        for im_index in range(n_per_class):
            image = dataset.getImage(im_class, im_index, aug=True, set="train", flat=True)
            X_aug.append(image / scale)
            y_aug.append(im_class)

    return np.asarray(X_aug), np.asarray(y_aug)

In [5]:
# Build the loader and fetch the complete training split as flat vectors.
dataset = Loader()
# dataset.pca(n_comp=240)
X_raw, y_original = dataset.getWholeTrainSet(pca=False, shuffle=False, flat=True)

# Keep the untouched array under its own name and derive the rescaled copy
# from it, so the rescaling step can be re-run without reloading.
X_original = X_raw / 6 # normalize images

# X_test, y_test =  dataset.getWholeTestSet(pca=False, shuffle=False, flat=True)
# X_test = X_test / 6 # normalize images

In [7]:
# Repeated k-fold cross validation of the MLP on the training set.
#
# Every run reshuffles the data, splits it into k contiguous folds, trains a
# fresh MLPClassifier on k-1 folds and records the training and validation
# error rates returned by evaluate().
SEED = 0      # fixed seed so a Restart & Run All reproduces the same numbers
N_RUNS = 10   # number of independent reshuffles of the data
k = 10
n_samples = len(X_original)
fold_len = n_samples // k
final = []        # validation error of every fold of every run
final_train = []  # training error of every fold of every run

# Seed NumPy's global generator. The shuffle below uses it directly, and
# MLPClassifier is left at random_state=None, which in scikit-learn means it
# draws from this same global generator, so weight initialisation and batch
# shuffling become repeatable as well.
np.random.seed(SEED)

for run in range(N_RUNS):
    # shuffle dataset to get new folds
    order = np.arange(n_samples)
    np.random.shuffle(order)
    X = X_original[order]
    y = y_original[order]

    for fold in range(k):
        val_start = fold * fold_len
        val_stop = val_start + fold_len

        # construct validation set
        X_val = X[val_start:val_stop]
        y_val = y[val_start:val_stop]

        # construct training set: everything outside the validation slice.
        # Slicing to the end of the array (instead of a hard-coded 1000)
        # keeps all samples when the data set is larger, and one expression
        # covers the first, middle and last fold because an empty slice
        # concatenates cleanly.
        X_train = np.concatenate((X[:val_start], X[val_stop:]))
        y_train = np.concatenate((y[:val_start], y[val_stop:]))

        # train and evaluate model
        model = neural_network.MLPClassifier(hidden_layer_sizes=(250, 100))
        model.fit(X_train, y_train)

        # training error
        y_pred_train = model.predict(X_train)
        train_err = evaluate(y_train, y_pred_train)
        final_train.append(train_err)

        # validation error
        y_pred = model.predict(X_val)
        val_err = evaluate(y_val, y_pred)
        final.append(val_err)

    # `final` is never reset between runs, so this line reports the running
    # mean accuracy and variance over all folds seen so far, not this run alone.
    print("run", run, ":", 100*(1-np.mean(final)), np.var((1-np.array(final)) * 100))

# print((1-np.array(final))*100)
print("result:")
print(100*(1-np.mean(final)), np.var((1-np.array(final)) * 100))
print("training", 100*(1-np.mean(final_train)), np.var((1-np.array(final_train)) * 100))

In [8]:
# # cross validation with data augmentation
# k = 10
# fold_len = int(len(X_original)/k)
# final = []
# final_train = []

# for run in range(10):
#     X_aug, y_aug = augment_data_rotation(dataset)

#     # shuffle dataset
#     order = np.arange(len(X_original))
#     np.random.shuffle(order)
#     X = X_original[order]
#     y = y_original[order]
#     X_aug = X_aug[order]
#     y_aug = y_aug[order]

#     # print("size dataset:", len(X), len(X_aug))

#     for fold in range(k):
#         # construct training set with augmented data
#         if fold == 0:
#             X_train = np.concatenate((X[(fold+1)*fold_len:1000], X_aug[(fold+1)*fold_len:1000]))
#             y_train = np.concatenate((y[(fold+1)*fold_len:1000], y_aug[(fold+1)*fold_len:1000]))
#         elif fold == k-1:
#             X_train = np.concatenate((X[0:fold*fold_len], X_aug[0:fold*fold_len]))
#             y_train = np.concatenate((y[0:fold*fold_len], y_aug[0:fold*fold_len]))
#         else:
#             X_train = np.concatenate((X[0:fold*fold_len], X[(fold+1)*fold_len:1000], X_aug[0:fold*fold_len], X_aug[(fold+1)*fold_len:1000]))
#             y_train = np.concatenate((y[0:fold*fold_len], y[(fold+1)*fold_len:1000], y_aug[0:fold*fold_len], y_aug[(fold+1)*fold_len:1000]))

#         # construct validation set
#         X_val = X[fold*fold_len:(fold+1)*fold_len]
#         y_val = y[fold*fold_len:(fold+1)*fold_len]

#         # print(len(X_train), len(X_val))

#         # train and evaluate model
#         model = neural_network.MLPClassifier(hidden_layer_sizes=(250, 100))
#         model.fit(X_train, y_train)

#         # training error
#         y_pred_train = model.predict(X_train)
#         train_err = evaluate(y_train, y_pred_train)
#         final_train.append(train_err)

#         y_pred = model.predict(X_val)
#         val_err = evaluate(y_val, y_pred)
#         final.append(val_err)

#     print("run", run, ":", 100*(1-np.mean(final)), np.var((1-np.array(final)) * 100), "         size dataset:", len(X), len(X_aug))

# print("\nresult:")
# print(100*(1-np.mean(final)), np.var((1-np.array(final)) * 100))
# print("training", 100*(1-np.mean(final_train)), np.var((1-np.array(final_train)) * 100))

In [6]:
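# # final experiment on the test set
# # (cell disabled: every line is commented out; running it also requires the
# # commented-out X_test / y_test loading lines in the data-loading cell)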
# k = 10
# fold_len = int(len(X_original)/k)
# final = []
# final_train = []

# for run in range(10):
#     X_aug, y_aug = augment_data_rotation(dataset)
#     X_train = np.concatenate((X_original, X_aug))
#     y_train = np.concatenate((y_original, y_aug))

#     # train and evaluate model
#     model = neural_network.MLPClassifier(hidden_layer_sizes=(250, 100)) 
#     model.fit(X_train, y_train)

#     # training error
#     y_pred_train = model.predict(X_train)
#     train_err = evaluate(y_train, y_pred_train)
#     final_train.append(train_err)

#     # testing error
#     y_pred = model.predict(X_test)
#     test_err = evaluate(y_test, y_pred)
#     final.append(test_err)

#     print("run", run, ":", 100*(1-np.mean(final)), np.var((1-np.array(final)) * 100))

# # print((1-np.array(final))*100)
# print("result:")
# print("testing", 100*(1-np.mean(final)), np.var((1-np.array(final)) * 100))
# print("training", 100*(1-np.mean(final_train)), np.var((1-np.array(final_train)) * 100))

run 0 : 96.0 0.0
run 1 : 96.05 0.002499999999999716
run 2 : 96.13333333333334 0.015555555555555366
run 3 : 96.05 0.03249999999999986
run 4 : 96.12 0.04559999999999925
run 5 : 96.18333333333334 0.05805555555555535
run 6 : 96.17142857142858 0.05061224489795906
run 7 : 96.125 0.059374999999999824
run 8 : 96.14444444444445 0.055802469135802335
run 9 : 96.19 0.0688999999999997
result:
testing 96.19 0.0688999999999997
training 100.0 0.0
