In [1]:
# load libraries
from loader import Loader
from sklearn import neural_network
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy as np

In [2]:
def evaluate(received, ppredict):
    """Return the fraction of positions where two label sequences disagree.

    Args:
        received: Reference labels; any sized sequence (list, 1-D array, ...).
        ppredict: Predicted labels; must have the same length as ``received``.

    Returns:
        The misclassification rate ``mismatches / len(received)`` as a float
        between 0.0 and 1.0.

    Raises:
        ValueError: If the two sequences differ in length, or if they are
            empty (an error rate over zero samples is undefined).
    """
    total = len(received)
    if total != len(ppredict):
        # Comparing only a prefix would silently under-count the mismatches.
        raise ValueError(
            "received and ppredict must have the same length, got "
            "{} and {}".format(total, len(ppredict))
        )
    if total == 0:
        raise ValueError("cannot compute an error rate for empty input")

    mismatch = sum(1 for truth, guess in zip(received, ppredict) if truth != guess)
    return mismatch / total


In [3]:
# Load and prepare data set
# Builds the project Loader and pulls the whole training set with PCA off,
# shuffling off and flat=True. The recorded output of this cell shows
# 1000 samples with 240 features each.
dataset = Loader()
# dataset.pca(n_comp=25)
X, y = dataset.getWholeTrainSet(pca=False, shuffle=False, flat=True)
# NOTE(review): 6 is presumably the largest raw feature value, so this maps X
# into [0, 1] -- TODO confirm. The augmented images in a later cell are divided
# by 255 instead; verify that both end up on the same scale.
X = X / 6   # normalize data set

# Sanity check: number of samples, number of labels, and per-sample feature count.
print(len(X), len(y))
print(len(X[0]))

1000 1000
240


In [6]:
# Optional: add data augmentation
# Request one augmented training image for every (class, index) pair,
# class-major: classes 0..9, indices 0..99 within each class. Each image is
# fetched flattened and divided by 255.
X_aug = np.asarray([
    dataset.getImage(label, position, aug=True, set="train", flat=True) / 255
    for label in range(10)
    for position in range(100)
])
# Matching labels: every class id repeated once per image of that class.
y_aug = np.repeat(np.arange(10), 100)

# Left disabled on purpose: uncommenting these two lines appends the augmented
# samples directly to X / y.
# X = np.concatenate((X, X_aug), axis=0)
# y = np.concatenate((y, y_aug), axis=0)

# Sanity check: sample count, label count, and per-sample feature count.
print(len(X_aug), len(y_aug))
print(len(X_aug[0]))


1000 1000
240


In [7]:
# shuffle dataset
# One permutation is applied to the original and the augmented arrays so that
# row i of X_aug / y_aug keeps lining up with row i of X / y after shuffling.
SHUFFLE_SEED = 0  # fixed seed -> the same fold membership on every Restart & Run All

if not (len(X) == len(y) == len(X_aug) == len(y_aug)):
    # A permutation built from len(X) would index out of range (or silently
    # drop rows) on arrays of a different length.
    raise ValueError(
        "X, y, X_aug and y_aug must have equal length to share one shuffle "
        "order, got {}, {}, {}, {}".format(len(X), len(y), len(X_aug), len(y_aug))
    )

# A private RandomState keeps the shuffle reproducible without reseeding
# NumPy's global random stream.
order = np.random.RandomState(SHUFFLE_SEED).permutation(len(X))
X = X[order]
y = y[order]
X_aug = X_aug[order]
y_aug = y_aug[order]

In [32]:
# cross validation
# Manual k-fold cross-validation: each fold is a contiguous window of the
# (already shuffled) data used for validation, and everything outside that
# window is the training set. If len(X) is not a multiple of k, the trailing
# remainder is never validated on and stays in every training set.
k = 5
USE_AUGMENTATION = False  # True: also train on the augmented rows of the training window
fold_len = len(X) // k
final = []  # validation error rate of each fold

for fold in range(k):
    val_start = fold * fold_len
    val_stop = val_start + fold_len

    # construct validation set (never contains augmented samples)
    X_val = X[val_start:val_stop]
    y_val = y[val_start:val_stop]

    # construct training set: all rows before and after the validation window;
    # an empty slice at either end (first / last fold) concatenates cleanly.
    X_train = np.concatenate((X[:val_start], X[val_stop:]))
    y_train = np.concatenate((y[:val_start], y[val_stop:]))

    if USE_AUGMENTATION:
        # Only augmented rows at training positions are added, so nothing from
        # the validation window leaks into training.
        # NOTE(review): assumes X_aug[i] is the augmented counterpart of X[i]
        # and was shuffled with the same order -- TODO confirm.
        X_train = np.concatenate((X_train, X_aug[:val_start], X_aug[val_stop:]))
        y_train = np.concatenate((y_train, y_aug[:val_start], y_aug[val_stop:]))

    # train and evaluate model: one hidden layer with 10 units
    # ((10,) is a real one-element tuple; a bare (10) is just the int 10)
    model = neural_network.MLPClassifier(hidden_layer_sizes=(10,))
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    val_err = evaluate(y_val, y_pred)
    final.append(val_err)

# per-fold accuracy in percent, followed by the mean accuracy in percent
print((1-np.array(final))*100, 100*(1-np.mean(final)))

[89.  95.5 95.  95.  95. ] 93.9


In [24]:
# # apply model
# model = neural_network.MLPClassifier(hidden_layer_sizes=(250)) 
# scores = cross_val_score(model, X, y, cv=5)
# print(scores)
# print(np.mean(scores))
# print(np.var(scores))

In [4]:
# # Optional: add data augmentation
# X_aug = np.empty(5, 200)
# y_aug = np.empty(5, 200)
# X = np.empty(5, 200)
# y = np.empty(5, 200)
# for im_class in range(10):
#     for fold in range(5):
#         for im_index in range(20):
#             X_aug[fold].append(dataset.getImage(im_class, 20*fold+im_index, aug=True, set="train", flat=True) / 255)
#             y_aug[fold].append(im_class)

#             X[fold].append(dataset.getImage(im_class, 20*fold+im_index, aug=False, set="train", flat=True) / 6)
#             y[fold].append(im_class)

#             if im_class == 1:
#                 print(20*fold+im_index)

# X = np.concatenate((X, X_aug), axis=0)
# y = np.concatenate((y, y_aug), axis=0)

# print(len(X), len(y))
# print(len(X[0]))

In [6]:
# Open questions:
#   - How many epochs does training run for?
#   - What is the training error?

# TODO: set up a custom cross-validation loop that applies data augmentation to the training folds only