In [None]:
""""Objective - To perform image pre-processing for images in the dataset, load the model to train it and export the model."""

In [1]:
import joblib
import numpy as np
from sklearn.feature_extraction import image
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def prep_data(filePath, size, patches=5, random_state=42):
    """Build a rotation-classification dataset from a joblib image dump.

    Square patches are cut from the loaded images, every patch is rotated by
    a randomly drawn number of quarter turns (0-3), and that number becomes
    the patch's label.

    Parameters
    ----------
    filePath : str or path-like
        Joblib file that unpacks to a 2-tuple; the first element is the image
        collection, the second element is ignored.
    size : int
        Side length of the square patches to extract.
    patches : int, default=5
        Forwarded to ``PatchExtractor`` as ``max_patches``.
    random_state : int or None, default=42
        Seed shared by patch sampling and rotation sampling, so that repeated
        calls yield the same dataset. Pass ``None`` for a fresh draw per call.

    Returns
    -------
    dict
        ``'features'``: float array of shape ``(n_patches, -1)`` holding the
        flattened rotated patches; ``'labels'``: float array of shape
        ``(n_patches,)`` holding the quarter-turn count of each patch.
    """
    # NOTE: joblib.load unpickles the file, so only load dumps from a trusted source.
    images, _ = joblib.load(filePath)
    patch_extractor = image.PatchExtractor(
        patch_size=(size, size), max_patches=patches, random_state=random_state
    )
    sub_images = patch_extractor.transform(images)
    n_patches = sub_images.shape[0]

    # Draw every rotation from a locally seeded generator instead of the
    # global np.random state, which made the labels differ on every run.
    rng = np.random.default_rng(random_state)
    quarter_turns = rng.integers(0, 4, size=n_patches)

    sub_images_rot = np.zeros(sub_images.shape)
    for i, k in enumerate(quarter_turns):
        # Rotate in the first two axes of the patch only.
        sub_images_rot[i] = np.rot90(sub_images[i], k=k, axes=(0, 1))

    features = sub_images_rot.reshape(n_patches, -1)
    # Labels stay float-valued, matching the dtype returned previously.
    labels = quarter_turns.astype(float)
    return {'features': features, 'labels': labels}

In [3]:
# Input locations. The small training dump is active; the full one is
# available at 'data/train.full.joblib'.
filePath_train, filePath_test = 'data/train.small.joblib', 'data/eval1.joblib'

# Patch settings handed to prep_data: square patch side length and the
# max_patches value; `size` is also used to index the evaluation bundle.
size, patches = 90, 4

In [4]:
# Build the rotation-labelled training set from the configured file and patch settings.
train_data = prep_data(filePath=filePath_train, size=size, patches=patches)
# Validation split is disabled: it needs a `filePath_val`, which the visible cells do not define.
#val_data = prep_data(filePath_val,size,patches)

In [5]:
# Load the evaluation bundle and pick the entry for the configured patch size.
# The loaded object is indexed by `size`, so it presumably maps patch sizes to
# pre-built evaluation sets — confirm against the file's producer.
# NOTE: joblib.load unpickles the file, so only load dumps from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open(filePath_test, "rb") as eval_file:
    eval_data = joblib.load(eval_file)[size]
x_test = eval_data["x_test"]
y_test = eval_data["y_test"]

In [7]:
from sklearn.neural_network import MLPClassifier

# PCA down to 250 components followed by an MLP with two hidden layers of 200 units.
# random_state is pinned on both steps: MLP weight initialisation is random and
# PCA may select its randomized SVD solver for inputs of this size, so without
# a seed the score below changes from run to run.
pca2 = PCA(n_components=250, random_state=42)
mlp = MLPClassifier(
    activation='relu',
    alpha=0.0001,
    hidden_layer_sizes=(200, 200),
    random_state=42,
)
model4 = Pipeline([('pca', pca2), ('mlp', mlp)])

model4.fit(train_data['features'], train_data['labels'])
# Mean accuracy on the evaluation set.
model4.score(x_test, y_test)

0.9485

In [None]:
"""
hidden_layer_sizes = (100) --> 0.967
hidden_layer_sizes = (100,100) --> 0.958
hidden_layer_sizes = (100,100,100) --> 0.958
hidden_layer_sizes = (50) --> 0.959
hidden_layer_sizes = (200) --> 0.965

activation = 'relu' --> 0.967
activation = 'tanh' --> 0.963
activation = 'logistic' --> 0.956
activation = 'identity' --> 0.857

alpha = 0.1 --> 0.967
alpha = 0.01 --> 0.960
alpha = 0.001 --> 0.960
alpha = 0.0001 --> 0.956

n_components = 50   --> 0.96
n_components = 100  --> 0.964
n_components = 200  --> 0.9575

n_components = 250  --> 0.967
"""

In [32]:
# PCA down to 90 components followed by a 2-nearest-neighbour classifier.
# KNN itself is deterministic, but PCA may select its randomized SVD solver
# for inputs of this size, so the seed is pinned to make the score repeatable.
pca = PCA(n_components=90, random_state=42)
knn = KNeighborsClassifier(n_neighbors=2)
model_90 = Pipeline([('pca', pca), ('knn', knn)])

model_90.fit(train_data['features'], train_data['labels'])

# Mean accuracy on the evaluation set.
model_90.score(x_test, y_test)

0.9405

In [33]:
# SVC - Params: kernel, C

from sklearn.svm import SVC

# PCA down to 90 components followed by an RBF-kernel SVC with C=10.
# The PCA seed is pinned because PCA may select its randomized SVD solver for
# inputs of this size, which would make the score vary between runs.
pca = PCA(n_components=90, random_state=42)
svc = SVC(C = 10,kernel='rbf')
model3 = Pipeline([('pca', pca), ('svc', svc)])

model3.fit(train_data['features'], train_data['labels'])
# Mean accuracy on the evaluation set.
model3.score(x_test, y_test)

0.9815

In [36]:
# Sweep n_neighbors over 3..11 and record the evaluation accuracy of each setting.
# NOTE(review): the candidates are scored on x_test/y_test, so picking the best
# one from `acc` selects the hyperparameter on the evaluation set; the resulting
# score is optimistically biased. A separate validation split would avoid this.
neigh = range(3,12)

acc = []

for n in neigh:
    # Each pipeline gets its own PCA. Reusing the module-level `pca` would
    # refit the very object that is a step of the `model3` pipeline, silently
    # invalidating that trained model, and would make this cell depend on
    # which earlier cell last assigned `pca`.
    sweep_pca = PCA(n_components=90, random_state=42)
    knn = KNeighborsClassifier(n_neighbors=n)
    model = Pipeline([('pca', sweep_pca), ('knn', knn)])
    model.fit(train_data['features'], train_data['labels'])
    acc.append(model.score(x_test, y_test))

In [37]:
# Pick the n_neighbors with the highest recorded accuracy; np.argmax returns
# the first maximum, so ties resolve to the smallest k.
bestParam = neigh[np.argmax(acc)]

# Refit the winning configuration with its own PCA. Reusing the module-level
# `pca` would refit the object that is a step of the `model3` pipeline and
# silently invalidate that trained model.
best_pca = PCA(n_components=90, random_state=42)
knn = KNeighborsClassifier(n_neighbors=bestParam)
model = Pipeline([('pca', best_pca), ('knn', knn)])
model.fit(train_data['features'], train_data['labels'])
# Mean accuracy on the evaluation set.
model.score(x_test, y_test)

0.957

In [34]:
from sklearn.ensemble import RandomForestClassifier

# PCA down to 90 components followed by a 100-tree random forest.
# random_state is pinned on both steps: the forest's bootstrap and feature
# sampling are random and PCA may select its randomized SVD solver for inputs
# of this size, so without a seed the score changes from run to run.
pca3 = PCA(n_components=90, random_state=42)
rfc = RandomForestClassifier(n_estimators = 100, random_state=42)
model2 = Pipeline([('pca', pca3), ('rfc', rfc)])

model2.fit(train_data['features'], train_data['labels'])
# Mean accuracy on the evaluation set.
model2.score(x_test, y_test)

0.9395

0.9795