In [None]:
""""Objective - To perform image pre-processing for images in the dataset, load the model to train it and export the model."""

In [1]:
import joblib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction import image
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def prep_data(filePath, size, patches=5, random_state=None):
    """Build a rotation-prediction dataset from a joblib file of images.

    Square patches are extracted from the images, each patch is rotated by a
    randomly drawn number of quarter turns, and that number becomes the
    patch's label.

    Parameters
    ----------
    filePath : str
        Path to a joblib file whose payload unpacks into two items; only the
        first (the images) is used. joblib files are pickles, so only load
        files from a trusted source.
    size : int
        Side length of the square patches (``patch_size=(size, size)``).
    patches : int, default=5
        Forwarded to ``PatchExtractor`` as ``max_patches``.
    random_state : int or None, default=None
        Seed for drawing the rotations. ``None`` keeps the historical
        behaviour of drawing from NumPy's global RNG (reproducible only if
        the caller seeded it); an int makes the labels and rotated patches
        identical across runs.

    Returns
    -------
    dict
        ``'features'``: 2-D float array, one flattened rotated patch per row.
        ``'labels'``: 1-D float array holding the number of 90-degree turns
        (0, 1, 2 or 3) applied to the corresponding patch.
    """
    images, _ = joblib.load(filePath)
    patch_extractor = image.PatchExtractor(
        patch_size=(size, size), max_patches=patches, random_state=42
    )
    sub_images = patch_extractor.transform(images)

    # Patch extraction above is seeded; the rotations were not. Use a private
    # generator when a seed is given, otherwise fall back to the global one so
    # existing callers see no change.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_patches = sub_images.shape[0]
    sub_images_rot = np.zeros(sub_images.shape)
    labels = np.zeros(n_patches)
    for i in range(n_patches):
        random_k = rng.randint(0, 4)  # upper bound is exclusive: k in {0, 1, 2, 3}
        labels[i] = random_k
        # Rotate in the plane of the first two axes; any trailing axes are untouched.
        sub_images_rot[i] = np.rot90(sub_images[i], k=random_k, axes=(0, 1))

    features = sub_images_rot.reshape(len(sub_images), -1)
    dataset = {'features': features, 'labels': labels}
    return dataset

In [3]:
# Dataset locations. 'data/train.full.joblib' is the alternative (full)
# training file; it was also noted as a candidate for a separate
# `filePath_val`, which is currently not defined.
filePath_train, filePath_test = 'data/train.small.joblib', 'data/eval1.joblib'

# `size` is the patch side length and also the key used to pick the matching
# entry from the evaluation file; `patches` is passed to prep_data as the
# max_patches setting.
size, patches = 30, 4

In [4]:
# Build the training set: a dict with 'features' (flattened rotated patches)
# and 'labels' (number of quarter turns applied to each patch).
# The rotations are drawn inside prep_data, so re-running this cell can yield
# a different dataset unless the random generator it uses has been seeded.
train_data = prep_data(filePath_train,size,patches)

In [5]:
# Load the evaluation set. The joblib payload is indexed with `size`, and the
# selected entry provides "x_test" and "y_test".
# A context manager closes the file handle deterministically; the previous
# `joblib.load(open(...))` left it open until garbage collection.
# joblib files are pickles: only load files from a trusted source.
with open(filePath_test, "rb") as eval_file:
    eval_data = joblib.load(eval_file)[size]
x_test = eval_data["x_test"]
y_test = eval_data["y_test"]

In [7]:
from sklearn.neural_network import MLPClassifier

# PCA (30 components) followed by an MLP with one hidden layer of 200 units.
# Both estimators are seeded so that re-running the cell reproduces the score:
# MLP weight initialisation is random, and PCA may use a randomized solver.
pca2 = PCA(n_components=30, random_state=42)
mlp = MLPClassifier(
    activation='tanh',
    alpha=0.0001,
    hidden_layer_sizes=(200,),  # explicit 1-tuple; `(200)` is just the int 200
    max_iter=1000,
    random_state=42,
)
model4 = Pipeline([('pca', pca2), ('mlp', mlp)])

model4.fit(train_data['features'], train_data['labels'])
model4.score(x_test, y_test)



0.4425

In [None]:
"""
hidden_layer_sizes = (100)      --> 0.457
hidden_layer_sizes = (100,100)  --> 0.48
hidden_layer_sizes = (100,100,100)  --> 0.468
hidden_layer_sizes = (50,50)  --> 0.469
hidden_layer_sizes = (200,200)  --> 0.501

activation = 'relu'      --> 0.487
activation = 'logistic'  --> 0.344
activation = 'tanh'      --> 0.503
activation = 'identity'  --> 0.35

alpha = 0.1  --> 0.503
alpha = 0.01  --> 0.478
alpha = 0.001  --> 0.483
alpha = 0.0001  --> 0.48

n_components = 20   --> 0.479
n_components = 30   --> 0.5015
n_components = 60   --> 0.4915
n_components = 120  --> 0.4715
"""

In [67]:
# PCA (30 components) followed by a 1-nearest-neighbour classifier.
# PCA is seeded because it may pick a randomized solver; k-NN itself has no
# random component.
pca = PCA(n_components=30, random_state=42)
knn = KNeighborsClassifier(n_neighbors=1)
model_90 = Pipeline([('pca', pca), ('knn', knn)])

model_90.fit(train_data['features'], train_data['labels'])

model_90.score(x_test, y_test)

0.46

In [14]:
# Load a previously saved model from disk for comparison.
# joblib files are pickles: only load files from a trusted source.
# NOTE(review): no cell visible in this notebook writes this file — confirm
# where 'models/knn.30.joblib' is produced.
model30 = joblib.load('models/knn.30.joblib')

In [16]:
# Display the loaded object's repr to inspect what was stored in the file.
model30

In [15]:
# Score the loaded model on the same evaluation data used for the other models.
model30.score(x_test, y_test)

0.4755

In [68]:
# SVC - Params: kernel, C

from sklearn.svm import SVC

# PCA (30 components) followed by an RBF-kernel SVC.
# PCA is seeded because it may pick a randomized solver, which would make the
# score change between runs.
pca = PCA(n_components=30, random_state=42)
svc = SVC(C=30, kernel='rbf')
model3 = Pipeline([('pca', pca), ('svc', svc)])

model3.fit(train_data['features'], train_data['labels'])
model3.score(x_test, y_test)

0.574

In [None]:
"""
C = 1   --> 0.421
C = 10  --> 0.4725
C = 20  --> 0.481
C = 30  --> 0.4925
C = 50  --> 0.483

n_components = 10   --> 0.419
n_components = 20   --> 0.4785
n_components = 30   --> 0.4925
n_components = 60   --> 0.4805

"""

In [69]:
# Sweep n_neighbors for a PCA + k-NN pipeline and record each setting's score.
# NOTE: the scores are computed on x_test / y_test, so choosing k from `acc`
# selects the hyperparameter on the same data the result is reported on.
neigh = range(1, 12)

acc = []

for n in neigh:
    knn = KNeighborsClassifier(n_neighbors=n)
    # Build a fresh, unfitted PCA with the same settings as `pca` instead of
    # reusing that instance: `pca` is also a step of an already fitted
    # pipeline, and fitting it again here would silently refit that pipeline's
    # PCA step underneath its classifier.
    model = Pipeline([('pca', PCA(**pca.get_params())), ('knn', knn)])
    model.fit(train_data['features'], train_data['labels'])
    acc.append(model.score(x_test, y_test))

In [70]:
# Pick the n_neighbors value with the highest recorded score and refit.
# NOTE: `acc` was measured on x_test / y_test, so the score below is reported
# on the same data that was used to choose `bestParam`.
bestParam = neigh[np.argmax(acc)]

knn = KNeighborsClassifier(n_neighbors=bestParam)
# Use a fresh, unfitted PCA with the same settings as `pca` rather than the
# shared instance, so fitting this pipeline does not refit the PCA step of a
# pipeline that was fitted earlier.
model = Pipeline([('pca', PCA(**pca.get_params())), ('knn', knn)])
model.fit(train_data['features'], train_data['labels'])
model.score(x_test, y_test)

0.489

In [71]:
from sklearn.ensemble import RandomForestClassifier

# PCA (60 components) followed by a 600-tree random forest.
# Both estimators are seeded: the forest's bootstrap/feature sampling is
# random and PCA may use a randomized solver, so without seeds the score
# changes on every run.
pca_rfc = PCA(n_components=60, random_state=42)
rfc = RandomForestClassifier(n_estimators=600, random_state=42)
model2 = Pipeline([('pca', pca_rfc), ('rfc', rfc)])

model2.fit(train_data['features'], train_data['labels'])
model2.score(x_test, y_test)

0.5105

In [None]:
"""
n_estimators = 50   --> 0.449
n_estimators = 100  --> 0.4695
n_estimators = 200  --> 0.4675
n_estimators = 400  --> 0.4715
n_estimators = 600  --> 0.4805

n_components = 20   --> 0.4725
n_components = 30   --> 0.4805
n_components = 60   --> 0.4675
"""


0.5775