In [1]:
import pandas as pd 
import numpy as np
import albumentations as A
import cv2
import joblib
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
import matplotlib.pyplot as plt
from time import time
from random import seed

In [2]:
DATA_PATH = './data'
def load_dataset(data_path=None):
    """Load the two image stacks and the label vector from ``.npy`` files.

    Parameters
    ----------
    data_path : str or path-like, optional
        Directory holding ``def_yes_images.npy``, ``def_no_images.npy`` and
        ``classes.npy``. When omitted, the module-level ``DATA_PATH`` is used
        (looked up at call time, so re-assigning the constant still works).

    Returns
    -------
    tuple
        ``(X_yes, X_no, classes)`` exactly as stored in the three files.
    """
    if data_path is None:
        data_path = DATA_PATH

    X_yes = np.load(f'{data_path}/def_yes_images.npy')
    X_no = np.load(f'{data_path}/def_no_images.npy')
    classes = np.load(f'{data_path}/classes.npy')

    # Shapes are printed so a reader can sanity-check the data right after loading.
    print(f'X_yes shape: {X_yes.shape}')
    print(f'X_no shape {X_no.shape}')
    print(f'classes shape: {classes.shape}')

    return X_yes, X_no, classes

def reshape_dataset(X_yes, X_no):
    """Stack both image sets and flatten every image into one feature row.

    Parameters
    ----------
    X_yes, X_no : array-like of shape (n_i, H, W)
        Image stacks (arrays or lists of equally sized 2-D images). Rows of
        ``X_yes`` come first in the result, followed by the rows of ``X_no``.

    Returns
    -------
    numpy.ndarray of shape (n_yes + n_no, H * W)
        One flattened image per row.

    Raises
    ------
    ValueError
        If the stacked data is not 3-dimensional (raised by the shape unpacking).
    """
    # np.vstack replaces np.row_stack, which is deprecated since NumPy 2.0.
    X = np.vstack((X_yes, X_no))
    N, SIZE_H, SIZE_V = X.shape
    # The method form avoids the `newshape=` keyword deprecated in NumPy 2.1.
    X = X.reshape(N, SIZE_H * SIZE_V)
    print(f'X shape: {X.shape}')
    return X

In [3]:
# Load both image stacks and the label vector, then build the design matrix:
# one flattened image per row, X_yes rows first and X_no rows after them.
X_yes, X_no, classes = load_dataset()
X = reshape_dataset(X_yes, X_no)

X_yes shape: (135, 214, 214)
X_no shape (87, 214, 214)
classes shape: (222,)
X shape: (222, 45796)


## Perceptrón multi-capa con reducción de dimensionalidad PCA

In [4]:
def draw_pca_variance(n_components, variance_cumsum, step=25):
    """Plot the cumulative explained variance against the component number.

    Parameters
    ----------
    n_components : int
        Number of principal components; the x axis runs from 1 to this value.
    variance_cumsum : array-like of length ``n_components``
        Cumulative explained-variance ratio per component.
    step : int, default 25
        A marker with its value (rounded to 3 decimals) is drawn every
        ``step`` components, starting at component 1.
    """
    variance_cumsum = np.asarray(variance_cumsum)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    ax.set_xlabel('Componente principal')
    ax.set_ylabel('Porcentaje de varianza acumulada')
    ax.plot(
        np.arange(1, n_components + 1),
        variance_cumsum,
    )

    # Marker x and y are derived from the same index array so they can never
    # fall out of step (slicing them separately dropped the last marker
    # whenever n_components - 1 was a multiple of the stride).
    marker_idx = np.arange(0, n_components, step)
    for x, y in zip(marker_idx + 1, variance_cumsum[marker_idx]):
        ax.plot(x, y, 'ko')
        ax.annotate(
            np.round(y, 3),
            (x, y),
            textcoords='offset points',
            xytext=(-7, 12),
            ha='center'
        )
    plt.show()

def get_pca(X):
    """Fit an untruncated PCA on ``X``, report its variance profile and plot it.

    Prints the shape of the projected data, the number of fitted components
    and the cumulative explained-variance ratio, then hands the last two to
    ``draw_pca_variance``.

    Parameters
    ----------
    X : array-like
        Data matrix passed to ``PCA.fit_transform``.

    Returns
    -------
    PCA
        The fitted estimator.
    """
    model = PCA(n_components=None)
    projected = model.fit_transform(X)

    fitted_components = model.n_components_
    cumulative_ratio = np.cumsum(model.explained_variance_ratio_)

    for line in (
        f'X_tmp shape: {projected.shape}',
        f'N_components: {fitted_components}',
        f'Porcentaje de varianza acumulada: {cumulative_ratio}',
    ):
        print(line)

    draw_pca_variance(fitted_components, cumulative_ratio)
    return model

In [6]:
# Inspect the variance profile of the raw (unscaled) data before searching.
get_pca(X)

# random.seed() returns None, so the seed value must be kept separately;
# otherwise random_state below would be None and the run could not be
# reproduced. The seed is printed so a given search can be replayed.
sd = int(time())
seed(sd)
print(f'Random seed: {sd}')

# Standardise -> PCA (n_components=None, no explicit truncation) -> MLP.
pipe_1 = Pipeline([
    ('Scaler', StandardScaler()),
    ('PCA', PCA(n_components=None)),
    ('MLPClassifier', MLPClassifier(random_state=sd))
])

# 5 folds x 3 repeats = 15 train/validation splits per candidate.
RSKFold_1 = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=sd)
hidden_layers_1 = [(100, 50, 25), (200, 100, 50), (150, 75, 30, 10), (200, 100, 50, 25)]

# Two sub-grids because momentum is only searched for the 'sgd' solver.
# Size: 480 (lbfgs/adam) + 2160 (sgd) = 2640 candidates, i.e. 39,600 fits.
# NOTE(review): per the scikit-learn docs learning_rate_init is only used by
# 'sgd' and 'adam', so the four 'lbfgs' variants per setting look redundant.
grid_space_1 = [
    {
        'MLPClassifier__solver': ['lbfgs', 'adam'],
        'MLPClassifier__max_iter': range(1000, 3500, 500),
        'MLPClassifier__activation': ['logistic', 'tanh', 'relu'],
        'MLPClassifier__hidden_layer_sizes': hidden_layers_1,
        'MLPClassifier__learning_rate_init': [1e-4, 1e-3, 1e-2, 1e-1]
    }, 
    {
        'MLPClassifier__solver': ['sgd'],
        'MLPClassifier__max_iter': range(1000, 3500, 500),
        'MLPClassifier__activation': ['logistic', 'tanh', 'relu'],
        'MLPClassifier__hidden_layer_sizes': hidden_layers_1,
        'MLPClassifier__learning_rate_init': [1e-4, 1e-3, 1e-2, 1e-1],
        'MLPClassifier__momentum': np.arange(0.1, 1, 0.1)
    }
]

# Balanced accuracy is the selection metric; train scores are kept as well.
grid_1 = GridSearchCV(
    estimator=pipe_1,
    param_grid=grid_space_1,
    cv=RSKFold_1,
    scoring='balanced_accuracy',
    return_train_score=True,
    verbose=4,
    n_jobs=-1
)


In [None]:
# Run the full grid search on the flattened images (long-running: every
# candidate in grid_space_1 is fitted once per split of RSKFold_1).
res_1 = grid_1.fit(X, classes)

In [None]:
# Persist the result of the first search so it can be inspected later
# without repeating the fit. The file is written to the working directory.
RES_FILE_NAME_1 = 'nn_1_results.pkl'
joblib.dump(res_1, RES_FILE_NAME_1)

## Perceptrón multi-capa con reducción de dimensionalidad PCA y *data augmentation*

In [5]:
def data_augmentation(imgs, classes, aug_per_image, imgs_trans):
    """Create ``aug_per_image`` transformed copies of every image.

    Parameters
    ----------
    imgs : sequence
        Source images; paired element-wise with ``classes``.
    classes : iterable
        Label of each source image.
    aug_per_image : int
        Number of transformed copies generated per source image.
    imgs_trans : callable
        Called as ``imgs_trans(image=img)``; the ``"image"`` entry of the
        returned mapping is stored as the augmented image.

    Returns
    -------
    tuple of (list, list)
        The augmented images and, in the same order, the label of the source
        image each one was derived from. The originals are not included.
    """
    augmented, labels = [], []
    for count, (picture, label) in enumerate(zip(imgs, classes), start=1):
        augmented.extend(
            imgs_trans(image=picture)["image"] for _ in range(aug_per_image)
        )
        labels.extend([label] * aug_per_image)
        # Progress message after every 50 processed source images.
        if count % 50 == 0:
            print(f'[!] {count} images agumented...')
    print(f'[!] Total of {len(imgs)} images augmented!\n')
    return augmented, labels

In [6]:
# Reload the raw image stacks and labels. The images are kept as separate
# 2-D arrays here (no flattening yet) because the augmentation step below
# is applied to the individual images.
X_yes, X_no, classes = load_dataset()
#X = reshape_dataset(X_yes, X_no)

X_yes shape: (135, 214, 214)
X_no shape (87, 214, 214)
classes shape: (222,)


In [8]:
# Augmentation recipe: optional flips, brightness/contrast jitter and a small
# rotation (each with p=0.25), followed by a translation of up to +/-5%.
imgs_trans = A.Compose([
    A.HorizontalFlip(p=0.25),
    A.VerticalFlip(p=0.25),
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), p=0.25),
    A.Rotate(limit=[-15, 15], p=0.25, border_mode=cv2.BORDER_CONSTANT),
    A.Affine(translate_percent=(-0.05, 0.05))
])

# data_augmentation() needs the labels and returns (images, labels).
# Assumes classes.npy is ordered like the stacked data (X_yes rows first,
# then X_no), which the non-augmented experiment already relies on.
n_yes = len(X_yes)
# `classes` is rebound below, so this also catches an accidental second run
# of the cell without reloading the dataset first.
assert len(classes) == n_yes + len(X_no), (
    'classes does not match X_yes + X_no; re-run the dataset loading cell first'
)

X_yes_aug, classes_yes_aug = data_augmentation(X_yes, classes[:n_yes], 20, imgs_trans)
X_no_aug, classes_no_aug = data_augmentation(X_no, classes[n_yes:], 20, imgs_trans)
X = reshape_dataset(X_yes_aug, X_no_aug)

# One label per augmented row, in the same yes-then-no order as X, so that
# the later fit on (X, classes) receives matching lengths.
classes = np.array(classes_yes_aug + classes_no_aug)
assert len(classes) == X.shape[0]

[!] 50 images agumented...
[!] 100 images agumented...
[!] Total of 135 images augmented!

[!] 50 images agumented...
[!] Total of 87 images augmented!

X shape: (4440, 45796)


In [9]:
def draw_images(imgs):
    """Show each image in an OpenCV window, advancing on a key press.

    Parameters
    ----------
    imgs : iterable
        Images accepted by ``cv2.imshow``; all are shown in the same window
        named ``"Img"``.
    """
    try:
        for img in imgs:
            cv2.imshow("Img", img)
            # Block until a key is pressed before showing the next image.
            cv2.waitKey()
    finally:
        # Always close the window, even if the loop is interrupted or
        # imshow rejects one of the images.
        cv2.destroyAllWindows()
# draw_images(X_yes_aug)
# draw_images(X_no_aug)

In [11]:
# random.seed() returns None, so the seed value must be kept separately;
# otherwise random_state below would be None and the run could not be
# reproduced. The seed is printed so a given search can be replayed.
sd = int(time())
seed(sd)
print(f'Random seed: {sd}')

# Same pipeline as the first experiment, now fitted on the augmented data.
pipe_2 = Pipeline([
    ('Scaler', StandardScaler()),
    ('PCA', PCA(n_components=None)),
    ('MLPClassifier', MLPClassifier(random_state=sd))
])

# 5 folds x 3 repeats = 15 train/validation splits per candidate.
# NOTE(review): X holds several augmented variants of each source image and
# this splitter assigns rows independently, so variants of one image can land
# in both the training and the validation fold. The scores may therefore be
# optimistic; confirm whether a group-aware split is needed.
RSKFold_2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=sd)
hidden_layers_2 = [(100, 50, 25), (200, 100, 50), (150, 75, 30, 10), (200, 100, 50, 25)]

# Two sub-grids because momentum is only searched for the 'sgd' solver.
# Size: 480 (lbfgs/adam) + 2160 (sgd) = 2640 candidates, i.e. 39,600 fits.
grid_space_2 = [
    {
        'MLPClassifier__solver': ['lbfgs', 'adam'],
        'MLPClassifier__max_iter': range(1000, 3500, 500),
        'MLPClassifier__activation': ['logistic', 'tanh', 'relu'],
        'MLPClassifier__hidden_layer_sizes': hidden_layers_2,
        'MLPClassifier__learning_rate_init': [1e-4, 1e-3, 1e-2, 1e-1]
    }, 
    {
        'MLPClassifier__solver': ['sgd'],
        'MLPClassifier__max_iter': range(1000, 3500, 500),
        'MLPClassifier__activation': ['logistic', 'tanh', 'relu'],
        'MLPClassifier__hidden_layer_sizes': hidden_layers_2,
        'MLPClassifier__learning_rate_init': [1e-4, 1e-3, 1e-2, 1e-1],
        'MLPClassifier__momentum': np.arange(0.1, 1, 0.1)
    }
]

# Balanced accuracy is the selection metric; train scores are kept as well.
grid_2 = GridSearchCV(
    estimator=pipe_2,
    param_grid=grid_space_2,
    cv=RSKFold_2,
    scoring='balanced_accuracy',
    return_train_score=True,
    verbose=4,
    n_jobs=-1
)

In [None]:
# Run the second grid search. X and classes must have the same number of
# rows here, i.e. classes has to hold one label per augmented image.
res_2 = grid_2.fit(X, classes)

In [None]:
# Persist the result of the second search so it can be inspected later
# without repeating the fit. The file is written to the working directory.
RES_FILE_NAME_2 = 'nn_2_results.pkl'
joblib.dump(res_2, RES_FILE_NAME_2)