In [5]:
import pandas as pd 
import numpy as np
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from time import time
from random import seed

In [2]:
# Folder that holds the NumPy dumps read by this notebook.
DATA_PATH = './data'

# The images come in two separate files (the "yes" and the "no" group);
# `classes` is read from a file of its own.
X_yes = np.load(f'{DATA_PATH}/def_yes_images.npy')
X_no = np.load(f'{DATA_PATH}/def_no_images.npy')
classes = np.load(f'{DATA_PATH}/classes.npy')

# Report the shape of everything that was loaded, one line per array.
print(
    f'X_yes shape: {X_yes.shape}',
    f'X_no shape {X_no.shape}',
    f'classes shape: {classes.shape}',
    sep='\n',
)

X_yes shape: (120, 214, 214)
X_no shape (83, 214, 214)
classes shape: (203,)


In [3]:
# Stack both image groups along the first (sample) axis: the "yes" images
# first, followed by the "no" images.
# np.vstack is used directly because np.row_stack is a deprecated alias of it.
X = np.vstack((X_yes, X_no))
N, SIZE_H, SIZE_V = X.shape

# Flatten each image into one feature row:
# (N, SIZE_H, SIZE_V) -> (N, SIZE_H * SIZE_V).
# The target shape is passed positionally because the `newshape=` keyword of
# np.reshape is deprecated in recent NumPy releases.
X = np.reshape(X, (N, SIZE_H * SIZE_V))
print(f'X shape: {X.shape}')

X shape: (203, 45796)


In [None]:
def get_data_augmentator(X):
    """Create the Keras ``ImageDataGenerator`` used for image augmentation.

    The generator is only configured here; no data is fitted or generated.

    Parameters
    ----------
    X
        Not used by the current implementation; it is kept in the signature
        so existing calls keep working.

    Returns
    -------
    ImageDataGenerator
        Generator configured with random rotations, shifts, flips and
        brightness changes.
    """
    augmentation_options = dict(
        rotation_range=30,            # random rotation of up to 30 degrees
        fill_mode='nearest',          # how pixels exposed by a rotation/shift are filled
        height_shift_range=0.1,       # vertical shift of up to 10% of the image height
        width_shift_range=0.1,        # horizontal shift of up to 10% of the image width
        horizontal_flip=True,         # randomly flip images horizontally
        vertical_flip=True,           # randomly flip images vertically
        brightness_range=[0.8, 1.2],  # brightness factor from 0.8 (darker) to 1.2 (brighter)
    )
    return ImageDataGenerator(**augmentation_options)

## Perceptrón multicapa con reducción de dimensionalidad mediante PCA

In [None]:
def draw_pca_variance(n_components, variance_cumsum, marker_step=25):
    """Plot the cumulative explained-variance curve with periodic labels.

    Parameters
    ----------
    n_components : int
        Number of principal components; the x axis runs from 1 to this value.
    variance_cumsum : sequence of float
        Cumulative explained-variance ratio, one entry per component, so its
        length is expected to equal ``n_components``.
    marker_step : int, default 25
        Spacing, in components, between the annotated markers. The first
        component is always marked.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    component_ids = np.arange(1, n_components + 1)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    ax.set_xlabel('Componente principal')
    ax.set_ylabel('Porcentaje de varianza acumulada')
    ax.plot(component_ids, variance_cumsum)

    # Mark and label every `marker_step`-th component. Both sequences are
    # sliced with the same stride so each x stays paired with its own y and
    # zip() cannot silently drop the last marker.
    for x, y in zip(component_ids[::marker_step], variance_cumsum[::marker_step]):
        ax.plot(x, y, 'ko')
        ax.annotate(
            np.round(y, 3),
            (x, y),
            textcoords='offset points',
            xytext=(-7, 12),  # nudge the label up and slightly left of the marker
            ha='center'
        )
    plt.show()

def get_pca(X):
    """Fit a PCA on ``X``, report its explained variance and return it.

    Besides fitting, this prints the shape of the projected data, the number
    of components and the cumulative explained-variance ratios, and shows the
    cumulative curve through ``draw_pca_variance``.

    Parameters
    ----------
    X : array-like
        Data the PCA is fitted on.

    Returns
    -------
    PCA
        The fitted estimator.
    """
    pca = PCA(n_components=None)  # no explicit cap on the number of components
    projected = pca.fit_transform(X)
    cumulative_ratio = pca.explained_variance_ratio_.cumsum()

    print(f'X_tmp shape: {projected.shape}')
    print(f'N_components: {pca.n_components_}')
    print(f'Porcentaje de varianza acumulada: {cumulative_ratio}')

    draw_pca_variance(pca.n_components_, cumulative_ratio)
    return pca

In [None]:
# Time-derived seed. random.seed() returns None, so the seed value is computed
# first and only then handed to random.seed(); otherwise `sd` would be None and
# every `random_state=sd` below would be unseeded.
sd = int(time()) % (2 ** 32)  # keep it inside the 32-bit range accepted as random_state
seed(sd)
print(f'Random seed: {sd}')  # record the seed so this run can be reproduced

# Exploratory PCA on the full dataset (prints and plots the explained variance).
pca = get_pca(X)

# Scale -> project onto the principal components -> classify.
# NOTE(review): `pca` arrives here already fitted on the unscaled data; it is
# expected to be refit whenever the pipeline is fitted -- confirm.
pipe = Pipeline([
    ('Scaler', StandardScaler()),
    ('PCA', pca),
    ('MLPClassifier', MLPClassifier(random_state=sd))
])

# Stratified 5-fold cross-validation, repeated 10 times.
RSKFold = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=sd)

# Hyper-parameter candidates, keyed as '<pipeline step>__<parameter>'.
# NOTE(review): per the scikit-learn docs, `momentum` only affects
# solver='sgd' and `learning_rate_init` only 'sgd'/'adam'; one dict per solver
# would avoid fitting redundant combinations.
grid_space = [
    {
        'MLPClassifier__solver': ['lbfgs', 'adam', 'sgd'],
        'MLPClassifier__max_iter': range(100, 1100, 100),
        'MLPClassifier__activation': ['logistic', 'tanh', 'relu'],
        # The candidate list was empty, which a grid search rejects. The
        # scikit-learn default architecture is used as a placeholder.
        # TODO: add the architectures that should actually be compared.
        'MLPClassifier__hidden_layer_sizes': [(100,)],
        'MLPClassifier__learning_rate_init': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        'MLPClassifier__momentum': np.arange(0.1, 1, 0.1)
    }
]