## Prueba uno RedNeuronal

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import RFE, RFECV
from sklearn.model_selection import GridSearchCV
from pprint import pprint
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import cross_val_score
from imblearn.combine import SMOTEENN

In [4]:
# Loading the data
df_train = pd.read_csv("../data/raw/mitbih_train.csv")
df_test = pd.read_csv("../data/raw/mitbih_test.csv")

# Data normal heartbeat
df_normal = pd.read_csv("../data/raw/ptbdb_normal.csv")
df_anormal = pd.read_csv("../data/raw/ptbdb_abnormal.csv")

In [5]:
# Concatenando df_train con df_normal pero evitando la penúltima columna de ambos dataframes
df_train.columns = [f'Feature {i + 1}' for i in range(len(df_train.columns) - 1)] + ['Target']
df_normal.columns = [f'Feature {i + 1}' for i in range(len(df_normal.columns) - 1)] + ['Target']

df_train = pd.concat([df_train, df_normal], axis=0)

df_train.shape

(91598, 188)

In [6]:
# SEPARAMOS LOS DATOS PREDICTORES Y OBJETIVO
X = df_train.drop('Target', axis=1)
y = df_train['Target']

# Submuestreo
#rus = RandomUnderSampler(sampling_strategy='auto', random_state=42)
#X_train_resampled, y_train_resampled = rus.fit_resample(X, y)

# Separamos datos de entrenamiento (50%), validación (25%) y prueba (25%)
# X_train, X_test, y_train, y_test = train_test_split(X_train_resampled, y_train_resampled, test_size=0.25, random_state=42)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.34, random_state=42)


# Separamos datos de entrenamiento (50%), validación (25%) y prueba (25%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.34, random_state=42)


X_train.shape, X_val.shape, X_test.shape


((45340, 187), (23358, 187), (22900, 187))

In [14]:
# submuestreo, validación cruzada y selección de características
def evaluate_model(model, X_train, y_train, X_test, y_test, param_values):
    # Definir la validación cruzada externa
    n_splits = 5
    outer_kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    fold_scores = []

    # # Si X es un DataFrame
    # X_train, X_test = X.iloc[X_train.index], X.iloc[X_test.index]
    # y_train, y_test = y.iloc[y_train.index], y.iloc[y_test.index]

    f1_scorer = make_scorer(f1_score, average='macro')
    
    # Crear el pipeline con selección de características y clasificación
    pipeline = ImbPipeline(steps=[
        ('scaler', StandardScaler()),
        ('classifier', model)
    ])

    # Configurar la búsqueda de hiperparámetros en la validación cruzada interna
    grid_search = GridSearchCV(pipeline, param_values, cv=outer_kf, scoring=f1_scorer, n_jobs=-1)

    # Ajustar la búsqueda de hiperparámetros
    grid_search.fit(X_train, y_train)
    
    # Obtener el mejor pipeline con los hiperparámetros óptimos
    best_pipeline = grid_search.best_estimator_

    # Evaluar el mejor pipeline en el conjunto de prueba interno
    y_pred = best_pipeline.predict(X_test)
    f1 = f1_score(y_test, y_pred, average='macro')
    fold_scores.append(f1)

    # Imprimir resultados
    print(f'F1 Score para datos de validación: {f1:.4f}')
    print(classification_report(y_test, y_pred))

    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='viridis')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    return fold_scores, best_pipeline



In [17]:
alphas = np.random.uniform(0.0001, 1, 4)
print("Hiperparámetros para alpha: ", alphas)

max_iters = []
for i in range(4):
    max_iters.append(np.random.randint(600, 1000))
print(max_iters)
print("Hiperparámetros para max_iter: ", max_iters)

hidden_layer_sizes = []
for i in range(1, 11):
    val = np.random.randint(20, 80)
    hidden_layer_sizes.append((val,val))
    hidden_layer_sizes.append((val,val,val))
    #hidden_layer_sizes.append((val,val,val,val))
    #hidden_layer_sizes.append((val,val,val,val,val))
print("Hiperparámetros para hidden_layer_sizes: ", hidden_layer_sizes)

learning_rates = np.random.uniform(0.0001, 0.1, 3)
print("Hiperparámetros para learning_rate_init: ", learning_rates)

shuffle = [True, False]
print("Hiperparámetros para shuffle: ", shuffle)

n_iter_no_change = [5, 10, 20]
print("Hiperparámetros para n_iter_no_change: ", n_iter_no_change)


scores, best_pipeline = evaluate_model(MLPClassifier(), X_train, y_train, X_val, y_val, param_values={
    'classifier__hidden_layer_sizes': hidden_layer_sizes,
    'classifier__alpha': alphas,
    'classifier__max_iter': max_iters,
    'classifier__learning_rate_init': learning_rates,
    'classifier__shuffle': shuffle,
    'classifier__n_iter_no_change': n_iter_no_change
})

Hiperparámetros para alpha:  [0.13074466 0.73582887 0.62003556 0.45491183]
[973, 755, 617, 754]
Hiperparámetros para max_iter:  [973, 755, 617, 754]
Hiperparámetros para hidden_layer_sizes:  [(66, 66), (66, 66, 66), (27, 27), (27, 27, 27), (64, 64), (64, 64, 64), (78, 78), (78, 78, 78), (48, 48), (48, 48, 48), (69, 69), (69, 69, 69), (66, 66), (66, 66, 66), (73, 73), (73, 73, 73), (72, 72), (72, 72, 72), (32, 32), (32, 32, 32)]
Hiperparámetros para learning_rate_init:  [0.08320069 0.04202029 0.00199159]
Hiperparámetros para shuffle:  [True, False]
Hiperparámetros para n_iter_no_change:  [5, 10, 20]


KeyboardInterrupt: 

In [27]:
# Enviamos los datos de entrenamiento y validación
print('-------- Evaluando RF --------')
scores, model = evaluate_model(RandomForestClassifier(), X_train, y_train, X_val, y_val, param_values={'classifier__n_estimators': [10, 100, 1000], 
                                                                                                 'classifier__max_depth': [10, 100, 1000]})

-------- Evaluando RF --------


KeyboardInterrupt: 

In [9]:
# Evaluando el rendimiento del modelo final
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

         0.0       0.77      0.77      0.77       190
         1.0       0.86      0.84      0.85       154
         2.0       0.85      0.89      0.87       147
         3.0       0.87      0.87      0.87       157
         4.0       0.96      0.93      0.94       154

    accuracy                           0.86       802
   macro avg       0.86      0.86      0.86       802
weighted avg       0.86      0.86      0.86       802



# RED NEURONAL RETO

In [7]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer, mean_squared_error

In [8]:
X = X.to_numpy()
y = y.to_numpy()

In [43]:
# Obteniendo el número de capas ocultas y el número de neuronas por capa óptimo
# # 5-fold cross-validation
print('------- Evaluando modelo perceptrón multicapa con CV y optimización de parámetros -------')
n_splits=5
kf = KFold(n_splits=n_splits, shuffle = True)

mse = 0
mse_l = []

hidden_layer_sizes = []
for i in range(1, 21):
    val = np.random.randint(20, 80)
    hidden_layer_sizes.append((val,val))
    hidden_layer_sizes.append((val,val,val))
    hidden_layer_sizes.append((val,val,val,val))
    hidden_layer_sizes.append((val,val,val,val,val))

for train_index, test_index in kf.split(X):
    print('Iteración: ', len(mse_l) + 1)

    # Training phase
    x_train = X[train_index, :]
    y_train = y[train_index]

    pipe = make_pipeline(MLPRegressor(max_iter=10000))

    param_grid = {
        'mlpregressor__hidden_layer_sizes': hidden_layer_sizes,
    }

    regr_cv = GridSearchCV(pipe, param_grid, cv=5)
    print('\tParámetros seleccionados en GridSearchCV:')

    for i in range(len(regr_cv.get_params()['param_grid'])):
        print('\t', regr_cv.get_params(deep=True,))

    regr_cv.fit(x_train, y_train)

    # Test phase
    x_test = X[test_index, :]
    y_test = y[test_index]    

    y_pred = regr_cv.predict(x_test)

    # Calculate MSE
    mse_i = mean_squared_error(y_test, y_pred)
    print('mse = ', mse_i)
    mse_l.append(mse_i)

    mse += mse_i 

mse = mse/n_splits
print('MSE = ', mse)

# Gráfica de barras
plt.bar(range(1, n_splits + 1), mse_l)
plt.xlabel('Fold')
plt.ylabel('MSE')
plt.show()



------- Evaluando modelo perceptrón multicapa con CV y optimización de parámetros -------
Iteración:  1
	Parámetros seleccionados en GridSearchCV:
	 {'cv': 5, 'error_score': nan, 'estimator__memory': None, 'estimator__steps': [('mlpregressor', MLPRegressor(max_iter=10000))], 'estimator__verbose': False, 'estimator__mlpregressor': MLPRegressor(max_iter=10000), 'estimator__mlpregressor__activation': 'relu', 'estimator__mlpregressor__alpha': 0.0001, 'estimator__mlpregressor__batch_size': 'auto', 'estimator__mlpregressor__beta_1': 0.9, 'estimator__mlpregressor__beta_2': 0.999, 'estimator__mlpregressor__early_stopping': False, 'estimator__mlpregressor__epsilon': 1e-08, 'estimator__mlpregressor__hidden_layer_sizes': (100,), 'estimator__mlpregressor__learning_rate': 'constant', 'estimator__mlpregressor__learning_rate_init': 0.001, 'estimator__mlpregressor__max_fun': 15000, 'estimator__mlpregressor__max_iter': 10000, 'estimator__mlpregressor__momentum': 0.9, 'estimator__mlpregressor__n_iter_no

