In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.gaussian_process import GaussianProcessRegressor
import optuna
import matplotlib.pyplot as plt
import numpy as np

In [2]:
data = pd.read_csv('Heart Prediction Quantum Dataset.csv')
data.describe()

Unnamed: 0,Age,Gender,BloodPressure,Cholesterol,HeartRate,QuantumPatternFeature,HeartDisease
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,54.864,0.468,132.874,221.5,88.766,8.317407,0.6
std,14.315004,0.499475,26.418516,43.86363,17.417289,0.919629,0.490389
min,30.0,0.0,90.0,150.0,60.0,6.164692,0.0
25%,43.0,0.0,111.0,183.75,73.0,7.675779,0.0
50%,55.0,0.0,132.0,221.0,89.0,8.323064,1.0
75%,66.25,1.0,155.0,258.0,104.0,8.935999,1.0
max,79.0,1.0,179.0,299.0,119.0,10.784886,1.0


In [3]:
num_features = ['Age', 'BloodPressure', 'Cholesterol', 'HeartRate', 'QuantumPatternFeature'] 
cat_features = ['Gender'] 
X = data[num_features + cat_features] 
y = data['HeartDisease']

In [4]:
numerical_transformer = StandardScaler()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, num_features)
    ])

hidden_layers = (12, 7)
model = MLPClassifier(hidden_layer_sizes=hidden_layers, 
                      random_state=10, 
                      max_iter=10000)
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', model)])
pipeline.fit(X, y)

0,1,2
,steps,"[('preprocessor', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,hidden_layer_sizes,"(12, ...)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,10000
,shuffle,True


In [5]:
pipeline.score(X, y)

0.94

In [6]:
k_folds = 10
scores = cross_val_score(pipeline, X, y, scoring='roc_auc_ovr', cv=k_folds)
print(f'Cross-validation scores: {scores}')
print(f'Average cross-validation score: {scores.mean()}')
print(f'Standard deviation of cross-validation scores: {scores.std()}')

Cross-validation scores: [0.97833333 0.95333333 0.93833333 0.99666667 0.95833333 0.99166667
 0.98       1.         0.98833333 0.97833333]
Average cross-validation score: 0.9763333333333334
Standard deviation of cross-validation scores: 0.019160143817599905


In [7]:
k_folds = 10
optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(direction='maximize')

def objective(trial):
    hidden_layer1 = trial.suggest_int('hidden_layer1', 10, 30)
    hidden_layer2 = trial.suggest_int('hidden_layer2', 5, 15)
    hidden_layers = (hidden_layer1, hidden_layer2)
    
    model = MLPClassifier(hidden_layer_sizes=hidden_layers, 
                          random_state=10, 
                          max_iter=10000)
    
    global pipeline
    
    scores = cross_val_score(pipeline, X, y, scoring='roc_auc_ovr', cv=k_folds)
    
    return scores.mean()

study.optimize(objective, n_trials=10, show_progress_bar=True)

  0%|          | 0/10 [00:00<?, ?it/s]

In [8]:
best_trial = study.best_trial
print(f'Best trial parameters: {best_trial.params}')
print(f'Best trial score: {best_trial.value}')

Best trial parameters: {'hidden_layer1': 28, 'hidden_layer2': 6}
Best trial score: 0.9763333333333334


# Actividad de clase

In [9]:
def auto_mlp(hidden_layers):
    global X, y, num_features
    numerical_transformer = StandardScaler()
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, num_features)
        ])

    model = MLPClassifier(hidden_layer_sizes=hidden_layers, 
                        random_state=10, 
                        max_iter=10000)
    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                            ('model', model)])
    pipeline.fit(X, y)

    k_folds = 10
    scores = cross_val_score(pipeline, X, y, scoring='roc_auc_ovr', cv=k_folds)
    return scores.mean(), scores.std()



In [10]:
auto_mlp((12, 7))

(np.float64(0.9763333333333334), np.float64(0.019160143817599905))

In [11]:
num_features = ['Age', 'BloodPressure', 'Cholesterol', 'HeartRate', 'QuantumPatternFeature'] 
cat_features = ['Gender'] 
X = data[num_features + cat_features] 
y = data['HeartDisease']

x_reg = [(10, 10),
     (20, 5),
     (30, 15)]
y_pred = []
std_pred = []
y_reg = []
y_reg_std = []
for i in x_reg:
    mean_score = auto_mlp(i)[0]
    std_score = auto_mlp(i)[1]
    y_reg.append(mean_score)
    y_reg_std.append(std_score)

for i in range(len(x_reg)):
    print(f'Hidden layers: {x_reg[i]}, Mean CV score: {y_reg[i]:.4f} ± {y_reg_std[i]:.4f}')

Hidden layers: (10, 10), Mean CV score: 0.9765 ± 0.0175
Hidden layers: (20, 5), Mean CV score: 0.9753 ± 0.0174
Hidden layers: (30, 15), Mean CV score: 0.9620 ± 0.0168


In [21]:
model_regression = GaussianProcessRegressor()
model_regression.fit(x_reg, y_reg)

y_pred, std_pred = model_regression.predict(x_reg, return_std=True)

x_1n = np.random.uniform(10, 30, 1000).round().astype(int)
x_2n = np.random.uniform(5, 15, 1000).round().astype(int)
x_3n = np.random.uniform(15, 25, 1000).round().astype(int)

X_new1 = np.column_stack([x_1n, x_2n])
y_pred1, std1 = model_regression.predict(X_new1, return_std=True)

X_new2 = np.column_stack([x_1n, x_3n])
y_pred2, std2 = model_regression.predict(X_new2, return_std=True)

X_new3 = np.column_stack([x_2n, x_3n])
y_pred3, std3 = model_regression.predict(X_new3, return_std=True)


In [13]:
y_upper = y_pred1 + 1.96 * std1
y_lower = y_pred1 - 1.96 * std1

In [14]:
y_reg

[np.float64(0.9765), np.float64(0.9753333333333334), np.float64(0.962)]

In [15]:
# Max del intervalo de confianza
max_index = np.argmax(y_upper)
x_val = X_new1[max_index]
y_val = y_pred1[max_index]
x_added = np.vstack([x_reg, x_val])

In [26]:
def auto_loop_mlp(initial_x, initial_y, x_candidates, number_iterations):
    X = initial_x
    y = initial_y
    model_regression = GaussianProcessRegressor()

    model_regression.fit(X, y)

    for i in range(number_iterations):
        y_pred, std_pred = model_regression.predict(x_candidates, return_std=True)

        y_upper = y_pred + 1.96 * std_pred
        x_val = x_candidates[np.argmax(y_upper)]  

        mlp = auto_mlp(x_val)
        y_val = mlp[0]

        X = np.vstack([X, x_val])
        y = np.append(y, y_val)

        model_regression.fit(X, y)

    score = auto_mlp(X[-1])
    if number_iterations <= 5:
        print(f'Final hidden layers: {X}, Mean CV score: {score[0]:.4f} ± {score[1]:.4f}')
    else:
        print(f'Final hidden layers: {X[-1]}, Mean CV score: {score[0]:.4f} ± {score[1]:.4f}')    


In [28]:
auto_loop_mlp(x_reg, y_reg, X_new1, 30)

Final hidden layers: [17  9], Mean CV score: 0.9728 ± 0.0170


In [29]:
auto_loop_mlp(x_reg, y_reg, X_new2, 5)

Final hidden layers: [[10 10]
 [20  5]
 [30 15]
 [29 16]
 [29 15]
 [30 17]
 [29 18]
 [30 19]], Mean CV score: 0.9645 ± 0.0202


In [30]:
auto_loop_mlp(x_reg, y_reg, X_new3, 5)

Final hidden layers: [[10 10]
 [20  5]
 [30 15]
 [10 15]
 [ 9 16]
 [ 9 15]
 [10 17]
 [11 16]], Mean CV score: 0.9762 ± 0.0151
