In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import optuna
import joblib

from sklearn.neural_network import MLPClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, accuracy_score, ConfusionMatrixDisplay, roc_auc_score
from sklearn.model_selection import StratifiedKFold, cross_val_score

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# --- Load the dataset and split it into features (X) and target (y) ---
target = 'Heart Disease'
df = pd.read_csv('df_att.csv')
X, y = df.drop(columns=target), df[target]

# Columns are routed by dtype: numeric ones are standardized, everything else
# is treated as categorical and one-hot encoded.
num_features = list(X.select_dtypes(include='number').columns)
cat_features = list(X.select_dtypes(exclude='number').columns)

# handle_unknown='ignore' makes categories that were absent at fit time encode
# as an all-zero row instead of raising during transform.
preprocessor = ColumnTransformer(transformers=[
    ("num", StandardScaler(), num_features),
    ("cat", OneHotEncoder(handle_unknown='ignore'), cat_features),
])

In [7]:
# Reuse the persisted CV splits; they are iterated below as
# (train_indices, validation_indices) pairs.
# NOTE(review): joblib files are pickles - only load 'folds.pkl' from a trusted source.
folds = joblib.load(filename='folds.pkl')

In [12]:
# One out-of-fold prediction slot per row of X (0.0 until a fold fills it in).
oof_preds = np.zeros(shape=len(X.index), dtype=np.float64)

In [27]:
# Out-of-fold (OOF) predictions for the MLP: every row of X is scored by a model
# whose training split (as given by `folds`) did not contain that row.

# The hyperparameters are the same for every fold, so they are defined once.
mlp_params = dict(
    hidden_layer_sizes=(128,),
    alpha=5.894589851301599e-05,
    learning_rate_init=0.0007594934857959463,
    activation='relu',
    random_state=42,
)

# Allocate the OOF buffer inside this cell so that re-running it can never mix
# in predictions left over from a previous execution.
oof_preds = np.zeros(len(X))
# Marks the rows that received a prediction, so rows missed by every
# validation fold are detected instead of silently staying at 0.0.
oof_filled = np.zeros(len(X), dtype=bool)

for fold, (X_train_idx, X_val_idx) in enumerate(folds):
    # A fresh, identically configured model per fold.
    model = MLPClassifier(**mlp_params)

    # Fold indices are positional, hence .iloc.
    X_train, y_train = X.iloc[X_train_idx], y.iloc[X_train_idx]
    X_val, y_val = X.iloc[X_val_idx], y.iloc[X_val_idx]

    # Fit the scaler/encoder on the training split only (no leakage from the
    # validation rows) and transform it in the same pass.
    X_train_transformed = preprocessor.fit_transform(X_train)
    X_val_transformed = preprocessor.transform(X_val)

    model.fit(X_train_transformed, y_train)

    # Column 1 of predict_proba is the probability of model.classes_[1].
    oof_preds[X_val_idx] = model.predict_proba(X_val_transformed)[:, 1]
    oof_filled[X_val_idx] = True
    print(f'O folds {fold} foi concluído')

# Fail loudly if some rows never appeared in a validation split: their 0.0
# placeholder would otherwise be scored as a real prediction.
if not oof_filled.all():
    raise ValueError(
        f'{int((~oof_filled).sum())} rows were not covered by any validation fold'
    )


O folds 0 foi concluído
O folds 1 foi concluído
O folds 2 foi concluído
O folds 3 foi concluído
O folds 4 foi concluído


In [28]:
# Quick visual check of the out-of-fold probabilities; NumPy abbreviates the
# repr of an array this large, so only the first and last values are shown.
oof_preds

array([0.99705674, 0.01093415, 0.01761819, ..., 0.94724442, 0.44035608,
       0.0033776 ], shape=(630000,))

In [29]:
# Persist the out-of-fold probabilities to disk in NumPy's .npy format.
np.save(file='oof_mlpc_preds.npy', arr=oof_preds)

In [32]:
# ROC AUC of the out-of-fold probabilities against the true labels.
roc_auc_score(y_true=y, y_score=oof_preds)

0.9532010658397945