# ANN for Heart Disease

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import train_test_split
import warnings

# Load preprocessed data
X = pd.read_csv("../preprocessing/preprocessed_heart_disease_X.csv")
y = pd.read_csv("../preprocessing/preprocessed_heart_disease_y.csv").squeeze()

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalise features (fit only on training data)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define MLP Classifier
mlp_clf = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu',
                        solver='adam', max_iter=500, random_state=42)

# K-Fold Cross-Validation
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)
accuracy_scorer = make_scorer(accuracy_score) # Classification accuracy as the scoring metric
cv_scores = cross_val_score(mlp_clf, X_train_scaled, y_train, cv=kf, scoring=accuracy_scorer)

# Print results
print(f"K-Fold Cross-Validation (k={k})")
print(f"Accuracy scores for each fold: {cv_scores}")
print(f"Mean accuracy: {cv_scores.mean():.4f}")
print(f"Standard deviation: {cv_scores.std():.4f}")

K-Fold Cross-Validation (k=5)
Accuracy scores for each fold: [0.625      0.58333333 0.63829787 0.4893617  0.46808511]
Mean accuracy: 0.5608
Standard deviation: 0.0698
