In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import joblib

# Read the heart-disease CSV into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running this cell anywhere else.
data = pd.read_csv(
    filepath_or_buffer='/Users/kigali/Desktop/whynot/whynot/data/Heart_Disease_Prediction.csv',
)

# Preprocess data
def preprocess_data(df):
    """Encode the heart-disease table and split it into train and test sets.

    The 'Heart Disease' column is mapped from 'Presence'/'Absence' to 1/0
    (any other label becomes NaN), four features are passed through a
    ``LabelEncoder`` each, and the fitted encoders are written to
    ``../models/encoder_<feature>.pkl``. The rows are then split 80/20 with a
    fixed seed.

    All transformations are applied to a copy of ``df``, so the caller's
    DataFrame keeps its original labels and the function can safely be called
    more than once on the same frame.

    Args:
        df: DataFrame holding the 13 feature columns listed in
            ``feature_columns`` plus the 'Heart Disease' target column.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)`` -- note that the order
        differs from the one ``train_test_split`` itself returns.
    """
    import os  # local import so this block does not depend on the file's import cell

    target_column = 'Heart Disease'
    feature_columns = ['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol',
                       'FBS over 120', 'EKG results', 'Max HR',
                       'Exercise angina', 'ST depression', 'Slope of ST',
                       'Number of vessels fluro', 'Thallium']
    categorical_features = ['Sex', 'Chest pain type', 'Exercise angina', 'Slope of ST']
    models_dir = '../models'

    # Work on a copy: mapping the target in place would overwrite the caller's
    # labels, and a second call would then map 0/1 to NaN.
    df = df.copy()
    df[target_column] = df[target_column].map({'Presence': 1, 'Absence': 0})

    # Encode categorical features, keeping each fitted encoder for saving.
    encoders = {}
    for feature in categorical_features:
        encoder = LabelEncoder()
        df[feature] = encoder.fit_transform(df[feature])
        encoders[feature] = encoder

    # Save encoders; create the output directory first so a fresh checkout
    # does not fail with FileNotFoundError.
    os.makedirs(models_dir, exist_ok=True)
    for feature, encoder in encoders.items():
        joblib.dump(encoder, f'{models_dir}/encoder_{feature}.pkl')

    X = df[feature_columns]
    y = df[target_column]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return X_train, y_train, X_test, y_test

# Build the split; preprocess_data returns (X_train, y_train, X_test, y_test)
# in that order.
X_train, y_train, X_test, y_test = preprocess_data(data)

# Scale data: the scaler is fitted on the training rows only and then reused
# unchanged for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model. random_state fixes the network's random initialisation and
# batch sampling so the reported accuracy is reproducible from run to run,
# in line with the seeded train/test split.
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)
model.fit(X_train_scaled, y_train)

# Save scaler and model so they can be reloaded together later.
joblib.dump(scaler, '../models/scaler.pkl')
joblib.dump(model, '../models/mlp_model.pkl')

# Evaluate model: mean accuracy on the held-out test rows.
accuracy = model.score(X_test_scaled, y_test)
print(f'Model Accuracy: {accuracy}')


Model Accuracy: 0.8148148148148148
