# Heart Failure Prediction Dataset

## Import Libraries

In [22]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


## Load dataset

In [23]:
# Load dataset
# The CSV location can be overridden with the HEART_CSV_PATH environment
# variable, so the notebook can run on another machine without editing code.
# When the variable is unset (or empty) the original path is used unchanged.
data_path = (
    os.environ.get('HEART_CSV_PATH')
    or 'C:/Users/Salee/PycharmProjects/TeamProject/heart.csv'
)
df = pd.read_csv(data_path)

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289.0,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180.0,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283.0,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214.0,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195.0,0,Normal,122,N,0.0,Up,0


## Handle Missing Values

In [24]:
# Impute missing Cholesterol readings with the column mean
# (Series.mean() skips NaN by default, so only observed values contribute).
cholesterol_mean = df['Cholesterol'].mean()
df['Cholesterol'] = df['Cholesterol'].fillna(cholesterol_mean)


## Encode Categorical Variables

In [25]:
# Map categorical to numerical
category_codes = {
    'Sex': {'M': 1, 'F': 0},
    'ChestPainType': {'ATA': 1, 'NAP': 2, 'ASY': 3, 'TA': 4},
    'RestingECG': {'Normal': 1, 'ST': 2, 'LVH': 3},
    'ExerciseAngina': {'N': 0, 'Y': 1},
    'ST_Slope': {'Up': 1, 'Flat': 0, 'Down': 2},  # Adjust/remove 'Down' if not present
}

for column, codes in category_codes.items():
    # Series.map() silently turns every value that is not a key of the dict
    # into NaN. Re-running this cell on already-encoded data would therefore
    # wipe the column, so columns that are already numeric are left alone.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue

    encoded = df[column].map(codes)

    # Fail loudly on labels the mapping does not know about instead of letting
    # them become NaN. Values that were already missing stay missing.
    unmapped = df.loc[encoded.isna() & df[column].notna(), column].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Unexpected values in column '{column}': {sorted(map(str, unmapped))}"
        )

    df[column] = encoded


## Define Features and Target

In [26]:
# Target is the HeartDisease column; the features are every remaining column.
y = df['HeartDisease']
X = df.drop(columns=['HeartDisease'])

## Scale Numerical Features

In [27]:
# Scale only numerical columns
numerical_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']

# Fit the scaler on the training rows only, so the mean/std it learns (and that
# are later saved with the scaler) are not influenced by the held-out test rows.
# Without stratification, train_test_split chooses rows from the number of
# samples, test_size and random_state alone, so splitting row positions here
# with the same test_size/random_state as the "Split Data" cell selects the
# same training rows. Keep these two arguments in sync with that cell.
train_pos = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)[0]

scaler = StandardScaler()
scaler.fit(df[numerical_cols].iloc[train_pos])

# Transform from the unscaled values in df rather than from X, so re-running
# this cell does not refit the scaler on already-scaled data.
X[numerical_cols] = scaler.transform(df[numerical_cols])


## Split Data

In [28]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Train Logistic Regression Model

In [29]:
# Build a logistic-regression classifier with balanced class weights and a
# fixed seed, then fit it on the training split.
model = LogisticRegression(
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)


## Predict and Evaluate

In [30]:
# Score the held-out split: overall accuracy plus the per-class report.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# One print call with newline separators emits the same three lines of output.
print(
    f"Model Accuracy: {accuracy:.4f}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)


Model Accuracy: 0.8098
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.84      0.79        77
           1       0.88      0.79      0.83       107

    accuracy                           0.81       184
   macro avg       0.81      0.81      0.81       184
weighted avg       0.82      0.81      0.81       184



## Save Model and Scaler

In [31]:
# Persist the fitted model and scaler with joblib, confirming each file
# right after it is written.
artifacts = (
    (model, 'logistic_regression_model.pkl', "Saved logistic_regression_model.pkl"),
    (scaler, 'standard_scaler.pkl', "Saved standard_scaler.pkl"),
)
for artifact, filename, message in artifacts:
    joblib.dump(artifact, filename)
    print(message)


Saved logistic_regression_model.pkl
Saved standard_scaler.pkl
