# **Import libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from xgboost import XGBClassifier

def print_metrics(model_name, true_labels, predicted_labels):
    precision = precision_score(true_labels, predicted_labels)

    recall = recall_score(true_labels, predicted_labels)

    f1 = f1_score(true_labels, predicted_labels)

    print(model_name+": Precision:", precision)
    print(model_name+": Recall:", recall)
    print(model_name+": F1 Score:", f1)

# **Data Preprocessing**

In [2]:
df = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')
df.rename(columns={"cp":"chest_pain","trtbps":"resting_blood_presure","chol":"cholestoral","fbs":"fasting_blood_sugar","restecg":"resting_electrocardiographic_results","thalachh":"maximum_heart_rate","exng":"exercise_induced_angina","slp":"slope","caa":"number_of_colored_vessels","thall":"thalassemia"},inplace=True)
df = df.drop_duplicates()

# define threshold for the high blood pressure
high_threshold_systolic = 130
# function to categorize for high systolic blood pressure
def categorize_blood_pressure(row):
    if row['resting_blood_presure'] < high_threshold_systolic:
        return 'Normal'
    else:
        return 'High'
# create a new column 'blood_pressure_category' using the categorize_blood_pressure function
df['resting_blood_presure'] = df.apply(categorize_blood_pressure, axis=1)


# define the  threshold for high, normal, and low cholestrol levels
high_threshold = 240
normal_threshold_low = 200
normal_threshold_high = 239
# function to categorize cholestrol levels
def categorize_cholestrol(row):
    if row['cholestoral'] >= high_threshold:
        return 'High'
    elif normal_threshold_low <= row['cholestoral'] <= normal_threshold_high:
        return 'Normal'
    else:
        return 'Low'
df['cholestoral'] = df.apply(categorize_cholestrol, axis=1)   


# define the thresholds for low, normal and high maximum heart rate 
low_thre = 100
normal_thr_low = 100
normal_thr_high = 150
# function for categorize maximum heart rate
def categorize_max_heart_rate(row):
    if row['maximum_heart_rate'] < low_thre:
        return 'Low'
    elif normal_thr_low <= row['maximum_heart_rate'] <= normal_thr_low:
        return 'Normal'
    else:
        return 'High'
    
df['maximum_heart_rate'] = df.apply(categorize_max_heart_rate,axis=1)


# **Feature Engineering**

In [3]:
X = df.drop('output', axis=1)
y = df['output']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
                                                    
    
# Define the categories for each ordinal feature
categories = [["Low", "Normal", "High"],  # categories for resting_blood_pressure
              ["Low", "Normal", "High"],  # categories for cholesterol
              ["Low", "Normal", "High"]]  # categories for maximum_heart_rate

transformer = ColumnTransformer(
    [
        ("ordinal", OrdinalEncoder(categories=categories), ["resting_blood_presure", "cholestoral", "maximum_heart_rate"]),
    ],
    remainder='passthrough'
)

# setting to get a pandas df
transformer.set_output(transform='pandas')

X_train_trans = transformer.fit_transform(X_train)
X_test_trans = transformer.fit_transform(X_test)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_train_trans, y_train, test_size=0.2, random_state=42)


# **Build Models**

In [4]:
# Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
print_metrics("Logistic Regression", y_test, lr_pred)

Logistic Regression: Precision: 0.7586206896551724
Logistic Regression: Recall: 0.88
Logistic Regression: F1 Score: 0.8148148148148148


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
# Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print_metrics("Random Forest", y_test, rf_pred)

Random Forest: Precision: 0.7777777777777778
Random Forest: Recall: 0.84
Random Forest: F1 Score: 0.8076923076923077


In [6]:
# Support Vector Machine
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
print_metrics("Support Vector Machine", y_test, svm_pred)

Support Vector Machine: Precision: 0.5319148936170213
Support Vector Machine: Recall: 1.0
Support Vector Machine: F1 Score: 0.6944444444444444


In [7]:
# K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)
print_metrics("K-Nearest Neighbors (KNN)", y_test, knn_pred)

K-Nearest Neighbors (KNN): Precision: 0.696969696969697
K-Nearest Neighbors (KNN): Recall: 0.92
K-Nearest Neighbors (KNN): F1 Score: 0.793103448275862


In [8]:
# Naive Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)
print_metrics("Naive Bayes", y_test, nb_pred)

Naive Bayes: Precision: 0.7419354838709677
Naive Bayes: Recall: 0.92
Naive Bayes: F1 Score: 0.8214285714285714


In [10]:
# XGBoost
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)
print_metrics("XGBoost", y_test, xgb_pred)

XGBoost: Precision: 0.8148148148148148
XGBoost: Recall: 0.88
XGBoost: F1 Score: 0.8461538461538461
