In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
df_heart= pd.read_csv("HeartDiseaseTrain-Test.csv")
df_heart.head()

In [None]:
df_heart.columns

In [None]:
df_heart.rename(columns={
    "sex": "gender",
    "resting_blood_pressure": "systolic_bp",
    "cholestoral": "cholesterol",
    "fasting_blood_sugar": "glucose"
}, inplace= True)

# df_heart.head()


In [None]:
df_heart["diastolic_bp"]= 80
df_heart["bmi"]= 25
df_heart["smoking"]= 0
df_heart["alcohol"]= 0
df_heart["family_history"]= 0


In [None]:
schema_cols = [
    "age",
    "gender",
    "systolic_bp",
    "diastolic_bp",
    "glucose",
    "cholesterol",
    "bmi",
    "smoking",
    "alcohol",
    "family_history",
    "target"
]

df_heart= df_heart[schema_cols]
df_heart.head()

In [None]:

df_heart["glucose"]= df_heart["glucose"].map({
    "Lower than 120 mg/ml": 0,
    "Greater than 120 mg/ml": 1
})
df_heart.head()


In [None]:
df_heart["gender"]= df_heart["gender"].str.strip().map({
    "Male": 1,
    "Female": 0
})

df_heart.head()

In [None]:
df_heart.isnull().sum()
df_heart.dtypes

In [None]:
x= df_heart.drop("target", axis= 1)
y= df_heart["target"]

In [None]:
df_heart2.head()

In [126]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression

categorical_features = ['sex','chest_pain_type','fasting_blood_sugar','rest_ecg','exercise_induced_angina', 'slope','vessels_colored_by_flourosopy','thalassemia']
numeric_features = ['age','resting_blood_pressure','cholestoral','Max_heart_rate','oldpeak']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ]
)
heart_stage2= Pipeline([
    ('preprocessor', preprocessor),
    ('classifier',LogisticRegression(class_weight={0:1,1:2}, max_iter=1000))
])


In [124]:
from sklearn.pipeline import make_pipeline
heart_stage1= make_pipeline(
    StandardScaler(),
    LogisticRegression(class_weight={0:1,1:2}, max_iter=1000)
)

In [133]:
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=423)
# x_train_scaled= StandardScaler().fit_transform(x_train)
# x_test_scaled= StandardScaler().fit_transform(x_test)
heart_stage1.fit(x_train , y_train)
y_pred= heart_stage1.predict(x_test)

In [134]:
from sklearn.metrics import confusion_matrix
cm= confusion_matrix(y_test,y_pred)
cm

array([[ 32,  60],
       [  9, 104]])

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

In [135]:
y_prob= heart_stage1.predict_proba(x_test)[:,1]
def heart_risk_prob(prob):
    if prob< 0.30:
        return "LOW"
    elif prob< 0.60:
        return "MODERATE"
    else:
        return "HIGH"
    
def heart_recommendation(risk):
    if risk == "LOW":
        return "No immediate concern. Maintain healthy lifestyle."
    elif risk == "MODERATE":
        return "Medical consultation recommended."
    else:
        return "Urgent cardiac evaluation advised."




In [None]:
for i in range(5):
    print(
        f"Risk Probability: {y_prob[i]*100:.1f}%",
        "| Risk Level:", heart_risk_prob(y_prob[i])
    )


In [119]:
df_heart1= pd.read_csv("HeartDiseaseTrain-Test.csv")
df_heart1.head()

Unnamed: 0,age,sex,chest_pain_type,resting_blood_pressure,cholestoral,fasting_blood_sugar,rest_ecg,Max_heart_rate,exercise_induced_angina,oldpeak,slope,vessels_colored_by_flourosopy,thalassemia,target
0,52,Male,Typical angina,125,212,Lower than 120 mg/ml,ST-T wave abnormality,168,No,1.0,Downsloping,Two,Reversable Defect,0
1,53,Male,Typical angina,140,203,Greater than 120 mg/ml,Normal,155,Yes,3.1,Upsloping,Zero,Reversable Defect,0
2,70,Male,Typical angina,145,174,Lower than 120 mg/ml,ST-T wave abnormality,125,Yes,2.6,Upsloping,Zero,Reversable Defect,0
3,61,Male,Typical angina,148,203,Lower than 120 mg/ml,ST-T wave abnormality,161,No,0.0,Downsloping,One,Reversable Defect,0
4,62,Female,Typical angina,138,294,Greater than 120 mg/ml,ST-T wave abnormality,106,No,1.9,Flat,Three,Fixed Defect,0


In [None]:
from sklearn.preprocessing import LabelEncoder

le= LabelEncoder()

cat_cols= [
    "sex",
    "chest_pain_type",
    "rest_ecg",
    "exercise_induced_angina",
    "slope",
    "vessels_colored_by_flourosopy",
    "thalassemia"
]

for cols in cat_cols:
    df_heart1[cols]= le.fit_transform(df_heart1[cols])
df_heart1.head()



In [None]:
df_heart1["fasting_blood_sugar"]= df_heart1["fasting_blood_sugar"].str.strip().map({
    "Lower than 120 mg/ml": 0,
    "Greater than 120 mg/ml":1
})
df_heart1.head(8)

In [120]:
x1= df_heart1.drop("target",axis=1)
y1= df_heart1["target"]

In [127]:
x_train1,x_test1,y_train1,y_test1= train_test_split(x1,y1,test_size=0.2,random_state=334)
heart_stage2.fit(x_train1 , y_train1)
y_pred1= heart_stage2.predict(x_test1)
accuracy_score(y_pred1,y_test1)

0.9219512195121952

In [128]:
print(confusion_matrix(y_test1,y_pred1))
print(classification_report(y_test1,y_pred1))

[[ 72  16]
 [  0 117]]
              precision    recall  f1-score   support

           0       1.00      0.82      0.90        88
           1       0.88      1.00      0.94       117

    accuracy                           0.92       205
   macro avg       0.94      0.91      0.92       205
weighted avg       0.93      0.92      0.92       205



In [None]:
from sklearn.pipeline import make_pipeline

In [136]:
import pickle

pickle.dump(heart_stage1,open('models/Heart_Stage1_Model.pkl','wb'))
pickle.dump(heart_stage2,open('models/Heart_Stage2_Model.pkl','wb'))

In [None]:
df_heart.head(2)

In [None]:
df_heart1.head(2)

In [None]:
df_heart2= pd.read_csv("HeartDiseaseTrain-Test.csv")
df_heart2.head(2)

In [137]:
heart_stage1.predict_proba(pd.DataFrame([x.iloc[0]]))

array([[0.3529149, 0.6470851]])

In [138]:
accuracy_score(y_pred1,y_test1)

0.9219512195121952

In [139]:
print(confusion_matrix(y_pred1,y_test1))
print(classification_report(y_pred1,y_test1))

[[ 72   0]
 [ 16 117]]
              precision    recall  f1-score   support

           0       0.82      1.00      0.90        72
           1       1.00      0.88      0.94       133

    accuracy                           0.92       205
   macro avg       0.91      0.94      0.92       205
weighted avg       0.94      0.92      0.92       205

