In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
df_hypertension= pd.read_csv("hypertension_dataset.csv")
df_hypertension.head()

In [None]:
df_hypertension= df_hypertension.rename(columns={
    "Age": "age",
    "BMI": "bmi",
    "Smoking_Status": "smoking",
    "Family_History": "family_history"
})

df_hypertension.head()

In [None]:
df_hypertension["gender"]= "male"
df_hypertension["systolic_bp"]= df_hypertension["BP_History"].map({
    "Normal": 120,
    "Prehypertension": 135,
    "Hypertension": 150
})

df_hypertension["diastolic_bp"]= df_hypertension["BP_History"].map({
    "Normal": 80,
    "Prehypertension": 88,
    "Hypertension": 95
})

df_hypertension["glucose"]= 100
df_hypertension["cholesterol"]= 200

df_hypertension["smoking"]= df_hypertension["smoking"].str.strip().map({
    "Non-Smoker":0,
    "Smoker":1
})

df_hypertension['alcohol']=0

df_hypertension['family_history']= df_hypertension['family_history'].str.strip().map({
    'No': 0,
    'Yes': 1
})

df_hypertension= df_hypertension.rename(columns={
    'Has_Hypertension': 'target'
})




In [None]:
df_hypertension['target']= df_hypertension['target'].str.strip().map({
    'No': 0,
    'Yes': 1
})

In [None]:
df_hypertension.head()

In [None]:
schema_cols = [
    "age",
    "gender",
    "systolic_bp",
    "diastolic_bp",
    "glucose",
    "cholesterol",
    "bmi",
    "smoking",
    "alcohol",
    "family_history",
    "target"
]

df_hypertension= df_hypertension[schema_cols]

In [None]:
df_hypertension.head()

In [None]:
df_hypertension['gender']= 1

In [None]:
x= df_hypertension.drop("target", axis=1)
y= df_hypertension["target"]

In [None]:
from sklearn.pipeline import make_pipeline

hypertension_stage1= make_pipeline(
    LogisticRegression(max_iter=1000)
)


In [None]:
x_train,x_test,y_train,y_test= train_test_split(x,y, train_size=0.2, random_state=701)
# x_train_scaled= scaler.fit_transform(x_train)
# x_test_scaled= scaler.fit_transform(x_test)
    
hypertension_stage1.fit(x_train,y_train)
y_pred= hypertension_stage1.predict(x_test)
print(accuracy_score(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

In [None]:
y_prob= hypertension_stage1.predict_proba(x_test)[:,1]
def heart_risk_prob(prob):
    if prob< 0.30:
        return "LOW"
    elif prob< 0.60:
        return "MODERATE"
    else:
        return "HIGH"
    
def heart_recommendation(risk):
    if risk == "LOW":
        return "No immediate concern. Maintain healthy lifestyle."
    elif risk == "MODERATE":
        return "Medical consultation recommended."
    else:
        return "Urgent medical evaluation advised."
    


In [184]:
df_hypertension1= pd.read_csv("hypertension_dataset.csv")

In [None]:
df_hypertension1.head()

In [None]:
from sklearn.preprocessing import LabelEncoder


cat_cols= [
    "BP_History",
    "Medication",
    "Family_History",
    "Exercise_Level",
    "Smoking_Status",
    "Has_Hypertension"
]

le= LabelEncoder()

for cols in cat_cols:
    df_hypertension1[cols]= le.fit_transform(df_hypertension1[cols])


In [None]:
df_hypertension1.head()

In [185]:
x1= df_hypertension1.drop("Has_Hypertension", axis=1)
y1= df_hypertension1["Has_Hypertension"]

In [186]:

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression

categorical_features = ['BP_History','Medication','Exercise_Level','Smoking_Status','Family_History']
numeric_features = ['Age','Salt_Intake','Stress_Score','Sleep_Duration','BMI']


num_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

cat_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

preprocessor = ColumnTransformer([
    ("num", num_pipe, numeric_features),
    ("cat", cat_pipe, categorical_features)
])



hypertension_stage2 = Pipeline([
    ('preprocessor', preprocessor),
    ('poly', PolynomialFeatures(degree=2, include_bias=False, interaction_only=True)),
    ('lr', LogisticRegression(
        penalty='l2',
        C=0.2,
        max_iter=1000
    ))
])


In [187]:
x_train1, x_test1, y_train1, y_test1 = train_test_split(x1, y1, train_size=0.2, random_state=549)

hypertension_stage2.fit(x_train1, y_train1)

y_pred1 = hypertension_stage2.predict(x_test1)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("Accuracy:", accuracy_score(y_pred1, y_test1))
print(confusion_matrix(y_pred1, y_test1))
print(classification_report(y_pred1, y_test1))


Accuracy: 0.8727959697732998
[[673 124]
 [ 78 713]]
              precision    recall  f1-score   support

          No       0.90      0.84      0.87       797
         Yes       0.85      0.90      0.88       791

    accuracy                           0.87      1588
   macro avg       0.87      0.87      0.87      1588
weighted avg       0.87      0.87      0.87      1588



In [None]:
import pickle
pickle.dump(hypertension_stage1,open('models/Hypertension_Stage1_Model.pkl','wb'))

In [189]:
pickle.dump(hypertension_stage2,open('models/Hypertension_Stage2_Model.pkl','wb'))

In [None]:
df_hypertension1.head(2)

In [None]:
df_hypertension1.head(2)

In [None]:
df_hypertension2= pd.read_csv("hypertension_dataset.csv")
df_hypertension2.head(2)

In [188]:
hypertension_stage2.predict_proba(pd.DataFrame([x1.iloc[0]]))

array([[0.18863444, 0.81136556]])

In [None]:
hypertension_stage2.predict_proba(pd.DataFrame([x1.iloc[0]]))