In [1]:
import numpy as np
import pandas as pd

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [2]:
from google.colab import files
uploaded = files.upload()

Saving hypertension_dataset.csv to hypertension_dataset.csv


In [3]:
import os
filename = list(uploaded.keys())[0]
print(f"Uploaded file: {filename}")

Uploaded file: hypertension_dataset.csv


In [4]:
import io

# Load the uploaded file into df
df = pd.read_csv(io.BytesIO(uploaded[filename]))

print(f"Loaded {len(df)} rows and {len(df.columns)} columns from {filename}.")
display(df.head())

Loaded 1985 rows and 11 columns from hypertension_dataset.csv.


Unnamed: 0,Age,Salt_Intake,Stress_Score,BP_History,Sleep_Duration,BMI,Medication,Family_History,Exercise_Level,Smoking_Status,Has_Hypertension
0,69,8.0,9,Normal,6.4,25.8,,Yes,Low,Non-Smoker,Yes
1,32,11.7,10,Normal,5.4,23.4,,No,Low,Non-Smoker,No
2,78,9.5,3,Normal,7.1,18.7,,No,Moderate,Non-Smoker,No
3,38,10.0,10,Hypertension,4.2,22.1,ACE Inhibitor,No,Low,Non-Smoker,Yes
4,41,9.8,1,Prehypertension,5.8,16.2,Other,No,Moderate,Non-Smoker,No


In [5]:
import numpy as np

# Generate realistic SBP/DBP based on BP_History
np.random.seed(42)
sbp = []
dbp = []

for history in df['BP_History']:
    if history == 'Hypertension':
        sbp.append(np.random.normal(150, 15))
        dbp.append(np.random.normal(95, 10))
    elif history == 'Prehypertension':
        sbp.append(np.random.normal(130, 10))
        dbp.append(np.random.normal(85, 8))
    else:  # Normal
        sbp.append(np.random.normal(115, 10))
        dbp.append(np.random.normal(75, 8))

df['Systolic_BP'] = np.round(sbp).astype(int)
df['Diastolic_BP'] = np.round(dbp).astype(int)

print(df[['BP_History', 'Systolic_BP', 'Diastolic_BP']].head(10))

        BP_History  Systolic_BP  Diastolic_BP
0           Normal          120            74
1           Normal          121            87
2           Normal          113            73
3     Hypertension          174           103
4  Prehypertension          125            89
5     Hypertension          143            90
6           Normal          117            60
7     Hypertension          124            89
8           Normal          105            78
9           Normal          106            64


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1985 entries, 0 to 1984
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Age               1985 non-null   int64  
 1   Salt_Intake       1985 non-null   float64
 2   Stress_Score      1985 non-null   int64  
 3   BP_History        1985 non-null   object 
 4   Sleep_Duration    1985 non-null   float64
 5   BMI               1985 non-null   float64
 6   Medication        1186 non-null   object 
 7   Family_History    1985 non-null   object 
 8   Exercise_Level    1985 non-null   object 
 9   Smoking_Status    1985 non-null   object 
 10  Has_Hypertension  1985 non-null   object 
 11  Systolic_BP       1985 non-null   int64  
 12  Diastolic_BP      1985 non-null   int64  
dtypes: float64(3), int64(4), object(6)
memory usage: 201.7+ KB


In [7]:
numeric = df.select_dtypes(include = "number")
cat = df.select_dtypes(include = "object")

In [8]:
unique = df.apply(pd.Series.unique)
unique

Unnamed: 0,0
Age,"[69, 32, 78, 38, 41, 20, 39, 70, 19, 47, 55, 8..."
Salt_Intake,"[8.0, 11.7, 9.5, 10.0, 9.8, 10.8, 8.9, 5.9, 9...."
Stress_Score,"[9, 10, 3, 1, 0, 7, 5, 8, 6, 4, 2]"
BP_History,"[Normal, Hypertension, Prehypertension]"
Sleep_Duration,"[6.4, 5.4, 7.1, 4.2, 5.8, 5.2, 7.8, 7.2, 4.7, ..."
BMI,"[25.8, 23.4, 18.7, 22.1, 16.2, 21.9, 27.6, 36...."
Medication,"[nan, ACE Inhibitor, Other, Beta Blocker, Diur..."
Family_History,"[Yes, No]"
Exercise_Level,"[Low, Moderate, High]"
Smoking_Status,"[Non-Smoker, Smoker]"


In [9]:
df["Family_History"] = df["Family_History"].map({"Yes" : 1, "No" : 0})
df["Smoking_Status"] = df["Smoking_Status"].map({"Non-Smoker" : 0, "Smoker" : 1})
df["Has_Hypertension"] = df["Has_Hypertension"].map({"Yes" : 1, "No" : 0})

In [10]:
data = pd.get_dummies(df, columns=["BP_History","Exercise_Level"],dtype=int, drop_first=True)

In [11]:
data.sample(5)

Unnamed: 0,Age,Salt_Intake,Stress_Score,Sleep_Duration,BMI,Medication,Family_History,Smoking_Status,Has_Hypertension,Systolic_BP,Diastolic_BP,BP_History_Normal,BP_History_Prehypertension,Exercise_Level_Low,Exercise_Level_Moderate
60,31,12.6,8,8.8,26.2,,0,0,0,138,78,0,1,0,1
1842,38,8.7,1,5.8,25.1,ACE Inhibitor,0,0,0,121,68,1,0,0,1
1914,62,7.5,3,5.6,30.9,Beta Blocker,1,0,1,168,100,0,0,1,0
875,70,12.1,3,5.8,31.3,,0,0,1,109,102,0,0,1,0
1076,32,9.6,9,7.5,21.8,Diuretic,1,0,0,115,77,1,0,0,1


In [12]:
data = data.drop("Medication", axis=1)

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

X = data.drop("Has_Hypertension",axis=1)
y = data["Has_Hypertension"]
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42, stratify=y)

In [14]:
lr = LogisticRegression(max_iter = 900)
lr.fit(X_train,y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train,y_train)

In [16]:
xgb = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss')
xgb.fit(X_train,y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [17]:
models = {"Logistic Regression" : lr, "Decision Tree" : dt, "XGBoost" : xgb}

for name, model in models.items() :
    y_pred = model.predict(X_test)

    print(f'''{name}
    Accuray : {accuracy_score(y_test,y_pred)}
    f1_score : {f1_score(y_test,y_pred)}''')

Logistic Regression
    Accuray : 0.8664987405541562
    f1_score : 0.8710462287104623
Decision Tree
    Accuray : 0.8916876574307305
    f1_score : 0.8963855421686747
XGBoost
    Accuray : 0.9622166246851386
    f1_score : 0.9635036496350365


In [18]:
xgb.save_model('xgb_hypertension_dailyMed.json')

# CELL 2 – Also save as pickle (for Flask/Django loading)
import joblib
joblib.dump(xgb, 'xgb_hypertension_dailyMed.pkl')

from google.colab import files
files.download('xgb_hypertension_dailyMed.json')
files.download('xgb_hypertension_dailyMed.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# =============================
# 🔹 Load Libraries
# =============================
import pandas as pd
import joblib
from google.colab import files

# =============================
# 🔹 Upload Your Saved Model
# =============================
print("Upload your trained SVM model file (svm_model.pkl)")
uploaded = files.upload()

model = joblib.load(list(uploaded.keys())[0])

# Get training columns directly from the model
training_columns = model.feature_names_in_
print("Training columns loaded from model:")
print(training_columns)

# =============================
# 🔹 Upload Test Dataset
# =============================
print("\nUpload the test CSV dataset")
uploaded = files.upload()

test_file = list(uploaded.keys())[0]
test_df = pd.read_csv(test_file)

print("Testing on:", test_file)
print(test_df.head())

# =============================
# 🔹 Preprocess Data (Same as Training)
# =============================

# Remove target if included
if 'Has_Hypertension' in test_df.columns:
    X_test = test_df.drop("Has_Hypertension", axis=1)
else:
    X_test = test_df.copy()

# One-hot encoding categorical features
X_test_processed = pd.get_dummies(X_test)

# Add missing columns (important)
for col in training_columns:
    if col not in X_test_processed.columns:
        X_test_processed[col] = 0

# Keep correct column order
X_test_processed = X_test_processed[list(training_columns)]

# =============================
# 🔹 Predict with Model
# =============================
pred = model.predict(X_test_processed)
test_df["Predicted_Hypertension"] = pred

# =============================
# 🔹 Show Results
# =============================
print("\n===== Predictions =====")
print(test_df[["Age", "Salt_Intake", "Stress_Score", "Predicted_Hypertension"]].head())

# =============================
# 🔹 Download Results File
# =============================
output_path = "test_results.csv"
test_df.to_csv(output_path, index=False)
files.download(output_path)


Upload your trained SVM model file (svm_model.pkl)


Saving xgb_hypertension_dailyMed.pkl to xgb_hypertension_dailyMed (1).pkl
Training columns loaded from model:
['Age' 'Salt_Intake' 'Stress_Score' 'Sleep_Duration' 'BMI'
 'Family_History' 'Smoking_Status' 'Systolic_BP' 'Diastolic_BP'
 'BP_History_Normal' 'BP_History_Prehypertension' 'Exercise_Level_Low'
 'Exercise_Level_Moderate']

Upload the test CSV dataset


Saving test_dataset_correct_1.csv to test_dataset_correct_1.csv
Testing on: test_dataset_correct_1.csv
   Age  Salt_Intake  Stress_Score  Sleep_Duration   BMI Family_History  \
0   41          9.7             4             4.9  29.3            Yes   
1   77          7.9             4             6.9  23.9            Yes   
2   71         11.4             2             8.4  31.1             No   
3   44          7.7             1             8.7  19.1            Yes   
4   74         11.7             6             7.1  18.8            Yes   

  Smoking_Status Exercise_Level       BP_History  Systolic_BP  Diastolic_BP  \
0     Non-Smoker       Moderate  Prehypertension          148           107   
1         Smoker            Low  Prehypertension          151            89   
2     Non-Smoker       Moderate  Prehypertension          154            68   
3         Smoker           High           Normal          105            84   
4     Non-Smoker            Low  Prehypertension         

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [20]:
# =============================
# 🔹 Load Libraries
# =============================
import pandas as pd
import joblib
from google.colab import files

# =============================
# 🔹 Upload Your Saved Model
# =============================
print("Upload your trained SVM model file (svm_model.pkl)")
uploaded = files.upload()

model = joblib.load(list(uploaded.keys())[0])

# Get training columns directly from the model
training_columns = model.feature_names_in_
print("Training columns loaded from model:")
print(training_columns)

# =============================
# 🔹 Upload Test Dataset
# =============================
print("\nUpload the test CSV dataset")
uploaded = files.upload()

test_file = list(uploaded.keys())[0]
test_df = pd.read_csv(test_file)

print("Testing on:", test_file)
print(test_df.head())

# =============================
# 🔹 Preprocess Data (Same as Training)
# =============================

# Remove target if included
if 'Has_Hypertension' in test_df.columns:
    X_test = test_df.drop("Has_Hypertension", axis=1)
else:
    X_test = test_df.copy()

# One-hot encoding categorical features
X_test_processed = pd.get_dummies(X_test)

# Add missing columns (important)
for col in training_columns:
    if col not in X_test_processed.columns:
        X_test_processed[col] = 0

# Keep correct column order
X_test_processed = X_test_processed[list(training_columns)]

# =============================
# 🔹 Predict with Model
# =============================
pred = model.predict(X_test_processed)
test_df["Predicted_Hypertension"] = pred

# =============================
# 🔹 Show Results
# =============================
print("\n===== Predictions =====")
print(test_df[["Age", "Salt_Intake", "Stress_Score", "Predicted_Hypertension"]].head())

# =============================
# 🔹 Download Results File
# =============================
output_path = "test_results.csv"
test_df.to_csv(output_path, index=False)
files.download(output_path)

Upload your trained SVM model file (svm_model.pkl)


Saving xgb_hypertension_dailyMed.pkl to xgb_hypertension_dailyMed (2).pkl
Training columns loaded from model:
['Age' 'Salt_Intake' 'Stress_Score' 'Sleep_Duration' 'BMI'
 'Family_History' 'Smoking_Status' 'Systolic_BP' 'Diastolic_BP'
 'BP_History_Normal' 'BP_History_Prehypertension' 'Exercise_Level_Low'
 'Exercise_Level_Moderate']

Upload the test CSV dataset


Saving test_dataset_correct_2.csv to test_dataset_correct_2.csv
Testing on: test_dataset_correct_2.csv
   Age  Salt_Intake  Stress_Score  Sleep_Duration   BMI Family_History  \
0   49          7.5             9             6.9  22.3            Yes   
1   50         10.9             2             8.7  20.7            Yes   
2   25          6.6             8             4.7  27.7             No   
3   60         10.9             8             4.3  23.7            Yes   
4   63         11.2             7             4.9  20.0             No   

  Smoking_Status Exercise_Level       BP_History  Systolic_BP  Diastolic_BP  \
0     Non-Smoker       Moderate  Prehypertension          139            96   
1     Non-Smoker       Moderate           Normal          118            87   
2     Non-Smoker       Moderate           Normal          109            64   
3         Smoker            Low           Normal          155           105   
4         Smoker       Moderate  Prehypertension         

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
# =============================
# 🔹 Load Libraries
# =============================
import pandas as pd
import joblib
import numpy as np
from google.colab import files

# --- NEW: Recommendation Logic ---
def generate_hypertension_recommendation(prediction_class, probability):
    """Generates an action and explanation based on the prediction and confidence."""
    if prediction_class == 1:
        # High-risk prediction
        action = "CONSULT DOCTOR"
        explanation = f"High Risk ({probability:.2f} confidence). Immediate medical follow-up is recommended."
    else:
        # Low-risk prediction
        action = "MONITOR"
        explanation = f"Low Risk ({1 - probability:.2f} confidence). Continue monitoring blood pressure regularly."

    return action, explanation


# =============================
# 🔹 Upload Your Saved Model
# =============================
print("Upload your trained SVM model file (svm_model.pkl)")
uploaded = files.upload()

model = joblib.load(list(uploaded.keys())[0])

# Get training columns directly from the model
training_columns = model.feature_names_in_
print("Training columns loaded from model:")
print(training_columns)

# =============================
# 🔹 Upload Test Dataset
# =============================
print("\nUpload the test CSV dataset")
uploaded = files.upload()

test_file = list(uploaded.keys())[0]
test_df = pd.read_csv(test_file)

print("Testing on:", test_file)
print(test_df.head())

# =============================
# 🔹 Preprocess Data (Same as Training)
# =============================

# Remove target if included
if 'Has_Hypertension' in test_df.columns:
    X_test = test_df.drop("Has_Hypertension", axis=1)
else:
    X_test = test_df.copy()

# One-hot encoding categorical features
X_test_processed = pd.get_dummies(X_test)

# Add missing columns (important for deployment stability)
for col in training_columns:
    if col not in X_test_processed.columns:
        X_test_processed[col] = 0

# Keep correct column order
X_test_processed = X_test_processed[list(training_columns)]

# =============================
# 🔹 Predict with Model (MODIFIED)
# =============================
# Predict the class (0 or 1)
pred = model.predict(X_test_processed)

# Predict the probability for class 1 (Hypertension risk score)
# Note: SVM models may require probability=True during training
try:
    proba = model.predict_proba(X_test_processed)[:, 1]
except AttributeError:
    # Fallback if the SVM model was trained without probability estimates
    print("\n⚠️ WARNING: predict_proba not available. Using class prediction only.")
    proba = pred # Use the class label as a placeholder confidence

# Apply the new recommendation function
recommendation_results = [generate_hypertension_recommendation(p_class, p_proba)
                          for p_class, p_proba in zip(pred, proba)]

# Unpack the results into new columns
test_df["Predicted_Hypertension"] = pred
test_df["Risk_Score"] = proba
test_df["Recommendation_Action"] = [r[0] for r in recommendation_results]
test_df["Recommendation_Explanation"] = [r[1] for r in recommendation_results]


# =============================
# 🔹 Show Results (MODIFIED)
# =============================
print("\n===== Predictions and Recommendations =====")

# Display the key input features, the risk score, and the final recommendation
print(test_df[[
    "Age",
    "Salt_Intake",
    "Predicted_Hypertension",
    "Risk_Score",
    "Recommendation_Action",
    "Recommendation_Explanation"
]].head())

# =============================
# 🔹 Download Results File
# =============================
output_path = "test_results_with_recommendations.csv"
test_df.to_csv(output_path, index=False)
files.download(output_path)

Upload your trained SVM model file (svm_model.pkl)


Saving xgb_hypertension_dailyMed.pkl to xgb_hypertension_dailyMed (3).pkl
Training columns loaded from model:
['Age' 'Salt_Intake' 'Stress_Score' 'Sleep_Duration' 'BMI'
 'Family_History' 'Smoking_Status' 'Systolic_BP' 'Diastolic_BP'
 'BP_History_Normal' 'BP_History_Prehypertension' 'Exercise_Level_Low'
 'Exercise_Level_Moderate']

Upload the test CSV dataset


Saving test_dataset_correct_1.csv to test_dataset_correct_1 (1).csv
Testing on: test_dataset_correct_1 (1).csv
   Age  Salt_Intake  Stress_Score  Sleep_Duration   BMI Family_History  \
0   41          9.7             4             4.9  29.3            Yes   
1   77          7.9             4             6.9  23.9            Yes   
2   71         11.4             2             8.4  31.1             No   
3   44          7.7             1             8.7  19.1            Yes   
4   74         11.7             6             7.1  18.8            Yes   

  Smoking_Status Exercise_Level       BP_History  Systolic_BP  Diastolic_BP  \
0     Non-Smoker       Moderate  Prehypertension          148           107   
1         Smoker            Low  Prehypertension          151            89   
2     Non-Smoker       Moderate  Prehypertension          154            68   
3         Smoker           High           Normal          105            84   
4     Non-Smoker            Low  Prehypertension 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>