In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")


# Step 1: Load dataset and preprocessing

In [4]:
df = pd.read_csv("/content/drive/MyDrive/healthcare-dataset-stroke-data.csv")
df.drop(columns=["id"], inplace=True)
df['bmi'] = df['bmi'].fillna(df['bmi'].median())

# One-hot encoding

In [5]:
categorical = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
df = pd.get_dummies(df, columns=categorical, drop_first=True)

# Features and target

In [6]:
X = df.drop("stroke", axis=1)
y = df["stroke"]


# Step 2: Handle imbalance with SMOTE

In [7]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)


# Step 3: Train-test split and scaling

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, stratify=y_res, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Step 4: Train the model

In [9]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Step 5: Evaluate model

In [10]:
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_proba)

print("\n📊 Model Evaluation Metrics:")
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"🏥 AUC Score: {auc_score:.4f}")



📊 Model Evaluation Metrics:
✅ Accuracy: 0.9614
🏥 AUC Score: 0.9943


# Step 6: Take user input and predict

In [16]:
def get_user_input():
    print("\n🔍 Enter your medical details for stroke prediction:\n")

    age = float(input("Age: "))
    hypertension = int(input("Hypertension (0 = No, 1 = Yes): "))
    heart_disease = int(input("Heart Disease (0 = No, 1 = Yes): "))
    ever_married = input("Ever Married? (Yes/No): ").strip().capitalize()
    work_type = input("Work Type (Private/Self-employed/Govt_job/children/Never_worked): ").strip()
    residence_type = input("Residence Type (Urban/Rural): ").strip()
    avg_glucose = float(input("Average Glucose Level: "))
    bmi = float(input("BMI: "))
    smoking_status = input("Smoking Status (formerly smoked/never smoked/smokes): ").strip()
    gender = input("Gender (Male/Female/Other): ").strip()

    input_dict = {
        'age': age,
        'hypertension': hypertension,
        'heart_disease': heart_disease,
        'avg_glucose_level': avg_glucose,
        'bmi': bmi,
        'ever_married_Yes': 1 if ever_married == 'Yes' else 0,
        'work_type_Private': 0,
        'work_type_Self-employed': 0,
        'work_type_children': 0,
        'work_type_Never_worked': 0,
        'Residence_type_Urban': 1 if residence_type == 'Urban' else 0,
        'smoking_status_formerly smoked': 0,
        'smoking_status_never smoked': 0,
        'smoking_status_smokes': 0,
        'gender_Male': 0,
        'gender_Other': 0
    }

    # Handle work_type encoding
    if work_type == 'Private':
        input_dict['work_type_Private'] = 1
    elif work_type == 'Self-employed':
        input_dict['work_type_Self-employed'] = 1
    elif work_type == 'children':
        input_dict['work_type_children'] = 1
    elif work_type == 'Never_worked':
        input_dict['work_type_Never_worked'] = 1

    # Handle smoking_status encoding
    if smoking_status == 'formerly smoked':
        input_dict['smoking_status_formerly smoked'] = 1
    elif smoking_status == 'never smoked':
        input_dict['smoking_status_never smoked'] = 1
    elif smoking_status == 'smokes':
        input_dict['smoking_status_smokes'] = 1

    # Handle gender encoding
    if gender == 'Male':
        input_dict['gender_Male'] = 1
    elif gender == 'Other':
        input_dict['gender_Other'] = 1

    return input_dict

# Step 7: Predict for user input


In [28]:
# Convert user input to DataFrame
user_df = pd.DataFrame([user_data])

# Ensure the order of columns matches the training data
user_df = user_df[X.columns]

# Scale the user input
user_scaled = scaler.transform(user_df)
user_data = get_user_input()
# Predict stroke risk
prediction = model.predict(user_scaled)
prediction_proba = model.predict_proba(user_scaled)[:, 1]

print("\n🧠 Stroke Prediction Results:")
if prediction[0] == 1:
    print("❗ Based on the provided information, there is a predicted risk of stroke.")
else:
    print("✅ Based on the provided information, there is no predicted risk of stroke.")

print(f"📊 Probability of stroke: {prediction_proba[0]:.4f}")


🔍 Enter your medical details for stroke prediction:

Age: 33
Hypertension (0 = No, 1 = Yes): 1
Heart Disease (0 = No, 1 = Yes): 1
Ever Married? (Yes/No): Yes
Work Type (Private/Self-employed/Govt_job/children/Never_worked): Never_worked
Residence Type (Urban/Rural): Rural
Average Glucose Level: 167
BMI: 67
Smoking Status (formerly smoked/never smoked/smokes): smokes
Gender (Male/Female/Other): Male

🧠 Stroke Prediction Results:
✅ Based on the provided information, there is no predicted risk of stroke.
📊 Probability of stroke: 0.0900
