## Testing the server

In [2]:
import requests

SERVER_URL = "http://localhost:8000"

def get_root(timeout=10.0):
    """
    Fetch the server's root endpoint and return its decoded JSON body.

    Parameters:
    - timeout: seconds to wait for the server before giving up. Without a
      timeout, requests waits indefinitely when the server is unreachable.

    Raises:
    - requests.HTTPError if the server answers with a 4xx/5xx status.
    - requests.RequestException subclasses on connection errors or timeouts.
    """
    response = requests.get(f"{SERVER_URL}/", timeout=timeout)
    # Fail with the HTTP status instead of an opaque JSON decode error on an error page.
    response.raise_for_status()
    return response.json()

## Testing the Inference

In [3]:
import pandas as pd

# Load the stroke CSV; adjust the relative path to match your own layout.
df = pd.read_csv(filepath_or_buffer="../Data/stroke_data_cleaned.csv")

# Peek at five randomly chosen rows.
df.sample(n=5)


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
2826,21162,Female,1.537935,0,0,Yes,Self-employed,Rural,-0.581612,-0.803683,Unknown,0
3387,65277,Female,1.537935,1,0,No,Self-employed,Rural,2.058363,0.05315,never smoked,0
2010,26247,Female,1.537935,0,0,Yes,Private,Rural,-0.169405,-1.604329,Unknown,0
4731,17130,Female,-0.89457,0,0,No,Private,Rural,-0.735776,0.193614,never smoked,0
4225,14658,Female,-0.275387,0,0,Yes,Private,Rural,-0.719516,2.469137,Unknown,0


In [21]:
# Integer codes for the categorical columns, keyed by the label strings
# that appear in the dataset.

# Work-type labels are numbered 0..4 in the order listed here.
work_type_mapping = {
    label: code
    for code, label in enumerate(
        ("Private", "Self-employed", "Govt_job", "children", "Never_worked")
    )
}

ever_married_mapping = {"Yes": 1, "No": 0}

residence_type_mapping = {"Urban": 1, "Rural": 0}

smoking_status_mapping = dict(
    [
        ("smokes", 1),
        ("formerly smoked", 2),
        ("never smoked", 0),
        ("Unknown", 3),
    ]
)

def predict_stroke_df(df, timeout=10.0):
    """
    Takes a DataFrame of stroke patient data, encodes categorical columns
    with the module-level integer mappings, POSTs each row to
    ``{SERVER_URL}/predict/`` and returns the results in a new DataFrame.

    Parameters:
    - df: DataFrame with columns age, hypertension, heart_disease,
      ever_married, work_type, Residence_type, avg_glucose_level, bmi,
      smoking_status.
    - timeout: seconds to wait for each request before giving up.

    Returns:
    - DataFrame with one row per input row: the encoded payload plus whatever
      keys the server's JSON response contains. A row whose request does not
      return HTTP 200 is kept with ``stroke_prediction`` and
      ``stroke_probability`` set to None, and a RuntimeWarning reports the
      status code and the start of the response body.

    Raises:
    - KeyError if a categorical value is missing from its mapping.
    - requests.RequestException subclasses on connection errors or timeouts.
    """
    import warnings  # local import so this cell does not depend on another cell's imports

    results = []

    for _, row in df.iterrows():
        # Convert row to numeric payload
        patient = {
            "age": float(row["age"]),
            "hypertension": int(row["hypertension"]),
            "heart_disease": int(row["heart_disease"]),
            "ever_married": ever_married_mapping[row["ever_married"]],
            "work_type": work_type_mapping[row["work_type"]],
            "residence_type": residence_type_mapping[row["Residence_type"]],
            "avg_glucose_level": float(row["avg_glucose_level"]),
            "bmi": float(row["bmi"]),
            "smoking_status": smoking_status_mapping[row["smoking_status"]]
        }

        # Send to server
        response = requests.post(f"{SERVER_URL}/predict/", json=patient, timeout=timeout)
        if response.status_code == 200:
            results.append({**patient, **response.json()})
        else:
            # Surface the failure: a column of bare None values gives no hint
            # about why the server rejected the payload.
            warnings.warn(
                f"/predict/ returned HTTP {response.status_code}: {response.text[:200]}",
                RuntimeWarning,
                stacklevel=2,
            )
            results.append({**patient, "stroke_prediction": None, "stroke_probability": None})

    return pd.DataFrame(results)


In [22]:
# Draw five random rows and renumber them 0..4.
sample_df = df.sample(n=5, ignore_index=True)

# Send the sampled rows to the server and display what comes back.
predictions_df = predict_stroke_df(sample_df)
predictions_df


Unnamed: 0,age,hypertension,heart_disease,ever_married,work_type,residence_type,avg_glucose_level,bmi,smoking_status,stroke_prediction,stroke_probability
0,-0.673433,0,0,1,0,1,0.266588,-0.115407,3,,
1,-1.425298,0,0,0,3,0,0.606832,-0.368243,3,,
2,-1.876418,0,0,0,3,1,0.16843,-1.857165,3,,
3,0.388024,0,0,1,0,1,0.166924,-0.887961,0,,
4,0.918752,0,1,1,0,0,-0.545179,0.109336,0,,


In [24]:
from sklearn.preprocessing import OneHotEncoder

# Columns forwarded unchanged, and columns expanded into indicator columns.
numeric_cols = ["age", "hypertension", "heart_disease", "avg_glucose_level", "bmi"]
categorical_cols = ["gender", "ever_married", "work_type", "Residence_type", "smoking_status"]

# drop=None keeps an indicator for every category; sparse_output=False gives a dense array.
encoder = OneHotEncoder(drop=None, sparse_output=False)
encoded_array = encoder.fit_transform(df[categorical_cols])
encoded_feature_names = encoder.get_feature_names_out(categorical_cols)

# Label the encoded array with the generated feature names.
df_encoded = pd.DataFrame(data=encoded_array, columns=encoded_feature_names)

# Numeric columns first, indicator columns after; both sides get a fresh
# 0..n-1 index so the rows line up by position.
df_model_input = pd.concat(
    objs=[
        df[numeric_cols].reset_index(drop=True),
        df_encoded.reset_index(drop=True),
    ],
    axis="columns",
)

print("Model input shape:", df_model_input.shape)


Model input shape: (5110, 21)


In [30]:
def predict_stroke_df(df_encoded, server_url=None, timeout=10.0):
    """
    Takes a DataFrame where categorical columns are already one-hot encoded,
    sends each row to the server, and returns predictions in a new DataFrame.

    Parameters:
    - df_encoded: DataFrame with numeric features (after one-hot encoding)
    - server_url: full URL of the prediction endpoint, e.g.
      "http://127.0.0.1:8000/predict/". Defaults to ``{SERVER_URL}/predict/``,
      the endpoint the other client cells in this notebook post to.
    - timeout: seconds to wait for each request before giving up.

    Returns:
    - DataFrame with one row per input row: the row's values plus whatever
      keys the server's JSON response contains. A row whose request does not
      return HTTP 200 is kept with ``stroke_prediction`` and
      ``stroke_probability`` set to None, and a RuntimeWarning reports the
      status code and the start of the response body.

    Raises:
    - requests.RequestException subclasses on connection errors or timeouts.
    """
    import warnings  # local import so this cell does not depend on another cell's imports

    if server_url is None:
        # The original posted to the bare server root, which is not the prediction route.
        server_url = f"{SERVER_URL}/predict/"

    results = []

    for _, row in df_encoded.iterrows():
        # Convert row to numeric payload (all columns are numeric)
        patient = row.to_dict()

        # Send to server
        response = requests.post(server_url, json=patient, timeout=timeout)
        if response.status_code == 200:
            results.append({**patient, **response.json()})
        else:
            # Surface the failure instead of silently emitting None values.
            warnings.warn(
                f"{server_url} returned HTTP {response.status_code}: {response.text[:200]}",
                RuntimeWarning,
                stacklevel=2,
            )
            results.append({**patient, "stroke_prediction": None, "stroke_probability": None})

    return pd.DataFrame(results)


In [36]:
# Draw five random rows of the one-hot encoded input and renumber them 0..4.
sample_df = df_model_input.sample(n=5, ignore_index=True)

# Send the sampled rows to the server and display what comes back.
predictions_df = predict_stroke_df(sample_df)
predictions_df


Unnamed: 0,age,hypertension,heart_disease,avg_glucose_level,bmi,gender_Female,gender_Male,gender_Other,ever_married_No,ever_married_Yes,...,work_type_Self-employed,work_type_children,Residence_type_Rural,Residence_type_Urban,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,stroke_prediction,stroke_probability
0,-0.010023,0.0,0.0,-0.608109,2.441044,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,,
1,1.007207,0.0,0.0,-0.452139,-0.101361,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,,
2,0.520706,0.0,0.0,-0.621659,-0.003036,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,,
3,1.228344,0.0,0.0,2.058363,0.432404,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,
4,-1.027252,0.0,0.0,0.712518,-0.199686,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,,


In [4]:
def predict_stroke_df(df: pd.DataFrame, timeout: float = 10.0) -> pd.DataFrame:
    """
    Takes a DataFrame of stroke patient data (raw features),
    POSTs each row to ``{SERVER_URL}/predict/`` and returns the results in a
    new DataFrame. Categorical values are sent as strings, not encoded.

    Expected columns in df:
        age, hypertension, heart_disease, ever_married,
        work_type, Residence_type, avg_glucose_level, bmi, smoking_status

    Parameters:
    - df: DataFrame with the columns listed above.
    - timeout: seconds to wait for each request before giving up.

    Returns:
    - DataFrame with one row per input row: the payload plus whatever keys
      the server's JSON response contains. A row whose request does not
      return HTTP 200 is kept with ``stroke_prediction`` and
      ``stroke_probability`` set to None, and a RuntimeWarning reports the
      status code and the start of the response body.

    Raises:
    - requests.RequestException subclasses on connection errors or timeouts.
    """
    import warnings  # local import so this cell does not depend on another cell's imports

    results = []

    for _, row in df.iterrows():
        # Convert row to dictionary (JSON payload)
        patient = {
            "age": float(row["age"]),
            "hypertension": int(row["hypertension"]),
            "heart_disease": int(row["heart_disease"]),
            "ever_married": str(row["ever_married"]),
            "work_type": str(row["work_type"]),
            "Residence_type": str(row["Residence_type"]),
            "avg_glucose_level": float(row["avg_glucose_level"]),
            "bmi": float(row["bmi"]),
            "smoking_status": str(row["smoking_status"])
        }

        # Send to server
        response = requests.post(f"{SERVER_URL}/predict/", json=patient, timeout=timeout)
        if response.status_code == 200:
            results.append({**patient, **response.json()})
        else:
            # Surface the failure: a column of bare None values gives no hint
            # about why the server rejected the payload.
            warnings.warn(
                f"/predict/ returned HTTP {response.status_code}: {response.text[:200]}",
                RuntimeWarning,
                stacklevel=2,
            )
            results.append({**patient, "stroke_prediction": None, "stroke_probability": None})

    return pd.DataFrame(results)


In [5]:
# Take five random rows from df, the stroke dataset loaded earlier
sample_df = df.sample(5).reset_index(drop=True)

# Get predictions
predictions_df = predict_stroke_df(sample_df)

# Show results (bare expression for rich display, like the other call cells)
predictions_df

        age  hypertension  heart_disease ever_married work_type  \
0  0.962980             0              0          Yes   Private   
1 -0.142705             0              0          Yes   Private   
2  1.007207             0              0          Yes   Private   
3 -1.901185             0              0           No  children   
4  0.609161             0              0          Yes   Private   

  Residence_type  avg_glucose_level       bmi   smoking_status  \
0          Rural           2.058363  1.823001     never smoked   
1          Rural           2.058363 -0.115407           smokes   
2          Urban          -0.629186  0.460497     never smoked   
3          Urban          -0.352776 -2.039769          Unknown   
4          Rural           1.042826  1.050447  formerly smoked   

  stroke_prediction stroke_probability  
0              None               None  
1              None               None  
2              None               None  
3              None               No