# In [1]:
# Colab Cell 1: Setup and Data Generation
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import gradio as gr
import joblib # For saving the model if needed externally

# --- 1. DATA SIMULATION (200 Synthetic People) ---
print("✅ Step 1: Generating Synthetic Data for 200 individuals...")
np.random.seed(42)  # fixed seed so every run draws the same cohort
N = 200  # cohort size

# Each feature is drawn from the seeded global RNG, one column after another.
# The order of these draws determines the generated values, so keep it stable.
data = {}
data['Age'] = np.random.randint(25, 65, N)                   # integers 25..64
data['BMI'] = np.round(np.random.uniform(18.5, 35.0, N), 1)  # one decimal place
data['Smoking_Status'] = np.random.randint(0, 2, N)          # 0 = no, 1 = current smoker
data['Exercise_Freq'] = np.random.randint(0, 6, N)           # sessions per week, 0..5
data['Diet_Quality'] = np.random.randint(1, 6, N)            # 1 = poor .. 5 = excellent
data['Stress_Level'] = np.random.randint(1, 6, N)            # 1 = low .. 5 = high
df = pd.DataFrame(data)

# --- 2. GENERATE SYNTHETIC TARGET RISKS (0 to 1) ---
# Hand-written linear score standing in for medical logic: age, BMI, smoking
# and stress push the risk up; exercise and diet quality pull it down.
# NOTE(review): 0.015 * Age alone is already >= 0.555 from age 37 upwards, so
# many raw scores sit above the 0.55 cap applied below -- check that the
# targets are not mostly saturated at the cap.
df['Risk_Diabetes'] = (
    0.015 * df['Age']
    + 0.02 * (df['BMI'] - 20)
    + 0.15 * df['Smoking_Status']
    + 0.02 * df['Stress_Level']
    - 0.03 * df['Exercise_Freq']
    - 0.01 * df['Diet_Quality']
)
# The other three risks are affine functions of the (unclipped) diabetes score.
df['Risk_HeartDisease'] = df['Risk_Diabetes'].mul(1.5).add(0.05)
df['Risk_Stroke'] = df['Risk_Diabetes'].mul(0.8)
df['Risk_Hypertension'] = df['Risk_Diabetes'].mul(1.2).add(0.03)

# Target columns, in the order they were created above.
risk_cols = [name for name in df.columns if name.startswith('Risk_')]

# Keep every risk inside the 5%..55% band.
df[risk_cols] = df[risk_cols].clip(0.05, 0.55)

# Jitter each target by up to +/-0.02, then clip again so the noise cannot
# push a value outside the band.
for risk_col in risk_cols:
    jitter = np.random.uniform(-0.02, 0.02, N)
    df[risk_col] = (df[risk_col] + jitter).clip(0.05, 0.55)

# --- 3. TRAIN THE RANDOM FOREST MODEL ---
print("✅ Step 2: Training the Multi-Output Random Forest Model...")
FEATURES = [
    'Age',
    'BMI',
    'Smoking_Status',
    'Exercise_Freq',
    'Diet_Quality',
    'Stress_Level',
]
TARGETS = risk_cols  # the four Risk_* columns; one forest predicts all of them

X, y = df[FEATURES], df[TARGETS]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,  # use every available core
).fit(X_train, y_train)

# Score on the held-out rows.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Model Training Complete. Mean Absolute Error (MAE): {mae:.4f}\n")

# Optional: to reuse the trained model outside Colab, persist it with
#   joblib.dump(model, 'health_forecast_model.pkl')
# and report it with
#   print("Model saved as 'health_forecast_model.pkl'")
# Colab Cell 2: Prediction Function and Gradio UI Integration

# Maps each target column name to the label shown to the user
# (plot legend and risk summary).
RISK_NAMES = dict(
    Risk_Diabetes='Diabetes',
    Risk_HeartDisease='Heart Disease',
    Risk_Stroke='Stroke',
    Risk_Hypertension='Hypertension',
)

def _scenario_risks_pct(user_input, feature, new_value):
    """Predict risks (in percent, keyed by target) with one feature overridden."""
    scenario = user_input.copy()
    scenario[feature] = new_value
    fractions = model.predict(scenario)[0]
    return {target: frac * 100 for target, frac in zip(TARGETS, fractions)}


def _relative_reduction_pct(baseline_pct, scenario_pct):
    """Return how far ``scenario_pct`` lies below ``baseline_pct``, in percent of baseline.

    The result is clamped at 0: the model can predict a slightly higher risk
    for a "healthier" input, and reporting a negative reduction would make no
    sense in the UI. A non-positive baseline yields 0.0 instead of dividing
    by zero.
    """
    if baseline_pct <= 0:
        return 0.0
    return max(0.0, (baseline_pct - scenario_pct) / baseline_pct * 100)


def _bulleted_section(title, items, empty_note):
    """Format a bold Markdown heading followed by a bullet list, or a fallback note."""
    if items:
        body = "\n".join(f"- {item}" for item in items)
    else:
        body = f"_{empty_note}_"
    # The blank line matters: a single newline is only a soft break in
    # Markdown and would glue the first item onto the heading line.
    return f"**{title}**\n\n{body}"


def generate_health_forecast(age, bmi, smoking_status, exercise_freq, diet_quality, stress_level):
    """
    Predict health risks for one profile and build every output of the UI.

    Reads the module-level ``model``, ``FEATURES``, ``TARGETS`` and
    ``RISK_NAMES``.

    Args:
        age: Age in years; also the first age on the timeline plot.
        bmi: Body-mass index.
        smoking_status: 1 for a current smoker, 0 otherwise.
        exercise_freq: Exercise sessions per week.
        diet_quality: Diet rating (higher is better).
        stress_level: Stress rating (higher is worse).

    Returns:
        A 5-tuple ``(summary_md, figure, risks_md, what_if_md, advice_md)``:
        Markdown with the health score and years potential, the matplotlib
        figure of the 20-year risk timeline, and three Markdown sections
        (baseline risks, what-if scenarios, recommendations).
    """
    # 1. One-row frame with the columns in the order used for training.
    user_input = pd.DataFrame([{
        'Age': age,
        'BMI': bmi,
        'Smoking_Status': smoking_status,
        'Exercise_Freq': exercise_freq,
        'Diet_Quality': diet_quality,
        'Stress_Level': stress_level,
    }])[FEATURES]

    # 2. Baseline predictions: fractions from the model, percentages for display.
    baseline_fractions = model.predict(user_input)[0]
    baseline_risks = {
        target: frac * 100 for target, frac in zip(TARGETS, baseline_fractions)
    }

    # 3. Headline numbers, both simple linear functions of the mean risk.
    avg_risk = np.mean(baseline_fractions)
    health_score = int(85 - (avg_risk * 70))
    years_potential = int(15 - (avg_risk * 20))

    # 4. What-if scenarios. Each one re-runs the model with a single feature
    #    changed; the recommendation text reuses the same model-derived figure
    #    so the two panels agree and are reproducible for a given input.
    exercise_target = 4  # sessions per week simulated by the exercise scenario
    diet_target = 4      # diet rating simulated by the diet scenario
    stress_target = 2    # stress rating simulated by the stress scenario

    what_if_outputs = []
    ai_recommendations = []

    if smoking_status == 1:  # only meaningful for current smokers
        quit_risks = _scenario_risks_pct(user_input, 'Smoking_Status', 0)
        heart_cut = _relative_reduction_pct(
            baseline_risks['Risk_HeartDisease'], quit_risks['Risk_HeartDisease']
        )
        # Same "20 years per unit of mean risk" scale as years_potential above.
        quit_avg_risk = np.mean(list(quit_risks.values())) / 100
        years_gain = max(0.0, (avg_risk - quit_avg_risk) * 20)
        what_if_outputs.append(
            f"Quit Smoking: Reduce Heart Disease Risk by {heart_cut:.0f}% (Very High Impact)"
        )
        ai_recommendations.append(
            f"Quitting smoking could add about {years_gain:.1f} years to your "
            f"Years Potential. (Very High Impact)"
        )

    if exercise_freq < exercise_target:
        exercise_risks = _scenario_risks_pct(user_input, 'Exercise_Freq', exercise_target)
        diabetes_cut = _relative_reduction_pct(
            baseline_risks['Risk_Diabetes'], exercise_risks['Risk_Diabetes']
        )
        # The label is built from exercise_target so it always names the
        # frequency that was actually simulated.
        what_if_outputs.append(
            f"Exercise {exercise_target}x/week: Reduce Diabetes Risk by "
            f"{diabetes_cut:.0f}% (High Impact)"
        )
        ai_recommendations.append(
            f"Exercise {exercise_target}+ times weekly to reduce diabetes risk by "
            f"{diabetes_cut:.0f}%. (High Impact)"
        )

    if diet_quality < diet_target:
        diet_risks = _scenario_risks_pct(user_input, 'Diet_Quality', diet_target)
        heart_cut_diet = _relative_reduction_pct(
            baseline_risks['Risk_HeartDisease'], diet_risks['Risk_HeartDisease']
        )
        what_if_outputs.append(
            f"Mediterranean Diet: Reduce Heart Disease Risk by {heart_cut_diet:.0f}% (High Impact)"
        )
        ai_recommendations.append(
            f"Mediterranean diet can reduce heart disease risk by "
            f"{heart_cut_diet:.0f}%. (High Impact)"
        )

    if stress_level > stress_target:
        stress_risks = _scenario_risks_pct(user_input, 'Stress_Level', stress_target)
        hypertension_cut = _relative_reduction_pct(
            baseline_risks['Risk_Hypertension'], stress_risks['Risk_Hypertension']
        )
        what_if_outputs.append(
            f"Reduce Stress: Reduce Hypertension Risk by {hypertension_cut:.0f}% (Moderate Impact)"
        )
        ai_recommendations.append(
            "Stress management can lower blood pressure significantly. (Moderate Impact)"
        )

    # 5. 20-year timeline: same profile, age advanced one year per row.
    # NOTE(review): the training ages in this file span 25-64; expect the
    # curves to flatten once the timeline leaves that range -- confirm this
    # is acceptable for the demo.
    age_range = np.arange(age, age + 20, 1)
    risk_curve_df = pd.DataFrame({
        'Age': age_range,
        'BMI': bmi,
        'Smoking_Status': smoking_status,
        'Exercise_Freq': exercise_freq,
        'Diet_Quality': diet_quality,
        'Stress_Level': stress_level,
    })[FEATURES]
    risk_curve_data = pd.DataFrame(model.predict(risk_curve_df), columns=TARGETS)

    # Draw through explicit Figure/Axes objects instead of pyplot's implicit
    # "current figure", so the callback never draws onto another figure.
    fig, ax = plt.subplots(figsize=(10, 6))
    for target in TARGETS:
        ax.plot(
            age_range,
            risk_curve_data[target] * 100,
            marker='o',
            markersize=3,
            linestyle='-',
            label=RISK_NAMES[target],
            alpha=0.8,
        )
    ax.set_title('HealthForecast: Your 20-Year Risk Timeline', fontsize=16)
    ax.set_xlabel('Age', fontsize=12)
    ax.set_ylabel('Risk Percentage (%)', fontsize=12)
    ax.set_ylim(0, 60)  # fixed scale so successive forecasts are comparable
    ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)
    ax.legend(title="Disease Risk", bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    # Unregister the figure from pyplot so figures do not pile up across
    # calls; the Figure object itself stays usable by the caller.
    plt.close(fig)

    # 6. Markdown outputs. The closing ** must directly follow the text (no
    #    newline before it) or the bold markers are rendered literally, and
    #    the two lines need a blank line between them to stay separate.
    summary_md = (
        f"**Health Score: {health_score}**\n\n"
        f"**Years Potential: +{years_potential}**"
    )
    risks_md = _bulleted_section(
        "Baseline Risks:",
        [f"{RISK_NAMES[target]}: {val:.1f}%" for target, val in baseline_risks.items()],
        "No risks available.",
    )
    what_if_md = _bulleted_section(
        "What If Scenarios:",
        what_if_outputs,
        "No applicable scenarios for the current inputs.",
    )
    advice_md = _bulleted_section(
        "AI Recommendations:",
        ai_recommendations,
        "No recommendations for the current inputs.",
    )

    return (summary_md, fig, risks_md, what_if_md, advice_md)

print("Prediction function `generate_health_forecast` defined.")
# Colab Cell 3: Launch Gradio Interface

# Choice lists as (label, value) pairs: the label is what the user sees, the
# value is what gets passed to generate_health_forecast.
smoking_choices = [("No", 0), ("Current Smoker", 1)]
diet_choices = [("Poor (Fast food, processed)", 1), ("Average", 3), ("Good (Balanced)", 5)]
stress_choices = [("Low", 1), ("Moderate", 3), ("High", 5)]

# Input widgets, in the same order as generate_health_forecast's parameters
# (age, bmi, smoking_status, exercise_freq, diet_quality, stress_level).
inputs = [
    gr.Slider(label="Age (Years)", minimum=18, maximum=90, value=28),
    gr.Slider(label="BMI", minimum=15.0, maximum=50.0, step=0.1, value=26.0),
    gr.Radio(label="Smoking Status", choices=smoking_choices, value=1),
    gr.Slider(
        label="Exercise Frequency (times/week)",
        minimum=0,
        maximum=7,
        step=0.5,
        value=1.5,
    ),
    gr.Dropdown(label="Diet Quality", choices=diet_choices, value=1),
    gr.Radio(label="Stress Level", choices=stress_choices, value=5),
]

# Output widgets, in the same order as the tuple returned by
# generate_health_forecast.
outputs = [
    gr.Markdown(label="Health Score & Potential (UI Top Panel)"),
    gr.Plot(label="20-Year Health Forecast (UI Graph)"),
    gr.Markdown(label="Current Risk Profile"),
    gr.Markdown(label="Test 'What If' Scenarios (UI Cards)"),
    gr.Markdown(label="AI Recommendations (UI Bottom Panel)"),
]

# Wire the prediction function to the widgets.
health_forecast_interface = gr.Interface(
    title="HealthForecast: Your Predictive Health Timeline",
    description="Enter your health profile to get a 20-year forecast and personalized recommendations.",
    fn=generate_health_forecast,
    inputs=inputs,
    outputs=outputs,
    allow_flagging="never",  # no flag button
    # One pre-filled example row; its values equal the widget defaults above.
    examples=[
        [28, 26, 1, 1.5, 1, 5],
    ],
)

# Start the demo. share=False means no public share link is requested.
print("Launching HealthForecast Gradio demo...")
health_forecast_interface.launch(share=False)

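# Captured cell output: ModuleNotFoundError: No module named 'gradio'
# (the gradio package was not installed when this ran; install it first, e.g. `pip install gradio`)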