<a href="https://colab.research.google.com/github/PRIYANSHU-8055/Projects/blob/main/FINAL_JMD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd  # Import pandas and assign it to the alias 'pd'
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from sklearn.impute import SimpleImputer
import requests

# Function to fetch AQI based on state input (example implementation)
def get_aqi_for_state(state):
    """Return a simulated AQI reading for a state.

    This is a placeholder for a real API call: the values come from a small
    hard-coded table. The lookup ignores letter case and surrounding
    whitespace, so "delhi", " Delhi " and "DELHI" resolve to the same entry.

    Args:
        state: State name. Non-string values are treated as unknown.

    Returns:
        The simulated AQI for the state, or 150 when the state is not in
        the table.
    """
    default_aqi = 150  # Default AQI if state not found
    # Keys are stored case-folded so the lookup below is case-insensitive.
    simulated_aqi = {
        "delhi": 320,
        "kerala": 80,
        "punjab": 180,
        "maharashtra": 210,
        "rajasthan": 240,
    }
    if not isinstance(state, str):
        return default_aqi
    return simulated_aqi.get(state.strip().casefold(), default_aqi)

# Reload full dataset for consistency
df = pd.read_csv("/content/CVD_cleaned.csv")

# Create BMI column if missing: weight (kg) divided by squared height (m)
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulate AQI: values are drawn from a seeded normal distribution
# (mean 150, std 50), so the column is reproducible synthetic data rather
# than a measurement belonging to each row.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

# Drop Unnamed column if exists
if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Convert binary yes/no columns to 1/0.
# Any value other than exactly 'Yes' or 'No' becomes NaN after the mapping.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Define columns
numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

# Features and target
X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Preprocessing pipeline: numeric columns are mean-imputed then standardised
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical columns are mode-imputed then one-hot encoded; categories not
# seen during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed in a transformer, so they reach
# the models unchanged (and un-imputed) through remainder='passthrough'.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# Split (stratified so both sets keep the same class ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Transform: fit on the training split only, then apply to the test split
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Train Models
# NOTE(review): the recorded run of this cell shows recall of roughly
# 0.04-0.06 on class 1; class weighting may be worth evaluating.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped because XGBoost reports it as unused, and the
# target is binary, so the binary 'logloss' metric replaces 'mlogloss'.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# Predict
test_preds_rf = rf_model.predict(X_test)
test_preds_xgb = xgb_model.predict(X_test)

# Evaluation function
def evaluate_model(name, y_true, y_pred):
    """Print a labelled classification report and confusion matrix.

    Args:
        name: Model label shown in the section header.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
    """
    header = f"\n=== {name} ==="
    print(header)

    report = classification_report(y_true, y_pred)
    print("Classification Report:\n", report)

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", matrix)

# Report test-set metrics for each trained model, in the same order as before.
for _model_label, _model_preds in (
    ("Random Forest", test_preds_rf),
    ("XGBoost", test_preds_xgb),
):
    evaluate_model(_model_label, y_test, _model_preds)

import requests

# Function to get AQI based on state/city
def fetch_aqi(state, api_key):
    """Fetch the current AQI for a state or city from the WAQI feed API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-"ok" API status, malformed payload, non-numeric AQI) is printed and
    the fallback value is returned instead of raising.

    Args:
        state: Location name placed in the feed URL path.
        api_key: WAQI API token, sent as the ``token`` query parameter.

    Returns:
        The AQI as an int, or 150 when the value could not be obtained.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    fallback_aqi = 150  # fallback default AQI
    # Percent-encode the user-supplied location so spaces, '?' or '#' cannot
    # alter the request; '/', '@', ':' and ';' are left untouched so
    # path-like identifiers pass through unchanged.
    location = quote(str(state).strip(), safe="/@:;")
    url = f"https://api.waqi.info/feed/{location}/"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params={"token": api_key}, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        print("❌ Error fetching AQI:", data.get("data", "Unknown error"))
        return fallback_aqi
    except Exception as e:
        # Deliberately broad: the caller always gets a usable number.
        print("❌ Exception during AQI fetch:", e)
        return fallback_aqi

# 🔑 API key for the AQICN/WAQI service.
# It is read from the WAQI_API_KEY environment variable so the token never has
# to be pasted into the notebook; it stays empty when the variable is unset.
import os

API_KEY = os.environ.get("WAQI_API_KEY", "")
USER_STATE = "delhi"  # You can dynamically get this from user input


# User input
user_input = {
    'Height_(cm)': 160,
    'Weight_(kg)': 90,
    'Alcohol_Consumption': 5,
    'Fruit_Consumption': 0,
    'Green_Vegetables_Consumption': 0,
    'FriedPotato_Consumption': 6,
    'General_Health': 'Poor',
    'Checkup': '5 or more years ago',
    'Sex': 'Female',
    'Age_Category': '70-74',
    'Exercise': 0,
    'Skin_Cancer': 1,
    'Other_Cancer': 1,
    'Smoking_History': 1,
    'Depression': 1,
    'Diabetes': 1,
    'Arthritis': 1,
    'State': 'Delhi'
}

# Compute AQI from state.
# Previously the live value was fetched and then discarded in favour of the
# simulated table. Now the live value is used whenever a token is configured,
# and the simulated table is the fallback (no network call with an empty token).
if API_KEY:
    user_aqi = fetch_aqi(USER_STATE, API_KEY)
else:
    user_aqi = get_aqi_for_state(user_input['State'])

user_input['AQI'] = user_aqi
user_input['BMI'] = user_input['Weight_(kg)'] / ((user_input['Height_(cm)'] / 100) ** 2)
user_input['High_Pollution'] = 1 if user_input['AQI'] >= 200 else 0

# Create a one-row DataFrame with the columns in the order used for training
user_df = pd.DataFrame([user_input])
X_user = user_df[numerical_cols + categorical_cols + binary_feature_cols]

# Preprocess with the already-fitted transformer (no re-fitting)
X_user_transformed = preprocessor.transform(X_user)

# Predict class labels
rf_pred = rf_model.predict(X_user_transformed)[0]
xgb_pred = xgb_model.predict(X_user_transformed)[0]

# Probability of the positive class (column 1 of predict_proba)
rf_prob = rf_model.predict_proba(X_user_transformed)[0][1]
xgb_prob = xgb_model.predict_proba(X_user_transformed)[0][1]

print(f"\n🧪 Random Forest Risk Probability: {rf_prob:.2f}")
print(f"🧪 XGBoost Risk Probability: {xgb_prob:.2f}")

# Print result
print("\n🏥 Risk Prediction (0 = No Heart Disease, 1 = At Risk):")
print(f"Random Forest Prediction: {rf_pred}")
print(f"XGBoost Prediction: {xgb_pred}")


Parameters: { "use_label_encoder" } are not used.




=== Random Forest ===
Classification Report:
               precision    recall  f1-score   support

           0       0.92      1.00      0.96     56777
           1       0.49      0.04      0.07      4994

    accuracy                           0.92     61771
   macro avg       0.70      0.52      0.51     61771
weighted avg       0.89      0.92      0.89     61771

Confusion Matrix:
 [[56575   202]
 [ 4802   192]]

=== XGBoost ===
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.99      0.96     56777
           1       0.47      0.06      0.10      4994

    accuracy                           0.92     61771
   macro avg       0.69      0.53      0.53     61771
weighted avg       0.89      0.92      0.89     61771

Confusion Matrix:
 [[56446   331]
 [ 4706   288]]

🧪 Random Forest Risk Probability: 0.28
🧪 XGBoost Risk Probability: 0.55

🏥 Risk Prediction (0 = No Heart Disease, 1 = At Risk):
Random Forest Prediction: 0
X

API KEY

In [None]:
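# (API token removed: never store credentials in a notebook cell. Revoke the exposed token and load a replacement from an environment variable or a secrets manager.)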

In [None]:
import requests

def get_air_quality_message(air_quality_index: int) -> None:
    """Print the health advisory that matches an AQI value.

    Bands are contiguous, so fractional readings (for example 50.5) fall into
    the next band up instead of being reported as invalid. Negative, NaN and
    non-numeric values print "Invalid air quality index.".

    Args:
        air_quality_index: AQI reading; ints and floats are both accepted.

    Returns:
        None. The advisory is written to standard output.
    """
    # (inclusive upper bound, advisory) pairs in ascending order.
    bands = (
        (50, "[+] GOOD • \nAir quality is satisfactory, and air pollution poses little or no risk."),
        (100, "[+] MODERATE • \nAir quality is acceptable; however, there may be a moderate health concern for a very small number of people."),
        (150, "[-] UNHEALTHY for Sensitive groups • \nMembers of sensitive groups may experience health effects."),
        (200, "[-] UNHEALTHY • \nEveryone may begin to experience health effects."),
        (300, "[!] VERY UNHEALTHY • \nHealth warnings of emergency conditions."),
        (float('inf'), "[!!] HAZARDOUS • \nHealth alert: everyone may experience more serious health effects."),
    )

    try:
        is_valid = air_quality_index >= 0
    except TypeError:
        # Values that cannot be compared with a number (e.g. a string or None).
        is_valid = False

    if is_valid:
        for upper_bound, msg in bands:
            if air_quality_index <= upper_bound:
                print(msg)
                return None

    print("Invalid air quality index.")
    return None

def main():
    """Prompt for a location, query the WAQI feed API and print its AQI report.

    The API token is taken from the WAQI_API_KEY environment variable; when
    that is unset the user is prompted for it without echo. The token is no
    longer hard-coded, so it cannot leak through the source file.

    Prints the coordinates, station name, AQI value and the matching advisory.
    Any error while fetching or reading the response is printed, not raised.
    """
    import getpass
    import os
    from urllib.parse import quote

    api_key = os.environ.get("WAQI_API_KEY") or getpass.getpass("WAQI API token: ").strip()
    country_state = input("Country or state name: ").lower().strip()
    # Percent-encode the typed location so spaces, '?' or '#' cannot change
    # the request; the token is passed separately as a query parameter.
    location = quote(country_state, safe="/@:;")
    url = f"https://api.waqi.info/feed/{location}/"

    try:
        # The timeout keeps a stalled connection from hanging the cell.
        response = requests.get(url, params={"token": api_key}, timeout=10)
        data = response.json()

        if data['status'] != 'ok':
            print(f"Error: {data.get('data', 'Unknown error')}")
            return

        air_quality_index = data['data']['aqi']
        geo = data['data']['city']['geo']
        lat, lon = geo[0], geo[1]
        name = data['data']['city']['name']

        print(f"COORD: {lat}, {lon}")
        print(f"Location: {name}")
        print(f"AIR QUALITY INDEX: {air_quality_index}")

        get_air_quality_message(air_quality_index)
    except Exception as e:
        # Best-effort script: report the problem instead of a traceback.
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

Country or state name: austin
COORD: 30.3544356, -97.7602554
Location: Austin Northwest, Austin, Texas
AIR QUALITY INDEX: 65
[+] MODERATE • 
Air quality is acceptable; however, there may be a moderate health concern for a very small number of people.


In [None]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Function to fetch AQI from city using API
def fetch_aqi(city, api_key):
    """Fetch the current AQI for a city from the WAQI feed API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-"ok" API status, malformed payload, non-numeric AQI) is printed and
    the fallback value is returned instead of raising.

    Args:
        city: Location name placed in the feed URL path.
        api_key: WAQI API token, sent as the ``token`` query parameter.

    Returns:
        The AQI as an int, or 150 when the value could not be obtained.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    fallback_aqi = 150  # fallback default AQI
    # Percent-encode the user-supplied location so spaces, '?' or '#' cannot
    # alter the request; '/', '@', ':' and ';' are left untouched so
    # path-like identifiers pass through unchanged.
    location = quote(str(city).strip(), safe="/@:;")
    url = f"https://api.waqi.info/feed/{location}/"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params={"token": api_key}, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        print("❌ Error fetching AQI:", data.get("data", "Unknown error"))
        return fallback_aqi
    except Exception as e:
        # Deliberately broad: the caller always gets a usable number.
        print("❌ Exception during AQI fetch:", e)
        return fallback_aqi

# Function to display AQI message
def get_air_quality_message(air_quality_index):
    """Print the health advisory that matches an AQI value.

    Bands are contiguous, so fractional readings (for example 50.5) fall into
    the next band up instead of being reported as invalid. Negative, NaN and
    non-numeric values print "Invalid air quality index.".

    Args:
        air_quality_index: AQI reading; ints and floats are both accepted.

    Returns:
        None. The advisory is written to standard output.
    """
    # (inclusive upper bound, advisory) pairs in ascending order.
    bands = (
        (50, "[+] GOOD • \nAir quality is satisfactory, and air pollution poses little or no risk."),
        (100, "[+] MODERATE • \nAir quality is acceptable; however, there may be a moderate health concern for a very small number of people."),
        (150, "[-] UNHEALTHY for Sensitive groups • \nMembers of sensitive groups may experience health effects."),
        (200, "[-] UNHEALTHY • \nEveryone may begin to experience health effects."),
        (300, "[!] VERY UNHEALTHY • \nHealth warnings of emergency conditions."),
        (float('inf'), "[!!] HAZARDOUS • \nHealth alert: everyone may experience more serious health effects."),
    )

    try:
        is_valid = air_quality_index >= 0
    except TypeError:
        # Values that cannot be compared with a number (e.g. a string or None).
        is_valid = False

    if is_valid:
        for upper_bound, msg in bands:
            if air_quality_index <= upper_bound:
                print(msg)
                return None

    print("Invalid air quality index.")
    return None

# ==================== DATA PREPARATION ====================
df = pd.read_csv("/content/CVD_cleaned.csv")

# Create BMI column if missing: weight (kg) divided by squared height (m)
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulate AQI: values are drawn from a seeded normal distribution
# (mean 150, std 50), so the column is reproducible synthetic data rather
# than a measurement belonging to each row.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

# Drop unwanted column
if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Convert binary yes/no columns to 1/0.
# Any value other than exactly 'Yes' or 'No' becomes NaN after the mapping.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Feature selection
numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Preprocessing pipelines: numeric columns are mean-imputed then standardised
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical columns are mode-imputed then one-hot encoded; categories not
# seen during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed in a transformer, so they reach
# the models unchanged (and un-imputed) through remainder='passthrough'.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# Split data (stratified so both sets keep the same class ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Transform: fit on the training split only, then apply to the test split
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Train models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped because XGBoost reports it as unused, and the
# target is binary, so the binary 'logloss' metric replaces 'mlogloss'.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# ==================== USER INPUT ====================

# API key for the AQICN/WAQI service.
# The token is read from the WAQI_API_KEY environment variable, or prompted
# for without echo, so it is never stored in the notebook source.
import getpass
import os

API_KEY = os.environ.get("WAQI_API_KEY") or getpass.getpass("WAQI API token: ").strip()

# Get city from user
city_input = input("Enter your city for AQI check: ").lower().strip()

# Fetch AQI from city (fetch_aqi returns its fallback value on any failure)
user_aqi = fetch_aqi(city_input, API_KEY)

# Show AQI info
print(f"\n🌎 City: {city_input.capitalize()}")
print(f"📈 AQI Value: {user_aqi}")
get_air_quality_message(user_aqi)

# Other user inputs manually
user_input = {
    'Height_(cm)': 160,
    'Weight_(kg)': 90,
    'Alcohol_Consumption': 5,
    'Fruit_Consumption': 0,
    'Green_Vegetables_Consumption': 0,
    'FriedPotato_Consumption': 6,
    'General_Health': 'Poor',
    'Checkup': '5 or more years ago',
    'Sex': 'Male',
    'Age_Category': '70-74',
    'Exercise': 0,
    'Skin_Cancer': 1,
    'Other_Cancer': 1,
    'Smoking_History': 1,
    'Depression': 1,
    'Diabetes': 1,
    'Arthritis': 1
}

# Add AQI and computed features (same formulas as used on the training data)
user_input['AQI'] = user_aqi
user_input['BMI'] = user_input['Weight_(kg)'] / ((user_input['Height_(cm)'] / 100) ** 2)
user_input['High_Pollution'] = 1 if user_aqi >= 200 else 0

# Prepare a one-row DataFrame with the columns in training order
user_df = pd.DataFrame([user_input])
X_user = user_df[numerical_cols + categorical_cols + binary_feature_cols]

# Preprocess user data with the already-fitted transformer
X_user_transformed = preprocessor.transform(X_user)

# Predict class labels
rf_pred = rf_model.predict(X_user_transformed)[0]
xgb_pred = xgb_model.predict(X_user_transformed)[0]

# Probability of the positive class (column 1 of predict_proba)
rf_prob = rf_model.predict_proba(X_user_transformed)[0][1]
xgb_prob = xgb_model.predict_proba(X_user_transformed)[0][1]

# ==================== OUTPUT ====================

print(f"\n🧪 Random Forest Risk Probability: {rf_prob:.2f}")
print(f"🧪 XGBoost Risk Probability: {xgb_prob:.2f}")

print("\n🏥 Risk Prediction (0 = No Heart Disease, 1 = At Risk):")
print(f"Random Forest Prediction: {rf_pred}")
print(f"XGBoost Prediction: {xgb_pred}")


Parameters: { "use_label_encoder" } are not used.



Enter your city for AQI check: chandigarh

🌎 City: Chandigarh
📈 AQI Value: 102
[-] UNHEALTHY for Sensitive groups • 
Members of sensitive groups may experience health effects.

🧪 Random Forest Risk Probability: 0.52
🧪 XGBoost Risk Probability: 0.80

🏥 Risk Prediction (0 = No Heart Disease, 1 = At Risk):
Random Forest Prediction: 1
XGBoost Prediction: 1


WITH GRADIO

In [None]:
# 🚀 All necessary imports
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.impute import SimpleImputer

# 🔑 API key for the AQICN/WAQI service.
# It is read from the WAQI_API_KEY environment variable so the token never has
# to be pasted into the notebook; it stays empty when the variable is unset.
import os

API_KEY = os.environ.get("WAQI_API_KEY", "")

# 📦 Load Dataset
df = pd.read_csv("/content/CVD_cleaned.csv")

# 🛠️ Data Preprocessing
# BMI = weight (kg) divided by squared height (m), added only when absent
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Synthetic AQI drawn from a seeded normal distribution (mean 150, std 50)
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Map Yes/No to 1/0; any other value becomes NaN after the mapping
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Numeric columns: mean imputation followed by standardisation
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical columns: mode imputation followed by one-hot encoding; unseen
# categories are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# Binary feature columns pass through untransformed via remainder='passthrough'
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# 📊 Train/Test split and model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fit the preprocessor on the training split only, then apply it to the test split
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped because XGBoost reports it as unused, and the
# target is binary, so the binary 'logloss' metric replaces 'mlogloss'.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# 🔎 AQI Fetch function
def fetch_aqi(city, api_key):
    """Fetch the current AQI for a city from the WAQI feed API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-"ok" API status, malformed payload, non-numeric AQI) is printed and
    the fallback value is returned instead of raising.

    Args:
        city: Location name placed in the feed URL path.
        api_key: WAQI API token, sent as the ``token`` query parameter.

    Returns:
        The AQI as an int, or 150 when the value could not be obtained.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    fallback_aqi = 150
    # Percent-encode the user-supplied location so spaces, '?' or '#' cannot
    # alter the request; '/', '@', ':' and ';' are left untouched so
    # path-like identifiers pass through unchanged.
    location = quote(str(city).strip(), safe="/@:;")
    url = f"https://api.waqi.info/feed/{location}/"
    try:
        # Without a timeout a stalled connection would block the UI handler.
        response = requests.get(url, params={"token": api_key}, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        print("❌ Error fetching AQI:", data.get("data", "Unknown error"))
        return fallback_aqi
    except Exception as e:
        # Deliberately broad: the caller always gets a usable number.
        print("❌ Exception during AQI fetch:", e)
        return fallback_aqi

# 💬 Air Quality Message (Optional nice touch)
def get_air_quality_message(aqi):
    """Return a short label describing an AQI value.

    Bands are contiguous, so a fractional reading such as 50.5 is labelled
    with the next band up ("Moderate") instead of falling through to
    "Hazardous". Negative values are reported as invalid.

    Args:
        aqi: AQI reading (int or float).

    Returns:
        The label for the band containing ``aqi``, or "Invalid AQI" when
        ``aqi`` is negative.
    """
    if aqi < 0:
        return "Invalid AQI"

    # (inclusive upper bound, label) pairs in ascending order.
    bands = (
        (50, "Good 🌿"),
        (100, "Moderate 🍃"),
        (150, "Unhealthy for Sensitive Groups ⚠️"),
        (200, "Unhealthy 🚫"),
        (300, "Very Unhealthy ❗"),
    )
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return "Hazardous ☠️"

# 🎯 Main Prediction Function
def predict_heart_disease(height, weight, alcohol, fruits, veggies, fried, general_health, checkup, sex, age_category,
                           exercise, skin_cancer, other_cancer, smoking_history, depression, diabetes, arthritis, city):
    """Predict heart-disease risk for one person and report local air quality.

    Inputs are validated first: a field left empty (received as None) or a
    non-positive height/weight yields a Markdown error message instead of an
    exception inside the UI handler. For valid input the function fetches
    the AQI for ``city``, derives BMI and the high-pollution flag, transforms
    the single row with the fitted ``preprocessor`` and asks both models for
    the probability of the positive class.

    Args:
        height: Height in centimetres.
        weight: Weight in kilograms.
        alcohol, fruits, veggies, fried: Consumption figures.
        general_health, checkup, sex, age_category: Categorical answers.
        exercise, skin_cancer, other_cancer, smoking_history, depression,
            diabetes, arthritis: 0/1 flags.
        city: City name used for the AQI lookup; may be empty.

    Returns:
        A Markdown string with both risk probabilities and the AQI summary,
        or a Markdown error message when validation fails.
    """
    required = {
        "Height": height,
        "Weight": weight,
        "Alcohol Consumption": alcohol,
        "Fruit Consumption": fruits,
        "Green Vegetables Consumption": veggies,
        "Fried Potato Consumption": fried,
        "General Health": general_health,
        "Last Checkup": checkup,
        "Sex": sex,
        "Age Category": age_category,
        "Exercise": exercise,
        "Skin Cancer": skin_cancer,
        "Other Cancer": other_cancer,
        "Smoking History": smoking_history,
        "Depression": depression,
        "Diabetes": diabetes,
        "Arthritis": arthritis,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        return "**Input error:** please fill in: " + ", ".join(missing) + "."
    if height <= 0 or weight <= 0:
        # Guards the BMI division below against zero or negative height.
        return "**Input error:** height and weight must be positive numbers."

    city = (city or "").strip()

    # Fetch real AQI
    user_aqi = fetch_aqi(city, API_KEY)

    # Compute BMI
    bmi = weight / ((height / 100) ** 2)

    # High Pollution (same threshold as used on the training data)
    high_pollution = 1 if user_aqi >= 200 else 0

    # Prepare Input
    user_input = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruits,
        'Green_Vegetables_Consumption': veggies,
        'FriedPotato_Consumption': fried,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age_category,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking_history,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'AQI': user_aqi,
        'BMI': bmi,
        'High_Pollution': high_pollution
    }

    user_df = pd.DataFrame([user_input])
    X_user = user_df[numerical_cols + categorical_cols + binary_feature_cols]
    X_user_transformed = preprocessor.transform(X_user)

    # Probability of the positive class (column 1 of predict_proba)
    rf_prob = rf_model.predict_proba(X_user_transformed)[0][1]
    xgb_prob = xgb_model.predict_proba(X_user_transformed)[0][1]

    # Final Result
    result = f"""
    🏥 **Heart Disease Risk Prediction**:
    - Random Forest Risk: **{rf_prob:.2f}**
    - XGBoost Risk: **{xgb_prob:.2f}**

    🌎 **Air Quality in {city.title()}:** {user_aqi} ({get_air_quality_message(user_aqi)})
    """

    return result

# 🖥️ Gradio Interface
# Labels for the free-form numeric inputs, in the order the prediction
# function expects them.
_number_labels = (
    "Height (cm)",
    "Weight (kg)",
    "Alcohol Consumption (drinks/week)",
    "Fruit Consumption (times/week)",
    "Green Vegetables Consumption (times/week)",
    "Fried Potato Consumption (times/week)",
)

# Labels for the 0/1 radio inputs, again in positional order.
_yes_no_labels = (
    "Exercise (0=No, 1=Yes)",
    "Skin Cancer History (0=No, 1=Yes)",
    "Other Cancer History (0=No, 1=Yes)",
    "Smoking History (0=No, 1=Yes)",
    "Depression (0=No, 1=Yes)",
    "Diabetes (0=No, 1=Yes)",
    "Arthritis (0=No, 1=Yes)",
)

# Components are created in the same order as before: numbers, dropdowns,
# radios, then the city textbox. The position in this list decides which
# argument of predict_heart_disease each widget feeds.
_inputs = [gr.Number(label=text) for text in _number_labels]
# NOTE(review): dropdown values that the one-hot encoder did not see during
# fit are encoded as all zeros; confirm these choices match the dataset.
_inputs += [
    gr.Dropdown(['Poor', 'Fair', 'Good', 'Very Good', 'Excellent'], label="General Health"),
    gr.Dropdown(['Never', 'Within the past year', 'Within the past 2 years', 'Within the past 5 years', '5 or more years ago'], label="Last Checkup"),
    gr.Dropdown(['Male', 'Female'], label="Sex"),
    gr.Dropdown(['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'], label="Age Category"),
]
_inputs += [gr.Radio([0, 1], label=text) for text in _yes_no_labels]
_inputs.append(gr.Textbox(label="City Name (for AQI)"))

interface = gr.Interface(
    fn=predict_heart_disease,
    inputs=_inputs,
    outputs="markdown",
    title="💖 AI Heart Disease Risk Prediction + AQI Check",
    description="Enter your health details and city to predict your risk of heart disease and see the air quality index.",
)

# Start the web app.
interface.launch()


Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://50db9608996513baca.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.impute import SimpleImputer

# 🔑 API key for the AQICN/WAQI service.
# It is read from the WAQI_API_KEY environment variable so the token never has
# to be pasted into the notebook; it stays empty when the variable is unset.
import os

API_KEY = os.environ.get("WAQI_API_KEY", "")

# 📦 Load Dataset
df = pd.read_csv("/content/CVD_cleaned.csv")

# 🛠️ Data Preprocessing
# BMI = weight (kg) divided by squared height (m), added only when absent
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Synthetic AQI drawn from a seeded normal distribution (mean 150, std 50)
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Map Yes/No to 1/0; any other value becomes NaN after the mapping
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Numeric columns: mean imputation followed by standardisation
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical columns: mode imputation followed by one-hot encoding; unseen
# categories are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# Binary feature columns pass through untransformed via remainder='passthrough'
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fit the preprocessor on the training split only, then apply it to the test split
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped because XGBoost reports it as unused, and the
# target is binary, so the binary 'logloss' metric replaces 'mlogloss'.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# 🔎 AQI Fetch
def fetch_aqi(city, api_key):
    """Fetch the current AQI for a city from the WAQI feed API.

    This is a best-effort lookup: any failure (network error, timeout,
    non-"ok" API status, malformed payload, non-numeric AQI) is printed and
    the fallback value is returned instead of raising.

    Args:
        city: Location name placed in the feed URL path.
        api_key: WAQI API token, sent as the ``token`` query parameter.

    Returns:
        The AQI as an int, or 150 when the value could not be obtained.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    fallback_aqi = 150
    # Percent-encode the user-supplied location so spaces, '?' or '#' cannot
    # alter the request; '/', '@', ':' and ';' are left untouched so
    # path-like identifiers pass through unchanged.
    location = quote(str(city).strip(), safe="/@:;")
    url = f"https://api.waqi.info/feed/{location}/"
    try:
        # Without a timeout a stalled connection would block the UI handler.
        response = requests.get(url, params={"token": api_key}, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        print("❌ Error fetching AQI:", data.get("data", "Unknown error"))
        return fallback_aqi
    except Exception as e:
        # Deliberately broad: the caller always gets a usable number.
        print("❌ Exception during AQI fetch:", e)
        return fallback_aqi

def get_air_quality_message(aqi):
    """Return a short label describing an AQI value.

    Bands are contiguous, so a fractional reading such as 50.5 is labelled
    with the next band up ("Moderate") instead of falling through to
    "Hazardous". Negative values are reported as invalid.

    Args:
        aqi: AQI reading (int or float).

    Returns:
        The label for the band containing ``aqi``, or "Invalid AQI" when
        ``aqi`` is negative.
    """
    if aqi < 0:
        return "Invalid AQI"

    # (inclusive upper bound, label) pairs in ascending order.
    bands = (
        (50, "Good 🌿"),
        (100, "Moderate 🍃"),
        (150, "Unhealthy for Sensitive Groups ⚠️"),
        (200, "Unhealthy 🚫"),
        (300, "Very Unhealthy ❗"),
    )
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return "Hazardous ☠️"

# 🎯 Prediction
def predict_heart_disease(height, weight, alcohol, fruits, veggies, fried, general_health, checkup, sex, age_category,
                           exercise, skin_cancer, other_cancer, smoking_history, depression, diabetes, arthritis, city):
    """Predict heart-disease risk for one person and report local air quality.

    Inputs are validated first: a field left empty (received as None) or a
    non-positive height/weight yields a Markdown error message instead of an
    exception inside the UI handler. For valid input the function fetches
    the AQI for ``city``, derives BMI and the high-pollution flag, transforms
    the single row with the fitted ``preprocessor`` and asks both models for
    the probability of the positive class.

    Args:
        height: Height in centimetres.
        weight: Weight in kilograms.
        alcohol, fruits, veggies, fried: Consumption figures.
        general_health, checkup, sex, age_category: Categorical answers.
        exercise, skin_cancer, other_cancer, smoking_history, depression,
            diabetes, arthritis: 0/1 flags.
        city: City name used for the AQI lookup; may be empty.

    Returns:
        A Markdown string with both risk probabilities and the AQI summary,
        or a Markdown error message when validation fails.
    """
    required = {
        "Height": height,
        "Weight": weight,
        "Alcohol Consumption": alcohol,
        "Fruit Consumption": fruits,
        "Green Vegetables Consumption": veggies,
        "Fried Potato Consumption": fried,
        "General Health": general_health,
        "Last Checkup": checkup,
        "Sex": sex,
        "Age Category": age_category,
        "Exercise": exercise,
        "Skin Cancer": skin_cancer,
        "Other Cancer": other_cancer,
        "Smoking History": smoking_history,
        "Depression": depression,
        "Diabetes": diabetes,
        "Arthritis": arthritis,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        return "**Input error:** please fill in: " + ", ".join(missing) + "."
    if height <= 0 or weight <= 0:
        # Guards the BMI division below against zero or negative height.
        return "**Input error:** height and weight must be positive numbers."

    city = (city or "").strip()

    user_aqi = fetch_aqi(city, API_KEY)
    bmi = weight / ((height / 100) ** 2)
    # Same threshold as used for the High_Pollution training feature.
    high_pollution = 1 if user_aqi >= 200 else 0

    user_input = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruits,
        'Green_Vegetables_Consumption': veggies,
        'FriedPotato_Consumption': fried,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age_category,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking_history,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'AQI': user_aqi,
        'BMI': bmi,
        'High_Pollution': high_pollution
    }

    user_df = pd.DataFrame([user_input])
    X_user = user_df[numerical_cols + categorical_cols + binary_feature_cols]
    X_user_transformed = preprocessor.transform(X_user)

    # Probability of the positive class (column 1 of predict_proba)
    rf_prob = rf_model.predict_proba(X_user_transformed)[0][1]
    xgb_prob = xgb_model.predict_proba(X_user_transformed)[0][1]

    result = f"""
    # 🏥 Heart Disease Risk Prediction

    - **Random Forest Risk:** `{rf_prob:.2%}`
    - **XGBoost Risk:** `{xgb_prob:.2%}`

    ---

    # 🌍 Air Quality Information
    - **City:** `{city.title()}`
    - **AQI Level:** `{user_aqi}`
    - **Condition:** `{get_air_quality_message(user_aqi)}`
    """

    return result

# 🖥️ Gradio Professional Interface
# Build the two-column Blocks UI. Components are laid out in the order they
# are created inside the nested context managers, so statement order here
# determines the on-screen layout.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink")) as demo:

    # Page heading and short description.
    gr.Markdown("# 💖 AI Health Risk and Air Quality Check")
    gr.Markdown("Get your **heart disease risk prediction** and **live air quality information** based on your health and city.")

    with gr.Row():
        # Left column: body measurements, consumption figures and two 0/1 flags.
        with gr.Column():
            height = gr.Number(label="📏 Height (cm)")
            weight = gr.Number(label="⚖️ Weight (kg)")
            alcohol = gr.Number(label="🍺 Alcohol Consumption (drinks/week)")
            fruits = gr.Number(label="🍎 Fruit Consumption (times/week)")
            veggies = gr.Number(label="🥦 Green Vegetables Consumption (times/week)")
            fried = gr.Number(label="🍟 Fried Potato Consumption (times/week)")
            exercise = gr.Radio([0,1], label="🏃 Exercise? (0=No, 1=Yes)")
            smoking_history = gr.Radio([0,1], label="🚬 Smoking History? (0=No, 1=Yes)")

        # Right column: categorical answers, medical-history flags and the city.
        # NOTE(review): dropdown values the one-hot encoder did not see during
        # fit are encoded as all zeros; confirm these choices match the dataset.
        with gr.Column():
            general_health = gr.Dropdown(['Poor', 'Fair', 'Good', 'Very Good', 'Excellent'], label="🩺 General Health")
            checkup = gr.Dropdown(['Never', 'Within the past year', 'Within the past 2 years', 'Within the past 5 years', '5 or more years ago'], label="🔎 Last Checkup")
            sex = gr.Dropdown(['Male', 'Female'], label="👤 Sex")
            age_category = gr.Dropdown(['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'], label="🎂 Age Category")
            skin_cancer = gr.Radio([0,1], label="🧬 Skin Cancer History? (0=No, 1=Yes)")
            other_cancer = gr.Radio([0,1], label="🧬 Other Cancer History? (0=No, 1=Yes)")
            depression = gr.Radio([0,1], label="🧠 Depression History? (0=No, 1=Yes)")
            diabetes = gr.Radio([0,1], label="🩸 Diabetes History? (0=No, 1=Yes)")
            arthritis = gr.Radio([0,1], label="🦴 Arthritis? (0=No, 1=Yes)")
            city = gr.Textbox(label="🏙️ City for AQI Check")

    submit = gr.Button("🔍 Predict Risk")
    output = gr.Markdown()

    # The inputs list is positional: its order must match the parameter order
    # of predict_heart_disease, which differs from the on-screen order above.
    submit.click(
        predict_heart_disease,
        inputs=[height, weight, alcohol, fruits, veggies, fried, general_health, checkup, sex, age_category,
                exercise, skin_cancer, other_cancer, smoking_history, depression, diabetes, arthritis, city],
        outputs=output
    )

# Start the web app.
demo.launch()


Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://781983485920414d49.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Ultra Pro version: dark mode and CSV upload


In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer

# Insert your API key here
# Token for the WAQI feed API; predict_heart_disease() and predict_batch()
# pass it to fetch_aqi().
API_KEY = ""  # <-- Put your AQI API Key here

# Load dataset
df = pd.read_csv("/content/CVD_cleaned.csv")

# Derive BMI (kg / m^2) from weight and height when the CSV lacks the column.
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulated AQI: one normally distributed value (mean 150, sd 50) per row,
# truncated to int. The fixed seed makes reruns produce the same column.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

# Drop the 'Unnamed: 0' column when present.
if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Encode Yes/No columns (and the target) as 1/0.
# NOTE(review): any value other than exactly 'Yes'/'No' becomes NaN here —
# confirm every listed column only contains those two spellings.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Feature groups; the same three lists are reused at prediction time so the
# column order fed to the preprocessor never changes.
numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Numeric features: mean-impute, then standardize.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: mode-impute, then one-hot encode. Categories not seen
# during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed, so they pass through unchanged.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fit the preprocessor on the training split only, then reuse it for the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# Heart_Disease is a binary target, so the matching metric is 'logloss'
# ('mlogloss' is the multiclass variant). `use_label_encoder` is dropped
# because the installed XGBoost reports it as an unused parameter.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# Fetch AQI
def fetch_aqi(city, api_key):
    """Return the current AQI for ``city`` from the WAQI feed API.

    Best-effort helper for the UI: any failure (blank city, network error,
    non-"ok" API status, non-numeric reading) is reported on stdout where
    applicable and the fallback value 150 is returned instead of raising.

    Args:
        city: City/station name placed in the feed URL path.
        api_key: WAQI API token sent as the ``token`` query parameter.

    Returns:
        int: The AQI reported by the API, or 150 when it cannot be obtained.
    """
    from urllib.parse import quote

    fallback_aqi = 150
    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up; skip the pointless network round-trip.
        return fallback_aqi

    try:
        # Percent-encode user text so spaces, '/', '?' or '#' cannot change
        # the URL structure. '@', ':', ';' and ',' are left unescaped so path
        # forms that rely on them keep working.
        city_part = quote(city_name, safe="@:;,")
        token_part = quote(str(api_key), safe="")
        url = f"https://api.waqi.info/feed/{city_part}/?token={token_part}"
        # Without a timeout a stalled connection would block the callback forever.
        response = requests.get(url, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        print("Error fetching AQI:", data.get("data", "Unknown error"))
        return fallback_aqi
    except Exception as e:
        # Deliberate catch-all at the UI boundary: a failed lookup must never
        # abort a prediction, so log it and use the fallback.
        print("Exception:", e)
        return fallback_aqi

def get_air_quality_message(aqi):
    """Map an AQI reading to its human-readable category label.

    Bands are matched by upper bound (<=50, <=100, <=150, <=200, <=300), so
    fractional readings such as 50.5 land in the next band instead of falling
    through the gaps between integer ranges.

    Args:
        aqi: Numeric AQI value (int or float).

    Returns:
        str: The category label. Values above 300 are "Hazardous"; negative
        or NaN values also yield "Hazardous", as they did before.
    """
    hazardous = "Hazardous ☠️"
    # Negative readings are invalid; keep the result callers already get.
    if aqi < 0:
        return hazardous

    bands = (
        (50, "Good 🌿"),
        (100, "Moderate 🍃"),
        (150, "Unhealthy for Sensitive Groups ⚠️"),
        (200, "Unhealthy 🚫"),
        (300, "Very Unhealthy ❗"),
    )
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return hazardous

# Single Prediction
def predict_heart_disease(height, weight, alcohol, fruits, veggies, fried, general_health, checkup, sex, age_category,
                           exercise, skin_cancer, other_cancer, smoking_history, depression, diabetes, arthritis, city):
    """Predict heart-disease risk for one person and report the city's AQI.

    Looks up the AQI for ``city`` via fetch_aqi(), derives BMI and the
    High_Pollution flag, runs the fitted preprocessor and both models, and
    formats the outcome as Markdown.

    Args:
        height: Height in centimetres.
        weight: Weight in kilograms.
        alcohol, fruits, veggies, fried: Numeric consumption features.
        general_health, checkup, sex, age_category: Categorical features.
        exercise, skin_cancer, other_cancer, smoking_history, depression,
            diabetes, arthritis: Binary (0/1) features.
        city: City name used for the AQI lookup.

    Returns:
        str: Markdown with both models' risk probabilities, their 0/1 class
        predictions and the AQI information, or a short validation message
        when height/weight are missing or not positive.
    """
    # BMI divides by height squared, so reject missing or non-positive values
    # up front instead of failing with TypeError / ZeroDivisionError.
    if height is None or weight is None or height <= 0 or weight <= 0:
        return "⚠️ Please enter a valid height and weight (both must be greater than 0)."

    user_aqi = fetch_aqi(city, API_KEY)
    bmi = weight / ((height / 100) ** 2)
    high_pollution = 1 if user_aqi >= 200 else 0

    user_input = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruits,
        'Green_Vegetables_Consumption': veggies,
        'FriedPotato_Consumption': fried,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age_category,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking_history,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'AQI': user_aqi,
        'BMI': bmi,
        'High_Pollution': high_pollution
    }

    # Re-select the columns in training order before transforming.
    user_df = pd.DataFrame([user_input])
    X_user = user_df[numerical_cols + categorical_cols + binary_feature_cols]
    X_user_transformed = preprocessor.transform(X_user)

    rf_prob = rf_model.predict_proba(X_user_transformed)[0][1]
    xgb_prob = xgb_model.predict_proba(X_user_transformed)[0][1]
    # The report shows the hard 0/1 decisions too; they were referenced in
    # the output template but never computed.
    rf_pred = rf_model.predict(X_user_transformed)[0]
    xgb_pred = xgb_model.predict(X_user_transformed)[0]

    city_label = (city or "").title()
    report_lines = [
        "# 🏥 Heart Disease Risk",
        "",
        f"- **Random Forest Risk:** `{rf_prob:.2%}`",
        f"- **XGBoost Risk:** `{xgb_prob:.2%}`",
        "",
        "---",
        "",
        "## 🏥 Risk Prediction (0 = No Heart Disease, 1 = At Risk)",
        "",
        f"- **Random Forest Prediction:** `{rf_pred}`",
        f"- **XGBoost Prediction:** `{xgb_pred}`",
        "",
        "# 🌍 AQI Info",
        "",
        f"- **City:** `{city_label}`",
        f"- **AQI:** `{user_aqi}`",
        f"- **Condition:** `{get_air_quality_message(user_aqi)}`",
    ]
    return "\n".join(report_lines)

# Batch CSV Prediction
def predict_batch(file):
    """Score every row of an uploaded CSV and return a per-row summary table.

    The CSV must contain a ``City`` column plus the raw feature columns
    (``Weight_(kg)``, ``Height_(cm)`` and the remaining entries of the
    module-level feature lists other than BMI, AQI and High_Pollution, which
    are derived here).

    Args:
        file: Uploaded file handle exposing the path as ``.name``, a plain
            path string, or None when the upload was cleared.

    Returns:
        pandas.DataFrame: Columns ``City``, ``RF_Risk (%)``, ``XGB_Risk (%)``,
        ``AQI`` and ``AQI Condition``; empty when there is nothing to score.
    """
    output_columns = ["City", "RF_Risk (%)", "XGB_Risk (%)", "AQI", "AQI Condition"]

    # The change event also fires when the file is removed; return an empty
    # table instead of failing on `None.name`.
    if file is None:
        return pd.DataFrame(columns=output_columns)

    csv_path = getattr(file, "name", file)
    batch = pd.read_csv(csv_path)
    if batch.empty:
        return pd.DataFrame(columns=output_columns)

    # One AQI lookup per distinct city rather than one HTTP request per row.
    aqi_cache = {}
    aqi_values = []
    for city_name in batch['City'].tolist():
        if city_name not in aqi_cache:
            aqi_cache[city_name] = fetch_aqi(city_name, API_KEY)
        aqi_values.append(aqi_cache[city_name])

    # Derived features, computed for the whole frame at once.
    batch['BMI'] = batch['Weight_(kg)'] / ((batch['Height_(cm)'] / 100) ** 2)
    batch['AQI'] = aqi_values
    batch['High_Pollution'] = (batch['AQI'] >= 200).astype(int)

    # Single transform/predict pass over all rows, columns in training order.
    X_batch = preprocessor.transform(batch[numerical_cols + categorical_cols + binary_feature_cols])
    rf_probs = rf_model.predict_proba(X_batch)[:, 1]
    xgb_probs = xgb_model.predict_proba(X_batch)[:, 1]

    results = [
        {
            "City": city_name,
            "RF_Risk (%)": f"{rf_prob*100:.2f}",
            "XGB_Risk (%)": f"{xgb_prob*100:.2f}",
            "AQI": aqi,
            "AQI Condition": get_air_quality_message(aqi),
        }
        for city_name, rf_prob, xgb_prob, aqi in zip(batch['City'].tolist(), rf_probs, xgb_probs, aqi_values)
    ]
    return pd.DataFrame(results, columns=output_columns)

# Gradio Professional Interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
    # Two-tab app: a form for a single prediction and a CSV upload for batch
    # scoring. The tabs themselves select the mode, so no separate mode
    # selector is rendered (the previous radio was not connected to anything).

    gr.Markdown("# 💖 AI Health Risk + AQI Check")
    gr.Markdown("Predict your **heart disease risk** and **live air quality** based on your city.")

    # Offer exactly the Checkup categories present in the training frame.
    # The encoder ignores unknown categories, so a choice spelled differently
    # from the data would silently be encoded as "no category".
    checkup_choices = sorted(df['Checkup'].dropna().astype(str).unique())

    with gr.Tab("Single User"):
        with gr.Row():
            with gr.Column():
                height = gr.Number(label="📏 Height (cm)")
                weight = gr.Number(label="⚖️ Weight (kg)")
                alcohol = gr.Number(label="🍺 Alcohol per week")
                fruits = gr.Number(label="🍎 Fruits per week")
                veggies = gr.Number(label="🥦 Veggies per week")
                fried = gr.Number(label="🍟 Fried food per week")
                exercise = gr.Radio([0,1], label="🏃 Exercise? (0=No, 1=Yes)")
                smoking_history = gr.Radio([0,1], label="🚬 Smoking History? (0=No, 1=Yes)")

            with gr.Column():
                general_health = gr.Dropdown(['Poor', 'Fair', 'Good', 'Very Good', 'Excellent'], label="🩺 General Health")
                checkup = gr.Dropdown(checkup_choices, label="🔎 Last Checkup")
                sex = gr.Dropdown(['Male', 'Female'], label="👤 Sex")
                age_category = gr.Dropdown(['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80+'], label="🎂 Age Category")
                skin_cancer = gr.Radio([0,1], label="🧬 Skin Cancer? (0=No, 1=Yes)")
                other_cancer = gr.Radio([0,1], label="🧬 Other Cancer? (0=No, 1=Yes)")
                depression = gr.Radio([0,1], label="🧠 Depression? (0=No, 1=Yes)")
                diabetes = gr.Radio([0,1], label="🩸 Diabetes? (0=No, 1=Yes)")
                arthritis = gr.Radio([0,1], label="🦴 Arthritis? (0=No, 1=Yes)")
                city = gr.Textbox(label="🏙️ City")

        predict_btn = gr.Button("🔍 Predict Risk")
        output = gr.Markdown()

        # The input order must match predict_heart_disease()'s parameter order.
        predict_btn.click(
            predict_heart_disease,
            inputs=[height, weight, alcohol, fruits, veggies, fried, general_health, checkup, sex, age_category,
                    exercise, skin_cancer, other_cancer, smoking_history, depression, diabetes, arthritis, city],
            outputs=output
        )

    with gr.Tab("Batch (CSV Upload)"):
        file_upload = gr.File(label="Upload CSV File (with health info + city column)")
        batch_output = gr.Dataframe()

        # Re-score whenever the uploaded file changes.
        file_upload.change(predict_batch, inputs=file_upload, outputs=batch_output)

demo.launch()




Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://689aad915f5ad67eff.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




New version with dummy data

In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# --- Data Loading and Preprocessing ---
df = pd.read_csv("/content/CVD_cleaned.csv")

# Create BMI (kg / m^2) if it does not exist
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulate AQI: one normally distributed value (mean 150, sd 50) per row,
# truncated to int. The fixed seed keeps the column reproducible.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

# Drop the 'Unnamed: 0' column when present
if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Binary conversion of Yes/No columns (and the target) to 1/0.
# NOTE(review): any value other than exactly 'Yes'/'No' becomes NaN here —
# confirm every listed column only contains those two spellings.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Features
numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI']
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Pipelines
# Numeric features: mean-impute, then standardize.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: mode-impute, then one-hot encode. Categories not seen
# during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed, so they pass through unchanged.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# Train Test Split (preprocessor is fitted on the training split only)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Model Training
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped: the installed XGBoost reports it as an
# unused parameter, so passing it only produces a warning.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# --- AQI Fetch ---
def fetch_aqi(city, api_key):
    """Return the current AQI for ``city`` from the WAQI feed API.

    Best-effort helper: a blank city, network error, non-"ok" API status or
    non-numeric reading all yield the fallback value 150 instead of raising.

    Args:
        city: City/station name placed in the feed URL path.
        api_key: WAQI API token sent as the ``token`` query parameter.

    Returns:
        int: The AQI reported by the API, or 150 when it cannot be obtained.
    """
    from urllib.parse import quote

    fallback_aqi = 150
    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up; skip the pointless network round-trip.
        return fallback_aqi

    try:
        # Percent-encode user text so spaces, '/', '?' or '#' cannot change
        # the URL structure. '@', ':', ';' and ',' are left unescaped so path
        # forms that rely on them keep working.
        city_part = quote(city_name, safe="@:;,")
        token_part = quote(str(api_key), safe="")
        url = f"https://api.waqi.info/feed/{city_part}/?token={token_part}"
        # Without a timeout a stalled connection would block the callback forever.
        response = requests.get(url, timeout=10)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        return fallback_aqi
    except Exception:
        # `except Exception` rather than a bare `except:` so KeyboardInterrupt
        # and SystemExit still propagate; every ordinary failure falls back.
        return fallback_aqi

# WAQI API token that predict() passes to fetch_aqi(); left empty here.
API_KEY = ""  # ⚡ Insert your API KEY here

# --- Prediction Function ---
def predict(height, weight, alcohol, fruit, veg, fried_potato, general_health, checkup, sex, age, exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis, city):
    """Score one person with both models and return a plain-text summary.

    Fetches the AQI for ``city``, derives BMI and the High_Pollution flag,
    pushes the single-row frame through the fitted preprocessor and reports
    each model's positive-class probability and 0/1 prediction.

    Returns:
        str: Four result lines (two probabilities, two class predictions).
    """
    current_aqi = fetch_aqi(city, API_KEY)
    height_m = height / 100
    body_mass_index = weight / (height_m ** 2)
    polluted_flag = 1 if current_aqi >= 200 else 0

    # Key order is kept as-is: it fixes the column order of the frame that
    # is handed to the preprocessor.
    features = {}
    features['Height_(cm)'] = height
    features['Weight_(kg)'] = weight
    features['BMI'] = body_mass_index
    features['Alcohol_Consumption'] = alcohol
    features['Fruit_Consumption'] = fruit
    features['Green_Vegetables_Consumption'] = veg
    features['FriedPotato_Consumption'] = fried_potato
    features['AQI'] = current_aqi
    features['General_Health'] = general_health
    features['Checkup'] = checkup
    features['Sex'] = sex
    features['Age_Category'] = age
    features['Exercise'] = exercise
    features['Skin_Cancer'] = skin_cancer
    features['Other_Cancer'] = other_cancer
    features['Smoking_History'] = smoking
    features['Depression'] = depression
    features['Diabetes'] = diabetes
    features['Arthritis'] = arthritis
    features['High_Pollution'] = polluted_flag

    encoded_row = preprocessor.transform(pd.DataFrame([features]))

    # Hard class decisions first, then positive-class probabilities.
    forest_label = rf_model.predict(encoded_row)[0]
    boost_label = xgb_model.predict(encoded_row)[0]
    forest_risk = rf_model.predict_proba(encoded_row)[0][1]
    boost_risk = xgb_model.predict_proba(encoded_row)[0][1]

    summary_lines = [
        f"🧪 Random Forest Risk: {forest_risk:.2f}",
        f"🧪 XGBoost Risk: {boost_risk:.2f}",
        "",
        f"🏥 Random Forest Prediction: {forest_label} (0: Safe, 1: Risk)",
        f"🏥 XGBoost Prediction: {boost_label} (0: Safe, 1: Risk)",
    ]
    return "\n".join(summary_lines)

# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Single-form app: numeric sliders and city on the left, categorical and
    # 0/1 inputs on the right, one button that calls predict().
    gr.Markdown("# ❤️ AI Based Cardiovascular Disease Predictor\n##### Powered by Random Forest & XGBoost")

    with gr.Row():
        with gr.Column():
            height = gr.Slider(minimum=100, maximum=250, label="Height (cm)")
            weight = gr.Slider(minimum=30, maximum=200, label="Weight (kg)")
            alcohol = gr.Slider(minimum=0, maximum=20, label="Alcohol Consumption (per week)")
            fruit = gr.Slider(minimum=0, maximum=10, label="Fruit Consumption (per week)")
            veg = gr.Slider(minimum=0, maximum=10, label="Green Vegetables Consumption (per week)")
            fried_potato = gr.Slider(minimum=0, maximum=10, label="Fried Potato Consumption (per week)")
            city = gr.Textbox(label="City Name for AQI", placeholder="Enter your City")

        with gr.Column():
            general_health = gr.Dropdown(
                choices=["Poor", "Fair", "Good", "Very Good", "Excellent"],
                label="General Health",
            )
            checkup = gr.Dropdown(
                choices=[
                    "Within the past year",
                    "Within the past 2 years",
                    "Within the past 5 years",
                    "5 or more years ago",
                    "Never",
                ],
                label="Last Checkup",
            )
            sex = gr.Dropdown(choices=["Male", "Female"], label="Sex")
            age = gr.Dropdown(
                choices=[
                    "18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
                    "55-59", "60-64", "65-69", "70-74", "75-79", "80+",
                ],
                label="Age Category",
            )

            exercise = gr.Radio(choices=[0, 1], label="Exercise Regularly (0 = No, 1 = Yes)")
            skin_cancer = gr.Radio(choices=[0, 1], label="History of Skin Cancer (0/1)")
            other_cancer = gr.Radio(choices=[0, 1], label="Other Cancer (0/1)")
            smoking = gr.Radio(choices=[0, 1], label="Smoking History (0/1)")
            depression = gr.Radio(choices=[0, 1], label="Depression History (0/1)")
            diabetes = gr.Radio(choices=[0, 1], label="Diabetes (0/1)")
            arthritis = gr.Radio(choices=[0, 1], label="Arthritis (0/1)")

    predict_btn = gr.Button("🔮 Predict Heart Disease Risk")
    output = gr.Textbox(label="Prediction Result")

    # Listed in the same order as predict()'s parameters.
    form_inputs = [
        height, weight, alcohol, fruit, veg, fried_potato,
        general_health, checkup, sex, age,
        exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis,
        city,
    ]
    predict_btn.click(predict, inputs=form_inputs, outputs=output)

app.launch()


Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://386c728e5b17ade36e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import gradio as gr
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer

# Load your dataset
df = pd.read_csv("/content/CVD_cleaned.csv")

# Preprocessing
# Derive BMI (kg / m^2) when the CSV lacks the column.
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulated AQI: normal(mean 150, sd 50) per row, truncated to int, fixed seed.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Encode Yes/No columns (and the target) as 1/0.
# NOTE(review): any value other than exactly 'Yes'/'No' becomes NaN here —
# confirm every listed column only contains those two spellings.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Adding simulated BP, Cholesterol, Family History
# These columns are random draws, generated independently of Heart_Disease,
# so they carry no real signal for the models. The generator is re-seeded so
# the draws do not depend on the AQI draw above.
np.random.seed(42)
df['BP'] = np.random.normal(130, 15, size=len(df)).astype(int)
df['Cholesterol'] = np.random.normal(210, 30, size=len(df)).astype(int)
df['Family_History'] = np.random.choice([0,1], size=len(df))

numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption',
                  'BP', 'Cholesterol', 'AQI']

categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']

binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'Family_History', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Preprocessing Pipeline
# Numeric features: mean-impute, then standardize.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: mode-impute, then one-hot encode. Categories not seen
# during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed, so they pass through unchanged.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Fit on the training split only, then reuse for the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped: the installed XGBoost reports it as an
# unused parameter, so passing it only produces a warning.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# AQI Fetch Function
def fetch_aqi(city, api_key=""):
    """Return the current AQI for ``city`` from the WAQI feed API.

    Best-effort helper: a blank city, network error, non-"ok" API status or
    non-numeric reading all yield the fallback value 150 instead of raising.

    Args:
        city: City/station name placed in the feed URL path.
        api_key: WAQI API token sent as the ``token`` query parameter
            (defaults to an empty token).

    Returns:
        int: The AQI reported by the API, or 150 when it cannot be obtained.
    """
    from urllib.parse import quote

    fallback_aqi = 150
    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up; skip the pointless network round-trip.
        return fallback_aqi

    try:
        # Percent-encode user text so spaces, '/', '?' or '#' cannot change
        # the URL structure. '@', ':', ';' and ',' are left unescaped so path
        # forms that rely on them keep working.
        city_part = quote(city_name, safe="@:;,")
        token_part = quote(str(api_key), safe="")
        url = f"https://api.waqi.info/feed/{city_part}/?token={token_part}"
        # Without a timeout a stalled connection would block the callback forever.
        response = requests.get(url, timeout=10)
        data = response.json()
        if data['status'] == "ok":
            return int(data['data']['aqi'])
        return fallback_aqi
    except Exception:
        # Deliberate catch-all: a failed lookup must never abort a prediction.
        return fallback_aqi

# Final Prediction Function
def predict_heart_disease(height, weight, alcohol, fruit, veggies, friedpotato,
                           bp, cholesterol, city, general_health, checkup, sex, age_category,
                           exercise, skin_cancer, other_cancer, smoking_history,
                           depression, diabetes, arthritis, family_history):
    """Score one person with both models and add a rule-based risk label.

    Derives BMI, looks up the AQI for ``city``, runs the fitted preprocessor
    and both classifiers, then applies fixed thresholds on blood pressure,
    cholesterol, diabetes, family history and general health.

    Returns:
        str: Both models' positive-class probabilities plus the rule-based
        label, or a short validation message when a required numeric input
        is missing or height/weight are not positive.
    """
    # The arithmetic and threshold comparisons below need real numbers;
    # report missing fields instead of failing with TypeError.
    required = {"height": height, "weight": weight, "blood pressure": bp, "cholesterol": cholesterol}
    missing = [field for field, value in required.items() if value is None]
    if missing:
        return "⚠️ Please fill in: " + ", ".join(missing)
    if height <= 0 or weight <= 0:
        return "⚠️ Height and weight must be greater than 0."

    bmi = weight / ((height / 100) ** 2)
    # NOTE(review): no API token is passed, so fetch_aqi() uses its empty
    # default — confirm whether a token should be supplied here.
    aqi = fetch_aqi(city)
    high_pollution = 1 if aqi >= 200 else 0

    user_input = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'BMI': bmi,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruit,
        'Green_Vegetables_Consumption': veggies,
        'FriedPotato_Consumption': friedpotato,
        'BP': bp,
        'Cholesterol': cholesterol,
        'AQI': aqi,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age_category,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking_history,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'Family_History': family_history,
        'High_Pollution': high_pollution
    }

    # Re-select the columns in training order before transforming.
    user_df = pd.DataFrame([user_input])
    X_user = preprocessor.transform(user_df[numerical_cols + categorical_cols + binary_feature_cols])

    rf_prob = rf_model.predict_proba(X_user)[0][1]
    xgb_prob = xgb_model.predict_proba(X_user)[0][1]

    # Risk Logic
    if bp >= 140 or cholesterol >= 240 or diabetes == 1 or family_history == 1 or general_health in ['Poor', 'Fair']:
        risk_label = "⚠️ High Risk"
    # Diabetes is not re-tested here: diabetes == 1 already selects the
    # high-risk branch above, so it could never be true at this point.
    elif bp >= 120 or cholesterol >= 200:
        risk_label = "⚠️ Moderate Risk"
    else:
        risk_label = "✅ Low Risk"

    result = f"""
    🧬 Random Forest Risk Probability: {rf_prob:.2f}
    🧬 XGBoost Risk Probability: {xgb_prob:.2f}

    📋 Health-Based Realistic Risk: {risk_label}
    """

    return result

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Single-form app: numeric inputs, vitals and city, categorical
    # drop-downs and 0/1 radios, wired to predict_heart_disease().
    gr.Markdown("## 🏥 Heart Disease Risk Predictor with Realistic Health Analysis")

    # Offer exactly the Checkup / Age_Category values present in the training
    # frame. The encoder ignores unknown categories, so a choice spelled
    # differently from the data would silently be encoded as "no category".
    checkup_choices = sorted(df['Checkup'].dropna().astype(str).unique())
    age_choices = sorted(df['Age_Category'].dropna().astype(str).unique())

    with gr.Row():
        height = gr.Number(label="Height (cm)")
        weight = gr.Number(label="Weight (kg)")
        alcohol = gr.Number(label="Alcohol Consumption per Week")
        fruit = gr.Number(label="Fruit Servings per Week")
        veggies = gr.Number(label="Green Vegetables per Week")
        friedpotato = gr.Number(label="Fried Potato Servings per Week")

    with gr.Row():
        bp = gr.Number(label="Blood Pressure (systolic mm Hg)")
        cholesterol = gr.Number(label="Cholesterol Level (mg/dL)")
        city = gr.Textbox(label="City Name (for AQI fetch)")

    with gr.Row():
        general_health = gr.Dropdown(["Excellent", "Very Good", "Good", "Fair", "Poor"], label="General Health")
        checkup = gr.Dropdown(checkup_choices, label="Last Checkup")
        sex = gr.Dropdown(["Male", "Female"], label="Sex")
        age_category = gr.Dropdown(age_choices, label="Age Category")

    with gr.Row():
        exercise = gr.Radio([0,1], label="Exercise Regularly? (0=No, 1=Yes)")
        skin_cancer = gr.Radio([0,1], label="Skin Cancer History?")
        other_cancer = gr.Radio([0,1], label="Other Cancer History?")
        smoking_history = gr.Radio([0,1], label="Smoking History?")
        depression = gr.Radio([0,1], label="Depression?")
        diabetes = gr.Radio([0,1], label="Diabetes?")
        arthritis = gr.Radio([0,1], label="Arthritis?")
        family_history = gr.Radio([0,1], label="Family History of Heart Disease?")

    predict_btn = gr.Button("🔍 Predict Risk")
    output = gr.Textbox(label="Prediction Result")

    # The input order must match predict_heart_disease()'s parameter order.
    predict_btn.click(predict_heart_disease,
                      inputs=[height, weight, alcohol, fruit, veggies, friedpotato,
                              bp, cholesterol, city, general_health, checkup, sex, age_category,
                              exercise, skin_cancer, other_cancer, smoking_history,
                              depression, diabetes, arthritis, family_history],
                      outputs=[output])

# Run App
app.launch()


Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://40cd698650628e76f3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import gradio as gr
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer

# Load dataset
df = pd.read_csv("/content/CVD_cleaned.csv")

# Preprocessing
# Derive BMI (kg / m^2) when the CSV lacks the column.
if 'BMI' not in df.columns:
    df['BMI'] = df['Weight_(kg)'] / ((df['Height_(cm)'] / 100) ** 2)

# Simulated AQI: normal(mean 150, sd 50) per row, truncated to int, fixed seed.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=len(df)).astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Encode Yes/No columns (and the target) as 1/0.
# NOTE(review): any value other than exactly 'Yes'/'No' becomes NaN here —
# confirm every listed column only contains those two spellings.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in binary_cols + ['Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Adding BP, Cholesterol, Family History
# These columns are random draws, generated independently of Heart_Disease,
# so they carry no real signal for the models. The generator is not
# re-seeded, so the draws continue the stream seeded before the AQI column.
df['BP'] = np.random.normal(130, 15, size=len(df)).astype(int)
df['Cholesterol'] = np.random.normal(210, 30, size=len(df)).astype(int)
df['Family_History'] = np.random.choice([0, 1], size=len(df))

# Updated features (the fruit / vegetable / fried-potato columns are not used)
numerical_cols = ['Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
                  'BP', 'Cholesterol', 'AQI']

categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']

binary_feature_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
                       'Depression', 'Diabetes', 'Arthritis', 'Family_History', 'High_Pollution']

X = df[numerical_cols + categorical_cols + binary_feature_cols]
y = df['Heart_Disease']

# Preprocessing pipeline
# Numeric features: mean-impute, then standardize.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: mode-impute, then one-hot encode. Categories not seen
# during fit are encoded as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])

# The binary feature columns are not listed, so they pass through unchanged.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
], remainder='passthrough')

# Train models (preprocessor is fitted on the training split only)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is dropped: the installed XGBoost reports it as an
# unused parameter, so passing it only produces a warning.
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# AQI fetch
def fetch_aqi(city, api_key=""):
    """Return the current AQI for ``city`` from the WAQI feed API.

    Best-effort helper: a blank city, network error, non-"ok" API status or
    non-numeric reading all yield the fallback value 150 instead of raising.

    Args:
        city: City/station name placed in the feed URL path.
        api_key: WAQI API token sent as the ``token`` query parameter
            (defaults to an empty token).

    Returns:
        int: The AQI reported by the API, or 150 when it cannot be obtained.
    """
    from urllib.parse import quote

    fallback_aqi = 150
    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up; skip the pointless network round-trip.
        return fallback_aqi

    try:
        # Percent-encode user text so spaces, '/', '?' or '#' cannot change
        # the URL structure. '@', ':', ';' and ',' are left unescaped so path
        # forms that rely on them keep working.
        city_part = quote(city_name, safe="@:;,")
        token_part = quote(str(api_key), safe="")
        url = f"https://api.waqi.info/feed/{city_part}/?token={token_part}"
        # Without a timeout a stalled connection would block the callback forever.
        response = requests.get(url, timeout=10)
        data = response.json()
        if data['status'] == "ok":
            return int(data['data']['aqi'])
        return fallback_aqi
    except Exception:
        # Deliberate catch-all: a failed lookup must never abort a prediction.
        return fallback_aqi

# Prediction function
def predict_heart_disease(height, weight, alcohol,
                           bp, cholesterol, city, general_health, checkup, sex, age_category,
                           exercise, skin_cancer, other_cancer, smoking_history,
                           depression, diabetes, arthritis, family_history):
    """Build a text report with both model probabilities and a rule-based risk label.

    Args:
        height: Height in centimetres; must be greater than zero.
        weight: Weight in kilograms; must be greater than zero.
        alcohol: Value for the ``Alcohol_Consumption`` feature.
        bp: Blood pressure reading used by the rule-based label and the model.
        cholesterol: Cholesterol reading used by the rule-based label and the model.
        city: City name used to look up the current AQI.
        general_health, checkup, sex, age_category: Categorical feature values.
        exercise, skin_cancer, other_cancer, smoking_history, depression,
        diabetes, arthritis, family_history: 0/1 flags.

    Returns:
        A multi-line string with the Random Forest and XGBoost probabilities of
        the positive class plus the rule-based label, or a short message asking
        for the missing input when height, weight, blood pressure or cholesterol
        is unusable.
    """
    # Validate before any network or model work: a blank or zero height/weight
    # would otherwise crash the BMI division below.
    try:
        body_ok = float(height) > 0 and float(weight) > 0
    except (TypeError, ValueError):
        body_ok = False
    if not body_ok:
        return "⚠️ Please enter a height and weight greater than zero."
    if bp is None or cholesterol is None:
        # The threshold comparisons further down cannot handle a missing value.
        return "⚠️ Please enter blood pressure and cholesterol values."

    bmi = weight / ((height / 100) ** 2)
    aqi = fetch_aqi(city)
    high_pollution = 1 if aqi >= 200 else 0

    user_input = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'BMI': bmi,
        'Alcohol_Consumption': alcohol,
        'BP': bp,
        'Cholesterol': cholesterol,
        'AQI': aqi,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age_category,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking_history,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'Family_History': family_history,
        'High_Pollution': high_pollution
    }

    # One-row frame with columns in the same order used to fit the preprocessor.
    user_df = pd.DataFrame([user_input])
    X_user = preprocessor.transform(user_df[numerical_cols + categorical_cols + binary_feature_cols])

    # Probability of the positive class (column 1) from each model.
    rf_prob = rf_model.predict_proba(X_user)[0][1]
    xgb_prob = xgb_model.predict_proba(X_user)[0][1]

    # Rule-based label, independent of the model outputs.
    if bp >= 140 or cholesterol >= 240 or diabetes == 1 or family_history == 1 or general_health in ['Poor', 'Fair']:
        risk_label = "⚠️ High Risk"
    elif bp >= 120 or cholesterol >= 200:
        # `diabetes == 1` is not re-tested here: it is already covered by the
        # high-risk branch above, so it could never be reached at this point.
        risk_label = "⚠️ Moderate Risk"
    else:
        risk_label = "✅ Low Risk"

    result = f"""
    🧬 Random Forest Risk Probability: {rf_prob:.2f}
    🧬 XGBoost Risk Probability: {xgb_prob:.2f}

    📋 Health-Based Realistic Risk: {risk_label}
    """

    return result

# Gradio UI
# Form layout for the heart-disease predictor. Components are created in the
# order they should appear; each `gr.Row()` groups one line of inputs.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## 🏥 Heart Disease Risk Predictor (Cleaned Version)")

    with gr.Row():
        height = gr.Number(label="Height (cm)")
        weight = gr.Number(label="Weight (kg)")
        alcohol = gr.Number(label="Alcohol Consumption (per week)")

    with gr.Row():
        bp = gr.Number(label="Blood Pressure (systolic mm Hg)")
        cholesterol = gr.Number(label="Cholesterol Level (mg/dL)")
        city = gr.Textbox(label="City Name (for AQI fetch)")

    with gr.Row():
        general_health = gr.Dropdown(["Excellent", "Very Good", "Good", "Fair", "Poor"], label="General Health")
        # The one-hot encoder is fitted with handle_unknown='ignore', so a label
        # that does not match a training category is silently encoded as all
        # zeros. The checkup and age labels therefore use the same spellings as
        # the other predictor apps in this notebook ("Within the past ...",
        # "Never", "80+").
        # NOTE(review): presumably these are the exact labels in CVD_cleaned.csv - confirm.
        checkup = gr.Dropdown(["Within the past year", "Within the past 2 years", "Within the past 5 years",
                               "5 or more years ago", "Never"], label="Last Checkup")
        sex = gr.Dropdown(["Male", "Female"], label="Sex")
        age_category = gr.Dropdown(['18-24', '25-29', '30-34', '35-39', '40-44',
                                    '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80+'], label="Age Category")

    with gr.Row():
        exercise = gr.Radio([0,1], label="Exercise Regularly? (0=No, 1=Yes)")
        skin_cancer = gr.Radio([0,1], label="Skin Cancer History?")
        other_cancer = gr.Radio([0,1], label="Other Cancer History?")
        smoking_history = gr.Radio([0,1], label="Smoking History?")
        depression = gr.Radio([0,1], label="Depression?")
        diabetes = gr.Radio([0,1], label="Diabetes?")
        arthritis = gr.Radio([0,1], label="Arthritis?")
        family_history = gr.Radio([0,1], label="Family History of Heart Disease?")

    predict_btn = gr.Button("🔍 Predict Risk")
    output = gr.Textbox(label="Prediction Result")

    # The inputs list must stay in the same order as the positional parameters
    # of predict_heart_disease.
    predict_btn.click(predict_heart_disease,
                      inputs=[height, weight, alcohol,
                              bp, cholesterol, city, general_health, checkup, sex, age_category,
                              exercise, skin_cancer, other_cancer, smoking_history,
                              depression, diabetes, arthritis, family_history],
                      outputs=[output])

# Launch App
app.launch()




Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a3dbcad2f4eb092d1c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# --- Load Data ---
df = pd.read_csv("/content/CVD_cleaned.csv")

# --- Feature Engineering ---
# BMI = weight (kg) / height (m)^2, computed only when the file lacks the column.
if 'BMI' not in df:
    df['BMI'] = df['Weight_(kg)'] / (df['Height_(cm)'] / 100) ** 2

# Simulated AQI: seeded normal draw truncated to integers, plus a flag for
# readings of 200 or more.
np.random.seed(42)
df['AQI'] = np.random.normal(loc=150, scale=50, size=df.shape[0]).astype(int)
df['High_Pollution'] = df['AQI'].ge(200).astype(int)

# Discard the leftover index column if the CSV carries one.
df.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)

# Encode Yes/No columns (and the target) as 1/0. `map` leaves NaN for any
# other value.
# NOTE(review): confirm every column listed here holds only 'Yes'/'No'.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in [*binary_cols, 'Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Feature groups: continuous values, text-valued categories, 0/1 flags.
numerical_cols = [
    'Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
    'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI',
]
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = [
    'Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
    'Depression', 'Diabetes', 'Arthritis', 'High_Pollution',
]

# Feature matrix and target.
X = df[[*numerical_cols, *categorical_cols, *binary_feature_cols]]
y = df['Heart_Disease']

# --- Preprocessing ---
# Numeric branch: fill gaps with the column mean, then standardise.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit encode as all zeros.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Remaining columns (the 0/1 flags) are passed through unchanged.
preprocessor = ColumnTransformer(
    [
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols),
    ],
    remainder='passthrough',
)

# --- Train Models ---
# 80/20 split, stratified on the target so both parts keep the class balance.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
# The preprocessor is fitted on the training rows only and reused for the test rows.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# `use_label_encoder` is no longer passed: the installed XGBoost ignores it and
# only prints 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# --- Fetch Real AQI ---
def fetch_aqi(city, api_key, default_aqi=150, timeout=10):
    """Return the AQI reported by the WAQI feed for ``city``.

    Args:
        city: City name to look up. Blank or ``None`` skips the request.
        api_key: WAQI API token, sent as the ``token`` query parameter.
        default_aqi: Value returned when the lookup cannot produce a number.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The feed's ``data.aqi`` value as an ``int`` when the response status is
        ``"ok"``; otherwise ``default_aqi``. The lookup is best-effort: network
        errors, bad JSON and missing or non-numeric fields all fall back to
        ``default_aqi`` instead of raising.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up, so avoid a pointless network round trip.
        return default_aqi

    try:
        # Percent-encode the city so characters such as "/" or "?" cannot
        # change the request path.
        url = f"https://api.waqi.info/feed/{quote(city_name, safe='')}/"
        # Without a timeout a stalled connection would block the UI indefinitely.
        response = requests.get(url, params={"token": api_key}, timeout=timeout)
        data = response.json()
        if data["status"] == "ok":
            return int(data["data"]["aqi"])
        return default_aqi
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return default_aqi

# --- Hidden API Key ---
# Prefer the WAQI_API_KEY environment variable so a real token never has to be
# saved inside the notebook; the original placeholder remains the fallback.
import os

API_KEY = os.environ.get("WAQI_API_KEY", "your_aqi_api_key_here")  # or insert your API key here

# --- Prediction Function ---
def predict(height, weight, alcohol, fruit, veg, fried_potato, general_health, checkup, sex, age,
            exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis, city):
    """Build a text report with the city's AQI and a model-based risk level.

    Args:
        height: Height in centimetres; must be greater than zero.
        weight: Weight in kilograms; must be greater than zero.
        alcohol, fruit, veg, fried_potato: Values for the consumption features.
        general_health, checkup, sex, age: Categorical feature values.
        exercise, skin_cancer, other_cancer, smoking, depression, diabetes,
        arthritis: 0/1 flags.
        city: City name used to look up the current AQI.

    Returns:
        A multi-line string with the city, its AQI and the risk level derived
        from the Random Forest probability (< 0.3 low, < 0.6 moderate, else
        high), or a short message when height or weight is unusable.
    """
    # Validate before any network or model work: a blank or zero height/weight
    # would otherwise crash the BMI division below.
    try:
        body_ok = float(height) > 0 and float(weight) > 0
    except (TypeError, ValueError):
        body_ok = False
    if not body_ok:
        return "⚠️ Please enter a height and weight greater than zero."

    aqi = fetch_aqi(city, API_KEY)
    bmi = weight / ((height / 100) ** 2)
    high_pollution = 1 if aqi >= 200 else 0

    input_data = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'BMI': bmi,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruit,
        'Green_Vegetables_Consumption': veg,
        'FriedPotato_Consumption': fried_potato,
        'AQI': aqi,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'High_Pollution': high_pollution
    }

    # One-row frame, with columns pinned to the order used to fit the preprocessor.
    input_df = pd.DataFrame([input_data])
    input_df = input_df[numerical_cols + categorical_cols + binary_feature_cols]
    input_transformed = preprocessor.transform(input_df)

    # Probability of the positive class (column 1). Either rf_model or
    # xgb_model could be used here.
    pred_prob = rf_model.predict_proba(input_transformed)[0][1]

    # Set thresholds
    if pred_prob < 0.3:
        risk_level = "✅ Low Risk"
    elif pred_prob < 0.6:
        risk_level = "⚠️ Moderate Risk"
    else:
        risk_level = "🚨 High Risk"

    result = f"""
    📍 City: {city}
    📈 Current AQI: {aqi}

    🩺 Cardiovascular Disease Risk: {risk_level}
    """

    return result

# --- Gradio Interface ---
# Two-column form: numeric inputs and the city on the left, categorical
# dropdowns and 0/1 radios on the right. Components are created in the order
# they should appear, so statement order here defines the layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# ❤️ Cardiovascular Disease Predictor\n### Using ML + Live AQI Data")

    with gr.Row():
        # Left column: numeric measurements and the city used for the AQI lookup.
        with gr.Column():
            height = gr.Number(label="Height (cm)")
            weight = gr.Number(label="Weight (kg)")
            alcohol = gr.Number(label="Alcohol Consumption (per week)")
            fruit = gr.Number(label="Fruit Consumption (per week)")
            veg = gr.Number(label="Green Vegetables Consumption (per week)")
            fried_potato = gr.Number(label="Fried Potato Consumption (per week)")
            city = gr.Textbox(label="City (for real-time AQI)", placeholder="Enter City")

        # Right column: categorical choices followed by the 0/1 flags.
        with gr.Column():
            # NOTE(review): the dropdown labels presumably need to match the
            # category strings in the training data exactly - confirm.
            general_health = gr.Dropdown(["Poor", "Fair", "Good", "Very Good", "Excellent"], label="General Health")
            checkup = gr.Dropdown(["Within the past year", "Within the past 2 years", "Within the past 5 years", "5 or more years ago", "Never"], label="Last Checkup")
            sex = gr.Dropdown(["Male", "Female"], label="Sex")
            age = gr.Dropdown(["18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"], label="Age Category")

            exercise = gr.Radio([0, 1], label="Exercise (0 = No, 1 = Yes)")
            skin_cancer = gr.Radio([0, 1], label="Skin Cancer History (0/1)")
            other_cancer = gr.Radio([0, 1], label="Other Cancer (0/1)")
            smoking = gr.Radio([0, 1], label="Smoking History (0/1)")
            depression = gr.Radio([0, 1], label="Depression History (0/1)")
            diabetes = gr.Radio([0, 1], label="Diabetes (0/1)")
            arthritis = gr.Radio([0, 1], label="Arthritis (0/1)")

    predict_btn = gr.Button("🔮 Predict Risk")
    output = gr.Textbox(label="Prediction Result")

    # The inputs list is passed positionally, so it must stay in the same order
    # as the parameters of predict().
    predict_btn.click(
        predict,
        inputs=[height, weight, alcohol, fruit, veg, fried_potato, general_health, checkup, sex, age,
                exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis, city],
        outputs=output
    )

# Start the app.
app.launch()




Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://85fbbea18959b83dec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio
import pandas as pd
import numpy as np
import requests
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# --- Load Dataset ---
df = pd.read_csv("/content/CVD_cleaned.csv")

# Derive BMI (kg / m^2) when the file does not already provide it.
if 'BMI' not in df:
    height_m = df['Height_(cm)'] / 100
    df['BMI'] = df['Weight_(kg)'] / (height_m ** 2)

# Simulated AQI: seeded normal draw truncated to integers, with a flag for
# readings of 200 or more.
np.random.seed(42)
simulated_aqi = np.random.normal(loc=150, scale=50, size=len(df))
df['AQI'] = simulated_aqi.astype(int)
df['High_Pollution'] = (df['AQI'] >= 200).astype(int)

# Remove the leftover index column if present.
df.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)

# Yes/No -> 1/0 for the flag columns and the target; `map` leaves NaN for any
# other value.
# NOTE(review): confirm every column listed here holds only 'Yes'/'No'.
binary_cols = ['Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History', 'Depression', 'Diabetes', 'Arthritis']
for col in [*binary_cols, 'Heart_Disease']:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

# Feature groups: continuous values, text-valued categories, 0/1 flags.
numerical_cols = [
    'Height_(cm)', 'Weight_(kg)', 'BMI', 'Alcohol_Consumption',
    'Fruit_Consumption', 'Green_Vegetables_Consumption', 'FriedPotato_Consumption', 'AQI',
]
categorical_cols = ['General_Health', 'Checkup', 'Sex', 'Age_Category']
binary_feature_cols = [
    'Exercise', 'Skin_Cancer', 'Other_Cancer', 'Smoking_History',
    'Depression', 'Diabetes', 'Arthritis', 'High_Pollution',
]

# Feature matrix and target.
X = df[[*numerical_cols, *categorical_cols, *binary_feature_cols]]
y = df['Heart_Disease']

# Pipelines
# Numeric columns: mean imputation followed by standardisation.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: most-frequent imputation followed by one-hot encoding;
# categories unseen during fit are encoded as all zeros.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Any column not listed (the 0/1 flags) is passed through as-is.
preprocessor = ColumnTransformer(
    [
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols),
    ],
    remainder='passthrough',
)

# Train Test Split
# 80/20 split, stratified on the target so both parts keep the class balance.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
# The preprocessor is fitted on the training rows only and reused for the test rows.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Train Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# `use_label_encoder` is no longer passed: the installed XGBoost ignores it and
# only prints 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = XGBClassifier(eval_metric='logloss').fit(X_train, y_train)

# --- AQI Fetch ---
def fetch_aqi(city, api_key, default_aqi=150, timeout=10):
    """Return the AQI reported by the WAQI feed for ``city``.

    Args:
        city: City name to look up. Blank or ``None`` skips the request.
        api_key: WAQI API token, sent as the ``token`` query parameter.
        default_aqi: Fallback returned when no numeric AQI can be obtained.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The feed's ``data.aqi`` value as an ``int`` when the response status is
        ``'ok'``; otherwise ``default_aqi``. The lookup is best-effort: network
        errors, bad JSON and missing or non-numeric fields all fall back to
        ``default_aqi`` instead of raising.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    city_name = "" if city is None else str(city).strip()
    if not city_name:
        # Nothing to look up, so avoid a pointless network round trip.
        return default_aqi

    try:
        # Percent-encode the city so characters such as "/" or "?" cannot
        # change the request path.
        url = f"https://api.waqi.info/feed/{quote(city_name, safe='')}/"
        # Without a timeout a stalled connection would block the UI indefinitely.
        response = requests.get(url, params={"token": api_key}, timeout=timeout)
        data = response.json()
        if data['status'] == 'ok':
            return int(data['data']['aqi'])
        return default_aqi  # fallback
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return default_aqi

# Prefer the WAQI_API_KEY environment variable so a real token never has to be
# saved inside the notebook; the empty string remains the fallback.
import os

API_KEY = os.environ.get("WAQI_API_KEY", "")  # or insert your WAQI API key here

# --- Prediction Logic ---
def predict(height, weight, systolic_bp, cholesterol, alcohol, fruit, veg, fried_potato, general_health, checkup, sex, age, exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis, family_history, city):
    """Return a short report with the city's AQI and a points-based risk level.

    Points are accumulated from fixed clinical/lifestyle rules, nudged by one
    point up or down when the Random Forest probability is above 0.8 or below
    0.2, and then mapped to a label (>= 7 high, >= 4 moderate, otherwise low).
    """
    aqi = fetch_aqi(city, API_KEY)
    height_m = height / 100
    bmi = weight / (height_m ** 2)
    high_pollution = int(aqi >= 200)

    # Rule table: (factor present?, points awarded).
    weighted_factors = (
        (systolic_bp > 140, 2),
        (cholesterol > 200, 2),
        (diabetes == 1, 2),
        (family_history == 1, 2),
        (general_health in ("Poor", "Fair"), 2),
        (bmi > 30, 1),
        (smoking == 1, 1),
        (aqi >= 200, 1),
    )
    risk_points = sum(points for present, points in weighted_factors if present)

    # Single-row frame holding the features the model was trained on.
    feature_row = {
        'Height_(cm)': height,
        'Weight_(kg)': weight,
        'BMI': bmi,
        'Alcohol_Consumption': alcohol,
        'Fruit_Consumption': fruit,
        'Green_Vegetables_Consumption': veg,
        'FriedPotato_Consumption': fried_potato,
        'AQI': aqi,
        'General_Health': general_health,
        'Checkup': checkup,
        'Sex': sex,
        'Age_Category': age,
        'Exercise': exercise,
        'Skin_Cancer': skin_cancer,
        'Other_Cancer': other_cancer,
        'Smoking_History': smoking,
        'Depression': depression,
        'Diabetes': diabetes,
        'Arthritis': arthritis,
        'High_Pollution': high_pollution,
    }
    features = preprocessor.transform(pd.DataFrame([feature_row]))

    # Probability of the positive class from the Random Forest.
    positive_prob = model.predict_proba(features)[0][1]

    # Nudge the score by one point when the model is confident either way.
    if positive_prob > 0.8:
        risk_points = risk_points + 1
    elif positive_prob < 0.2:
        risk_points = risk_points - 1

    # Map the final score to a label, checking the highest threshold first.
    for threshold, label in ((7, "🚨 High Risk"), (4, "⚠️ Moderate Risk")):
        if risk_points >= threshold:
            risk_level = label
            break
    else:
        risk_level = "✅ Low Risk"

    return f"City AQI: {aqi}\n\nYour Final Heart Disease Risk Level: {risk_level}"

# --- Gradio App ---
# Two-column form: sliders and the city box on the left, categorical dropdowns
# and 0/1 radios on the right. Components are created in the order they should
# appear, so statement order here defines the layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# ❤️ Cardiovascular Disease Risk Predictor\n#### (Real World Data + ML Assisted)")

    with gr.Row():
        # Left column: bounded sliders (min, max) for the numeric inputs.
        with gr.Column():
            height = gr.Slider(100, 250, label="Height (cm)")
            weight = gr.Slider(30, 200, label="Weight (kg)")
            systolic_bp = gr.Slider(80, 200, label="Systolic BP (mmHg)")
            cholesterol = gr.Slider(100, 300, label="Cholesterol (mg/dL)")
            alcohol = gr.Slider(0, 20, label="Alcohol Consumption (per week)")
            fruit = gr.Slider(0, 10, label="Fruit Consumption (per week)")
            veg = gr.Slider(0, 10, label="Vegetable Consumption (per week)")
            fried_potato = gr.Slider(0, 10, label="Fried Potato Consumption (per week)")
            city = gr.Textbox(label="City for AQI", placeholder="Enter your City")

        # Right column: categorical choices followed by the 0/1 flags.
        with gr.Column():
            # NOTE(review): the dropdown labels presumably need to match the
            # category strings in the training data exactly - confirm.
            general_health = gr.Dropdown(["Poor", "Fair", "Good", "Very Good", "Excellent"], label="General Health")
            checkup = gr.Dropdown(["Within the past year", "Within the past 2 years", "Within the past 5 years", "5 or more years ago", "Never"], label="Last Checkup")
            sex = gr.Dropdown(["Male", "Female"], label="Sex")
            age = gr.Dropdown(["18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"], label="Age Category")

            exercise = gr.Radio([0, 1], label="Exercise Regularly (0 = No, 1 = Yes)")
            skin_cancer = gr.Radio([0, 1], label="History of Skin Cancer (0/1)")
            other_cancer = gr.Radio([0, 1], label="Other Cancer History (0/1)")
            smoking = gr.Radio([0, 1], label="Smoking History (0/1)")
            depression = gr.Radio([0, 1], label="Depression History (0/1)")
            diabetes = gr.Radio([0, 1], label="Diabetes (0/1)")
            arthritis = gr.Radio([0, 1], label="Arthritis (0/1)")
            family_history = gr.Radio([0, 1], label="Family History of Heart Disease (0/1)")

    predict_btn = gr.Button("🔮 Predict Risk")
    output = gr.Textbox(label="Prediction Result")

    # The inputs list is passed positionally, so it must stay in the same order
    # as the parameters of predict().
    predict_btn.click(
        predict,
        inputs=[height, weight, systolic_bp, cholesterol, alcohol, fruit, veg, fried_potato, general_health, checkup, sex, age, exercise, skin_cancer, other_cancer, smoking, depression, diabetes, arthritis, family_history, city],
        outputs=output
    )

# Start the app.
app.launch()

Collecting gradio
  Downloading gradio-5.28.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6

Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://621e07afefa5f1b0e1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
