In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.5.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.linear_model import LinearRegression, Ridge, SGDRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Load the dataset
file_path = "https://raw.githubusercontent.com/h669841/Diverse/refs/heads/main/Updated_Correlated_Synthetic_Health_Dataset_with_Alcohol_Consumption_v7.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
X = data.drop("Age_at_Death", axis=1)
y = data["Age_at_Death"]

def evaluate_modelRandomForest(test_size, random_states):
    mae_list = []


    r2_list = []
    for state in random_states:
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)


        # Initialize and train the linear regression model
        model = RandomForestRegressor()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

    # Calculate average MAE and R2
    avg_mae = sum(mae_list) / len(mae_list)
    avg_r2 = sum(r2_list) / len(r2_list)

    print(f"Test Size: RandForest {test_size}")
    print(f"Average Mean Absolute Error (RandForest): {avg_mae}")
    print(f"Average R-squared RandForest(R2): {avg_r2}\n")



def evaluate_DecisionTreeRegressor(test_size, random_states):
    mae_list = []


    r2_list = []
    for state in random_states:
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)


        # Initialize and train the linear regression model
        model = DecisionTreeRegressor()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

    # Calculate average MAE and R2
    avg_mae = sum(mae_list) / len(mae_list)
    avg_r2 = sum(r2_list) / len(r2_list)

    print(f"Test Size: DecisionTreeRegressor {test_size}")
    print(f"Average Mean Absolute Error (DecisionTreeRegressor): {avg_mae}")
    print(f"Average R-squared DecisionTreeRegressor(R2): {avg_r2}\n")







def evaluate_modelGradientBoostingRegressor(test_size, random_states):
    mae_list = []


    r2_list = []
    for state in random_states:
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)


        # Initialize and train the linear regression model
        model = GradientBoostingRegressor()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

    # Calculate average MAE and R2
    avg_mae = sum(mae_list) / len(mae_list)
    avg_r2 = sum(r2_list) / len(r2_list)

    print(f"Test Size: GradientBoostingRegressor {test_size}")
    print(f"Average Mean Absolute Error (GradientBoostingRegressor): {avg_mae}")
    print(f"Average R-squared GradientBoostingRegressor(R2): {avg_r2}\n")









def evaluate_modelRidge(test_size, random_states):
    mae_list = []


    r2_list = []
    for state in random_states:
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)


        # Initialize and train the linear regression model
        model = Ridge()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

    # Calculate average MAE and R2
    avg_mae = sum(mae_list) / len(mae_list)
    avg_r2 = sum(r2_list) / len(r2_list)

    print(f"Test Size: Ridge {test_size}")
    print(f"Average Mean Absolute Error (Ridge): {avg_mae}")
    print(f"Average R-squared Ridge(R2): {avg_r2}\n")




def evaluate_modelSDG(test_size, random_states, maxIter):

    for mi in maxIter:

      mae_list = []
      r2_list = []

      for state in random_states:

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)




        # Initialize and train the linear regression model
        model = SGDRegressor(max_iter=mi)
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

      # Calculate average MAE and R2
      avg_mae = sum(mae_list) / len(mae_list)
      avg_r2 = sum(r2_list) / len(r2_list)

      print(f"Test Size: SDG {test_size}")
      print(f"Test Size: SDG max Iter {mi}")
      print(f"Average Mean Absolute Error (SDG): {avg_mae}")
      print(f"Average R-squared SDG(R2): {avg_r2}\n")




def evaluate_model(test_size, random_states):
    mae_list = []


    r2_list = []
    for state in random_states:
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=state)


        # Initialize and train the linear regression model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mae_list.append(mae)
        r2_list.append(r2)

    # Calculate average MAE and R2
    avg_mae = sum(mae_list) / len(mae_list)
    avg_r2 = sum(r2_list) / len(r2_list)

    print(f"Test Size: {test_size}")
    print(f"Average Mean Absolute Error (MAE): {avg_mae}")
    print(f"Average R-squared (R2): {avg_r2}\n")

# Evaluate the model with different test sizes and multiple random states
random_states = [0, 1, 42, 99, 123]



evaluate_modelRandomForest(test_size=0.2, random_states=random_states)
evaluate_modelRandomForest(test_size=0.35, random_states=random_states)




evaluate_DecisionTreeRegressor(test_size=0.2, random_states=random_states)
evaluate_DecisionTreeRegressor(test_size=0.35, random_states=random_states)


evaluate_modelGradientBoostingRegressor(test_size=0.2, random_states=random_states)
evaluate_modelGradientBoostingRegressor(test_size=0.35, random_states=random_states)



evaluate_model(test_size=0.2, random_states=random_states)
evaluate_model(test_size=0.35, random_states=random_states)


maxIter=[100,1000,10000]
evaluate_modelSDG(test_size=0.2, random_states=random_states, maxIter=maxIter)
evaluate_modelSDG(test_size=0.35, random_states=random_states,maxIter=maxIter)



evaluate_modelRidge(test_size=0.2, random_states=random_states)
evaluate_modelRidge(test_size=0.35, random_states=random_states)



# Print model coefficients for interpretation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor()
model.fit(X_train, y_train)

#print("\nModel Coefficients:")
#for feature, coef in zip(X.columns, model.coef_):
 #   print(f"{feature}: {coef}")


Test Size: RandForest 0.2
Average Mean Absolute Error (RandForest): 4.58096
Average R-squared RandForest(R2): 0.7182297525210484

Test Size: RandForest 0.35
Average Mean Absolute Error (RandForest): 4.580274285714286
Average R-squared RandForest(R2): 0.7226677912237203

Test Size: DecisionTreeRegressor 0.2
Average Mean Absolute Error (DecisionTreeRegressor): 5.92
Average R-squared DecisionTreeRegressor(R2): 0.5001368321115514

Test Size: DecisionTreeRegressor 0.35
Average Mean Absolute Error (DecisionTreeRegressor): 5.912000000000001
Average R-squared DecisionTreeRegressor(R2): 0.5264211411020272

Test Size: GradientBoostingRegressor 0.2
Average Mean Absolute Error (GradientBoostingRegressor): 4.224116078093751
Average R-squared GradientBoostingRegressor(R2): 0.7669917609867287

Test Size: GradientBoostingRegressor 0.35
Average Mean Absolute Error (GradientBoostingRegressor): 4.327383838590937
Average R-squared GradientBoostingRegressor(R2): 0.7574714634353935

Test Size: 0.2
Average M



Test Size: SDG 0.2
Test Size: SDG max Iter 100
Average Mean Absolute Error (SDG): 79733801.50044909
Average R-squared SDG(R2): -133431772743248.0





Test Size: SDG 0.2
Test Size: SDG max Iter 1000
Average Mean Absolute Error (SDG): 221309.49765911317
Average R-squared SDG(R2): -927350561.298489

Test Size: SDG 0.2
Test Size: SDG max Iter 10000
Average Mean Absolute Error (SDG): 26795.595216694193
Average R-squared SDG(R2): -36406200.99988719

Test Size: SDG 0.35
Test Size: SDG max Iter 100
Average Mean Absolute Error (SDG): 49110158.858633384
Average R-squared SDG(R2): -55564838011002.86





Test Size: SDG 0.35
Test Size: SDG max Iter 1000
Average Mean Absolute Error (SDG): 8037983.893449724
Average R-squared SDG(R2): -2774639980243.901

Test Size: SDG 0.35
Test Size: SDG max Iter 10000
Average Mean Absolute Error (SDG): 14467523.434855634
Average R-squared SDG(R2): -4413671649907.138

Test Size: Ridge 0.2
Average Mean Absolute Error (Ridge): 5.997482140782154
Average R-squared Ridge(R2): 0.44466292904513444

Test Size: Ridge 0.35
Average Mean Absolute Error (Ridge): 5.937323700994094
Average R-squared Ridge(R2): 0.465190971603509



In [3]:
data.describe()

Unnamed: 0,Age_at_Death,Gender,Smoking_Status,Physical_Activity,BMI,Diet,Stress_Level,Income_Level,Alcohol_Consumption
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,81.028,0.51,0.215,0.895,24.975133,0.899,0.802,0.898,0.702
std,10.979151,0.50015,0.543204,0.697468,3.902818,0.692299,0.753235,0.708589,0.859031
min,50.0,0.0,0.0,0.0,11.700977,0.0,0.0,0.0,0.0
25%,77.0,0.0,0.0,0.0,22.256997,0.0,0.0,0.0,0.0
50%,83.0,1.0,0.0,1.0,25.003759,1.0,1.0,1.0,0.0
75%,89.0,1.0,0.0,1.0,27.784076,1.0,1.0,1.0,1.0
max,99.0,1.0,2.0,2.0,37.247788,2.0,2.0,2.0,3.0


In [None]:
# Define prediction function
def predict_life_expectancy(age, gender, smoking_status, physical_activity, height, weight, diet, stress_level, income_level, alcohol_consumption, smoking_resolution, drinking_resolution, diet_resolution, physical_activity_resolution, stress_resolution):

    # Calculate BMI
    bmi = weight / ((height / 100) ** 2)

    # Define current feature set
    current_features = pd.DataFrame({
        'Gender': [gender],
        'Smoking_Status': [smoking_status],
        'Physical_Activity': [physical_activity],
        'BMI': [bmi],
        'Diet': [diet],
        'Stress_Level': [stress_level],
        'Income_Level': [income_level],
        'Alcohol_Consumption': [alcohol_consumption]
    })

    # Predict current life expectancy
    current_life_expectancy = model.predict(current_features)[0]

    # Apply New Year's resolution changes
    updated_features = current_features.copy()
    updated_features['Smoking_Status'] = 0 if smoking_resolution == 0 else smoking_status  # Update smoking status if quitting
    updated_features['Alcohol_Consumption'] = 0 if drinking_resolution == 0 else alcohol_consumption  # Update drinking status if quitting
    updated_features['Diet'] = max(diet, diet_resolution)  # Update diet to the healthier value
    updated_features['Physical_Activity'] = max(physical_activity, physical_activity_resolution)  # Update physical activity to higher level
    #updated_features['Stress_Level'] = min(stress_level, stress_resolution)  # Update stress level to lower level
    updated_features['Stress_Level'] = 0 if stress_resolution == 0 else stress_level  # Update stress level if user are going to focus on reducing stress

    # Update BMI based on activity and diet resolutions
    if physical_activity_resolution > physical_activity:
        updated_features['BMI'] = max(bmi - 1, 18.5)  # Assume BMI decreases if physical activity increases
    if diet_resolution > diet:
        updated_features['BMI'] = (updated_features['BMI'] - 0.5).clip(lower=18.5)  # Assume BMI decreases slightly with improved diet

    # Predict updated life expectancy
    updated_life_expectancy = model.predict(updated_features)[0]

    # Calculate extra years of life
    extra_years = updated_life_expectancy - current_life_expectancy

    # Calculate the extra days based on one year of keeping the resolution
    extra_days = (extra_years / ((81 + extra_years) - age)) * 365

    return f"If you follow your New Year's resolution, you could live approximately {extra_years:.2f} more years.\n\nIf you follow your New Year's resolution for one year, you could live approximately {extra_days:.2f} more days."


# Create Gradio interface
title = "New year resolution calculator"
desc = "Find out how many years you can save if you achieve your new years-resolutions."
long_desc = "Disclaimer"
interface = gr.Interface(
    fn=predict_life_expectancy,
    title=title,
    description=desc,
    article=long_desc,
    inputs=[
        gr.Number(label="Age", precision=0, value=20, minimum=20, maximum=70),
        gr.Dropdown(choices=["Male", "Female"], type="index", label="What is your gender?"),
        gr.Dropdown(choices=["Non-smoker", "Occasional smoker", "Regular smoker"], type="index", label="What is your smoking status?"),
        gr.Dropdown(choices=["0 times", "1-3 times", "More than 3 times"], type="index", label="How many days do you exercise per week?"),
        gr.Number(label="Height (in cm)", precision=0, value=170, minimum=50),
        gr.Number(label="Weight (in kg)", precision=0, value=70, minimum=20),
        gr.Dropdown(choices=["Unhealthy", "Balanced", "Healthy"], type="index", label="How do you consider your diet?"),
        gr.Dropdown(choices=["Low", "Medium", "High"], type="index", label="What is your stress level"),
        gr.Dropdown(choices=["Low", "Medium", "High"], type="index", label="What is your income level?"),
        gr.Dropdown(choices=["0-8 units/week", "8-17 units/week", "17-25 units/week", "25-33 units/week"], type="index", label="How many units of alcohol do you drink/week?"),
        gr.Dropdown(choices=["Yes", "No"], type="index", label="Will you quit smoking as a New Year's resolution?", value="No"),
        gr.Dropdown(choices=["Yes", "No"], type="index", label="Will you quit drinking alcohol as a New Year's resolution?", value="No"),
        gr.Dropdown(choices=["Unhealthy", "Balanced", "Healthy"], type="index", label="How will your diet be in the new year?"),
        gr.Dropdown(choices=["0 times", "1-3 times", "More than 3 times"], type="index", label="How many days will your exercise every week in the new year?"),
        #gr.Slider(0, 2, step=1, label="Will focus on reducing stress as a New Year's resolution? (0: Low, 1: Medium, 2: High)")
        gr.Dropdown(choices=["Yes", "No"], type="index", label="Will focus on reducing stress as a New Year's resolution?")
    ],
    outputs="text"
)

# Launch the Gradio app
interface.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://602e12d2c53cf27588.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
