#### City metrics

- This notebook bridges the user's city selection and the model input: it maps each city to its associated metrics (population, green-space area, and air quality index) so they can be passed to the model.

In [2]:
import pandas as pd
import json

# Load the cleaned final dataset.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable DATA_DIR so the notebook runs on other machines.
df = pd.read_csv('/Users/alexandreribeiro/Documents/GitHub/final_project/data/clean/data_with_cities.csv')

# Keep only the first row seen for each city so every city maps to exactly
# one set of metrics.
df_unique = df.drop_duplicates(subset='city')

# Columns exported for each city. The adjusted_* rates must be part of the
# mapping: adjust_metrics_based_on_input() increments them in place and
# make_adjusted_prediction() feeds all seven columns to the scaler. Exporting
# only the first three made every "Yes" answer fail with a KeyError and left
# the remaining model features as NaN.
CITY_METRIC_COLUMNS = [
    'population_city',
    'greenspacearea_km2',
    'AQI',
    'adjusted_obesity_rate',
    'adjusted_smoking_rate',
    'adjusted_copd_rate',
    'adjusted_depression_rate',
]

# Create a dictionary mapping each city to its metrics: {city: {column: value}}
city_mapping = df_unique.set_index('city')[CITY_METRIC_COLUMNS].to_dict(orient='index')

# Save the mapping to a JSON file for use in the Streamlit app
with open('city_mapping.json', 'w') as f:
    json.dump(city_mapping, f)

print("City-metrics mapping saved successfully.")

City-metrics mapping saved successfully.


In [3]:
def adjust_metrics_based_on_input(city_metrics, obesity, smoking, copd, depression):
    """
    Return a copy of a city's metrics with health rates raised for each "Yes" answer.

    Args:
    city_metrics (dict): Original metrics for the selected city (left unmodified).
    obesity (str): "Yes" adds 0.1 to 'adjusted_obesity_rate'.
    smoking (str): "Yes" adds 0.1 to 'adjusted_smoking_rate'.
    copd (str): "Yes" adds 0.05 to 'adjusted_copd_rate'.
    depression (str): "Yes" adds 0.05 to 'adjusted_depression_rate'.

    Returns:
    dict: Shallow copy of city_metrics with the applicable increments applied.

    Raises:
    KeyError: If an answer is "Yes" but the matching rate key is not in city_metrics.
    """
    result = city_metrics.copy()

    # (user answer, metric key, increment), applied in this order.
    # Only the exact string "Yes" triggers an adjustment.
    bumps = (
        (obesity, 'adjusted_obesity_rate', 0.1),
        (smoking, 'adjusted_smoking_rate', 0.1),
        (copd, 'adjusted_copd_rate', 0.05),
        (depression, 'adjusted_depression_rate', 0.05),
    )
    for answer, rate_key, increment in bumps:
        if answer == "Yes":
            result[rate_key] += increment

    return result

In [4]:
import json

# Read the city -> metrics mapping back from the JSON file in the current
# working directory; get_city_metrics() looks cities up in this dictionary.
with open('city_mapping.json', mode='r') as f:
    city_mapping = json.loads(f.read())

def get_city_metrics(city_name):
    """
    Look up the stored metrics for a city in the module-level city_mapping.

    Args:
    city_name (str): Name of the city, matched exactly against the mapping keys.

    Returns:
    dict: Metrics for the city, or None when the city is not in the mapping.
    """
    # dict.get already yields None for an unknown key
    metrics = city_mapping.get(city_name)
    return metrics

In [5]:
def make_adjusted_prediction(city_name, obesity, smoking, copd, depression, scaler, models):
    """
    Build the model input for a city plus the user's answers and return a prediction.

    Args:
    city_name (str): Name of the city to look up via get_city_metrics().
    obesity (str): "Yes" or "No" indicating user obesity.
    smoking (str): "Yes" or "No" indicating user smoking.
    copd (str): "Yes" or "No" indicating user COPD.
    depression (str): "Yes" or "No" indicating user depression.
    scaler: Object exposing transform(); presumably the scaler fitted on the
        training features in the same column order (TODO confirm).
    models: Passed through unchanged to make_prediction().

    Returns:
    Whatever make_prediction() returns for the scaled single-row input.

    Raises:
    ValueError: If the city is unknown, or if its metrics lack one of the
        features the model expects.
    KeyError: Propagated from adjust_metrics_based_on_input() when a "Yes"
        answer targets a rate that is missing from the city's metrics.
    """
    # Step 1: Retrieve city metrics
    city_metrics = get_city_metrics(city_name)
    if city_metrics is None:
        raise ValueError(f"No data available for city: {city_name}")

    # Step 2: Adjust metrics based on user input
    adjusted_metrics = adjust_metrics_based_on_input(city_metrics, obesity, smoking, copd, depression)

    # Step 3: Prepare input for model (convert to DataFrame)
    feature_names = ['population_city', 'greenspacearea_km2', 'AQI', 'adjusted_obesity_rate',
                     'adjusted_smoking_rate', 'adjusted_copd_rate', 'adjusted_depression_rate']

    # pd.DataFrame(..., columns=...) silently fills absent keys with NaN, which
    # would reach the scaler and model unnoticed; fail loudly instead.
    missing_features = [name for name in feature_names if name not in adjusted_metrics]
    if missing_features:
        raise ValueError(
            f"Metrics for city {city_name} are missing model features: {missing_features}"
        )

    input_data = pd.DataFrame([adjusted_metrics], columns=feature_names)

    # Step 4: Scale the input data
    input_data_scaled = scaler.transform(input_data)

    # Step 5: Make the prediction
    # NOTE(review): make_prediction is not defined in the visible cells; it must
    # be defined or imported before this function is called.
    prediction = make_prediction(input_data_scaled, models)

    return prediction

In [6]:
# Re-read the cleaned dataset for inspection. The bare name on the last line
# makes the notebook render the frame as the cell output.
df2 = pd.read_csv(
    '/Users/alexandreribeiro/Documents/GitHub/final_project/data/clean/data_with_cities.csv'
)

df2

Unnamed: 0,city,country,population_city,greenspacearea_km2,AQI,adjusted_obesity_rate,adjusted_smoking_rate,adjusted_exercising_rate,adjusted_chronic_rate,adjusted_life_expectancy,adjusted_copd_rate,adjusted_depression_rate
0,Town 'n' Country,United States,5.580401,-0.012540,0.014227,1.682852,1.389244,2.119851,1.772071,1.960849,1.628696,1.211327
1,Atascocita,United States,5.553693,0.516264,-0.078162,1.117487,0.624299,0.883269,0.895887,0.976170,0.587606,0.611860
2,Fort Myers,United States,5.520049,0.080073,-0.946613,1.654772,1.364492,2.087964,1.743226,1.930107,1.602239,1.187964
3,Citrus Heights,United States,5.394723,-0.529781,1.806562,0.032259,-0.171918,0.500936,0.056370,0.420931,-0.243810,0.002308
4,San Ramon,United States,5.314361,-0.271656,2.521032,0.012965,-0.187696,0.476070,0.036783,0.396717,-0.258318,-0.015646
...,...,...,...,...,...,...,...,...,...,...,...,...
3233,Southampton,United States,-0.871769,-0.759604,-0.047365,-1.052364,-0.903615,-1.062314,-1.001259,-1.073516,-0.944539,-0.990235
3234,Lewistown,United States,-0.872008,-1.142277,-0.848065,-1.052569,-0.903838,-1.062520,-1.001482,-1.073726,-0.944733,-0.990432
3235,Delavan,United States,-0.872247,-0.983041,0.574717,-0.193296,-0.203849,-0.257984,-0.357264,-0.345200,-0.432375,-0.219548
3236,Altoona,United States,-0.873283,-1.171349,-0.700243,-0.195497,-0.205881,-0.260105,-0.359216,-0.347225,-0.434001,-0.221578
