#### City metrics

- This notebook bridges the user's city selection and the model input: it maps each city to its associated metrics (green space area, population, and air quality index) and translates them into the features the model expects.

In [2]:
import pandas as pd
import json

# Read the cleaned, city-annotated dataset from disk.
# NOTE(review): this is an absolute, machine-specific path; consider a
# path relative to the repository so the notebook runs on other machines.
df = pd.read_csv('/Users/alexandreribeiro/Documents/GitHub/final_project/data/clean/data_with_cities.csv')

# Keep a single row per city; the first occurrence of each city wins.
df_unique = df.drop_duplicates(subset='city', keep='first')

# Build {city: {metric_name: value}} from the three city-level metric columns.
metric_columns = ['population_city', 'greenspacearea_km2', 'AQI']
city_mapping = (
    df_unique
    .set_index('city')
    .loc[:, metric_columns]
    .to_dict(orient='index')
)

# Persist the mapping as JSON so the Streamlit app can load it later.
with open('city_mapping.json', 'w') as f:
    json.dump(city_mapping, f)

print("City-metrics mapping saved successfully.")

City-metrics mapping saved successfully.


In [3]:
def adjust_metrics_based_on_input(city_metrics, obesity, smoking, copd, depression, baseline_rate=0.0):
    """
    Adjust city-level health metrics based on user input.

    Every health-rate key is guaranteed to be present in the result. A rate
    that is missing from city_metrics starts at baseline_rate instead of
    raising KeyError, so the returned dict always carries all four
    adjusted_*_rate entries. The input dict itself is never modified.

    Args:
    city_metrics (dict): Original metrics for the selected city.
    obesity (str): "Yes" or "No" indicating user obesity.
    smoking (str): "Yes" or "No" indicating user smoking.
    copd (str): "Yes" or "No" indicating user COPD.
    depression (str): "Yes" or "No" indicating user depression.
    baseline_rate (float): Starting value used for any adjusted_*_rate key
        absent from city_metrics. Defaults to 0.0.

    Returns:
    dict: Copy of city_metrics with the four adjusted_*_rate keys set; a rate
    is raised by its increment only when the matching answer is exactly "Yes".
    """
    # (feature key, user answer, increment applied when the answer is "Yes")
    adjustments = (
        ('adjusted_obesity_rate', obesity, 0.1),
        ('adjusted_smoking_rate', smoking, 0.1),
        ('adjusted_copd_rate', copd, 0.05),
        ('adjusted_depression_rate', depression, 0.05),
    )

    # Work on a copy so the caller's mapping is left untouched.
    adjusted_metrics = city_metrics.copy()

    for rate_key, answer, increment in adjustments:
        # NOTE(review): the baseline for a missing rate is an assumption --
        # confirm it matches how these features were built for training.
        current_rate = adjusted_metrics.get(rate_key, baseline_rate)
        if answer == "Yes":
            current_rate += increment
        adjusted_metrics[rate_key] = current_rate

    return adjusted_metrics

In [4]:
import json

# Read the city -> metrics mapping back from the JSON file on disk
with open('city_mapping.json', 'r') as f:
    city_mapping = json.loads(f.read())

def get_city_metrics(city_name):
    """
    Look up the stored metrics for a city.

    Args:
    city_name (str): Key to look up in the module-level city_mapping.

    Returns:
    dict: The metrics stored for city_name, or None when the city has no entry.
    """
    # dict.get already falls back to None when the key is absent
    metrics = city_mapping.get(city_name)
    return metrics

In [5]:
def make_adjusted_prediction(city_name, obesity, smoking, copd, depression, scaler, models):
    """
    Run the prediction pipeline for one city and one set of user answers.

    Args:
    city_name (str): City whose metrics are fetched via get_city_metrics.
    obesity (str): Forwarded to adjust_metrics_based_on_input.
    smoking (str): Forwarded to adjust_metrics_based_on_input.
    copd (str): Forwarded to adjust_metrics_based_on_input.
    depression (str): Forwarded to adjust_metrics_based_on_input.
    scaler: Object whose transform() is applied to the one-row feature frame.
    models: Passed through unchanged to make_prediction.

    Returns:
    Whatever make_prediction returns for the scaled input.

    Raises:
    ValueError: If get_city_metrics has no entry for city_name.
    """
    # Column order of the single-row frame handed to the scaler
    model_features = [
        'population_city',
        'greenspacearea_km2',
        'AQI',
        'adjusted_obesity_rate',
        'adjusted_smoking_rate',
        'adjusted_copd_rate',
        'adjusted_depression_rate',
    ]

    # Guard clause: stop early when the city is unknown
    base_metrics = get_city_metrics(city_name)
    if base_metrics is None:
        raise ValueError(f"No data available for city: {city_name}")

    # Apply the user's answers, then lay the result out as one row;
    # any feature missing from the dict becomes NaN in the frame
    user_adjusted = adjust_metrics_based_on_input(base_metrics, obesity, smoking, copd, depression)
    feature_frame = pd.DataFrame([user_adjusted], columns=model_features)

    # Scale and predict in one step
    return make_prediction(scaler.transform(feature_frame), models)