# In [3]:  (Jupyter notebook cell marker)
import pandas as pd
import folium
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
import numpy as np
from weight_optimization import calculate_weights
from dataloader import load_data

# Two-letter postal abbreviation -> full state name, used to align datasets
# keyed by abbreviation with the `state_name` column of the county table.
_STATE_ABBREVIATIONS = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming',
}


def _min_max_normalize(values, low=None, high=None):
    """Linearly rescale *values* so that *low* maps to 0 and *high* maps to 1.

    When *low* / *high* are omitted, the observed minimum / maximum of
    *values* is used.  A constant series (high == low) yields NaN rather
    than raising, exactly like the plain pandas arithmetic it wraps.
    """
    low = values.min() if low is None else low
    high = values.max() if high is None else high
    return (values - low) / (high - low)


def _contiguous_us(frame):
    """Return a copy of *frame* without Alaska/Hawaii rows and with lat < 60."""
    keep = (
        (frame['state_name'] != 'Alaska')
        & (frame['state_name'] != 'Hawaii')
        & (frame['lat'] < 60)
    )
    return frame[keep].copy()


def _state_level_feature(county_coordinates, state_frame, state_key, value_column, output_column):
    """Broadcast a state-level value onto counties and min-max normalise it.

    Every county inherits the value of its state via a left join on
    ``county_coordinates['state_name'] == state_frame[state_key]``.  Returns
    only ``county_fips`` and the new *output_column*.
    """
    merged = pd.merge(
        county_coordinates, state_frame,
        left_on='state_name', right_on=state_key, how='left',
    )
    merged[output_column] = _min_max_normalize(merged[value_column])
    return merged[['county_fips', output_column]]


def preprocess_data(county_coordinates, smoking_data, copd_data, covid_data, sepsis_data, drowning_data, vaccination_data, flu_data, pneumonia_data):
    """Join eight health datasets onto counties and min-max normalise each one.

    Args:
        county_coordinates: one row per county; must provide ``county_fips``,
            ``state_name``, ``lat``, ``lng`` and ``population``.
        smoking_data: state level; ``LocationDesc`` (state name), ``Data_Value``.
        copd_data: county level; ``LocationID`` (FIPS), ``Percent_COPD``.
        covid_data: county level; ``fips``, ``cases``.
        sepsis_data: state level; ``STATE`` (abbreviation), ``RATE``.
        drowning_data: state level; ``State`` (abbreviation), ``Dd``.
        vaccination_data: state level; ``Location`` (state name),
            ``Flu Vaccination Rate``.
        flu_data: state level; ``STATENAME``, ``ACTIVITY_LEVEL``.
        pneumonia_data: state level; ``STATE`` (abbreviation), ``RATE``.

    Returns:
        A DataFrame with one ``normalized_<feature>`` column per dataset plus
        the columns carried over from the smoking, COPD and COVID joins
        (colliding names get ``_smoking`` / ``_copd`` suffixes).  Because the
        final joins are inner joins, only counties present in every dataset
        survive.

    The input frames are not modified.
    """
    # --- COVID: county-level cases per capita ------------------------------
    covid = pd.merge(covid_data, county_coordinates, left_on='fips', right_on='county_fips')
    covid['cases_per_population'] = covid['cases'] / covid['population']
    covid = covid.dropna(subset=['lat', 'lng', 'cases_per_population'])
    normalized_covid = _contiguous_us(covid)
    normalized_covid['normalized_covid'] = _min_max_normalize(normalized_covid['cases_per_population'])

    # --- Smoking: state-level value broadcast to every county --------------
    smoking = pd.merge(county_coordinates, smoking_data, left_on='state_name', right_on='LocationDesc', how='left')
    state_mean = smoking.groupby('state_name')['Data_Value'].transform('mean')
    # Assign the result back instead of calling fillna(inplace=True) on the
    # column selection: the chained in-place form silently does nothing when
    # pandas Copy-on-Write is enabled.
    smoking['Data_Value'] = smoking['Data_Value'].fillna(state_mean)
    normalized_smoking = _contiguous_us(smoking)
    normalized_smoking['normalized_smoking'] = _min_max_normalize(normalized_smoking['Data_Value'])

    # --- COPD: county-level prevalence -------------------------------------
    copd = pd.merge(copd_data, county_coordinates, left_on='LocationID', right_on='county_fips')
    copd = copd.dropna(subset=['lat', 'lng', 'Percent_COPD'])
    normalized_copd = _contiguous_us(copd)
    # COPD is scaled against fixed bounds (3.2 .. 15.5) rather than the
    # observed range, so values outside these bounds fall outside [0, 1].
    normalized_copd['normalized_copd'] = _min_max_normalize(normalized_copd['Percent_COPD'], low=3.2, high=15.5)

    # --- State-level features keyed by postal abbreviation -----------------
    # `assign` builds new frames so the caller's inputs are left untouched.
    sepsis = sepsis_data.assign(STATE_FULL=sepsis_data['STATE'].map(_STATE_ABBREVIATIONS.get))
    drowning = drowning_data.assign(STATE_FULL=drowning_data['State'].map(_STATE_ABBREVIATIONS.get))
    pneumonia = pneumonia_data.assign(state_name=pneumonia_data['STATE'].map(_STATE_ABBREVIATIONS.get))

    # These are NOT restricted to the contiguous US before normalising, so
    # Alaska/Hawaii values take part in the min/max.  The inner joins below
    # still drop those counties from the result.
    state_level_features = [
        _state_level_feature(county_coordinates, sepsis, 'STATE_FULL', 'RATE', 'normalized_sepsis'),
        _state_level_feature(county_coordinates, drowning, 'STATE_FULL', 'Dd', 'normalized_drowning'),
        _state_level_feature(county_coordinates, vaccination_data, 'Location', 'Flu Vaccination Rate', 'normalized_vaccination'),
        _state_level_feature(county_coordinates, flu_data, 'STATENAME', 'ACTIVITY_LEVEL', 'normalized_flu'),
        _state_level_feature(county_coordinates, pneumonia, 'state_name', 'RATE', 'normalized_pneumonia'),
    ]

    # --- Combine ------------------------------------------------------------
    # Smoking/COPD both carry the county table's columns, hence the suffixes.
    # The un-suffixed `county_fips` used by the later joins comes from the
    # COVID frame.
    combined_data = pd.merge(normalized_smoking, normalized_copd, on=['lat', 'lng', 'state_name'], suffixes=('_smoking', '_copd'))
    combined_data = pd.merge(combined_data, normalized_covid, on=['lat', 'lng', 'state_name'])
    # NOTE(review): if any state-level input has several rows per state, each
    # join below multiplies the county rows — confirm inputs are one row per
    # state.
    for feature in state_level_features:
        combined_data = pd.merge(combined_data, feature, on='county_fips')
    return combined_data

def weights(combined_data, weights_csv='state_data_1.csv', centers_csv='ards_data/ARDS_centers.csv'):
    """Score every county with a weighted sum of its normalised features.

    Args:
        combined_data: frame holding ``lat``, ``lng`` and the eight
            ``normalized_*`` feature columns.  It is modified in place: a
            ``combined_weighted_value`` column is added and rows lacking
            ``lat``, ``lng`` or a score are dropped.
        weights_csv: path handed to ``calculate_weights``.
        centers_csv: CSV of hospitals with ``Hospital Name``, ``Latitude``
            and ``Longitude`` columns.

    Returns:
        A 3-tuple ``(combined_data, heatmap_data, locations)`` where
        ``combined_data`` is the same (mutated) frame that was passed in,
        ``heatmap_data`` is a list of ``[lat, lng, score]`` rows and
        ``locations`` is a list of ``(latitude, longitude, hospital_name)``
        tuples, one per distinct hospital name.
    """
    # Position i in this tuple is multiplied by coefficients[i].
    # NOTE(review): this order (…, flu, pneumonia, vaccination) must match the
    # order in which calculate_weights() emits its coefficients — confirm
    # against weight_optimization.calculate_weights and the column order of
    # the CSV it reads.
    feature_columns = (
        'normalized_smoking',
        'normalized_copd',
        'normalized_covid',
        'normalized_drowning',
        'normalized_sepsis',
        'normalized_flu',
        'normalized_pneumonia',
        'normalized_vaccination',
    )
    coefficients = calculate_weights(weights_csv)

    # Accumulate left to right, indexing the coefficients by position so any
    # indexable return type (list, array, ...) keeps working.
    score = coefficients[0] * combined_data[feature_columns[0]]
    for position, column in enumerate(feature_columns[1:], start=1):
        score = score + coefficients[position] * combined_data[column]
    combined_data['combined_weighted_value'] = score

    # A NaN in any feature makes the score NaN; such counties cannot be drawn.
    combined_data.dropna(subset=['lat', 'lng', 'combined_weighted_value'], inplace=True)
    heatmap_data = combined_data[['lat', 'lng', 'combined_weighted_value']].values.tolist()

    # Keep the first row for each hospital name.
    centers = pd.read_csv(centers_csv).drop_duplicates(subset='Hospital Name')
    locations = list(zip(
        centers['Latitude'].tolist(),
        centers['Longitude'].tolist(),
        centers['Hospital Name'].tolist(),
    ))
    return combined_data, heatmap_data, locations
    

def create_usa_map():
    """Return a new folium map centred at latitude 40, longitude -98, zoom 4."""
    center = [40, -98]  # [latitude, longitude]
    return folium.Map(location=center, zoom_start=4)

def state_data(combined_data, vals_path='vals.csv', output_path='state_data.csv'):
    """Average the normalised features per state and attach external values.

    Args:
        combined_data: county-level frame with ``state_name`` and the eight
            ``normalized_*`` feature columns.
        vals_path: CSV containing a ``vals`` column.
        output_path: where the per-state feature means are written.  The
            file is written *before* ``vals`` is attached, so it contains
            only ``state_name`` and the feature columns.

    Returns:
        A new DataFrame with one row per state (sorted by ``state_name``),
        the mean of each feature, and a ``vals`` column.
    """
    feature_columns = [
        'normalized_sepsis',
        'normalized_drowning',
        'normalized_vaccination',
        'normalized_flu',
        'normalized_pneumonia',
        'normalized_smoking',
        'normalized_copd',
        'normalized_covid',
    ]
    # Select the feature columns before aggregating: averaging every column
    # raises on non-numeric ones in pandas >= 2.0.  The result is a fresh
    # frame, so adding a column below does not write into a slice.
    sd = combined_data.groupby('state_name')[feature_columns].mean().reset_index()

    vals = pd.read_csv(vals_path)
    sd.to_csv(output_path, index=False)

    # Aligned on the default integer index, i.e. row i of the vals file goes to
    # the i-th state in alphabetical order.
    # NOTE(review): confirm the vals file is ordered that way.
    sd['vals'] = vals['vals']
    return sd


def add_heatmap(usa_map, heatmap_data):
    """Attach a HeatMap layer built from *heatmap_data* to *usa_map*.

    The layer uses a custom gradient whose colour stops are listed below.
    Nothing is returned; *usa_map* is modified in place.
    """
    # (stop, colour) pairs: blue -> light blue -> green -> yellow -> red.
    # Three of the five stops sit at or above 0.89.
    color_stops = (
        (0.0, '#0000FF'),
        (0.7, '#3399FF'),
        (0.89, '#66FF66'),
        (0.94, '#FFFF00'),
        (1.0, '#FF0000'),
    )
    layer = HeatMap(heatmap_data, gradient=dict(color_stops))
    layer.add_to(usa_map)


def add_marker_cluster(usa_map, locations):
    """Add one popup marker per location to *usa_map*, grouped in a cluster.

    *locations* is an iterable of ``(latitude, longitude, name)`` triples;
    ``name`` becomes the marker's popup.  Nothing is returned.
    """
    cluster = MarkerCluster()
    cluster.add_to(usa_map)
    for latitude, longitude, label in locations:
        marker = folium.Marker(location=[latitude, longitude], popup=label)
        marker.add_to(cluster)


def main():
    """Build the weighted heatmap, save it to ``usa_map.html`` and return it.

    Pipeline: load the raw datasets, normalise and join them, score each
    county, then render the heatmap plus hospital markers on a folium map.

    Returns:
        The folium map, so an interactive session (e.g. a notebook) can
        display it; script callers may ignore the value.
    """
    (
        county_coordinates,
        smoking_data,
        copd_data,
        covid_data,
        sepsis_data,
        drowning_data,
        vaccination_data,
        flu_data,
        pneumonia_data,
        ards_centers,  # unused here: weights() reads the hospital list from CSV itself
    ) = load_data()

    combined_data = preprocess_data(
        county_coordinates, smoking_data, copd_data, covid_data, sepsis_data,
        drowning_data, vaccination_data, flu_data, pneumonia_data,
    )
    # The first element of the result is the scored frame; only the heatmap
    # rows and hospital locations are needed for rendering.
    _, heatmap_data, locations = weights(combined_data)

    usa_map = create_usa_map()
    add_heatmap(usa_map, heatmap_data)
    add_marker_cluster(usa_map, locations)
    usa_map.save('usa_map.html')
    # A bare `usa_map` expression inside a function displays nothing, so hand
    # the map back to the caller instead.
    return usa_map

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()
