# Imports

In [32]:
from OSMPythonTools.nominatim import Nominatim
from plotly.subplots import make_subplots
from folium.plugins import MarkerCluster
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
from PIL import Image
import pandas as pd
import numpy as np
import kaleido
import imageio
import folium
import os
import io

# Default renderer: the "plotly_mimetype" and "notebook_connected" renderers combined with "+".
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default template for every figure created below: the template named "none".
pio.templates.default = "none"

# Data Transformation

In [2]:
# State coordinates come from:
# https://www.kaggle.com/datasets/paultimothymooney/latitude-and-longitude-for-every-country-and-state
# Only the USA-state columns are kept; rows with any missing value in them are dropped.
geo = (
    pd.read_csv('./data/world_country_and_usa_states_latitude_and_longitude_values.csv')
    [['usa_state_code', 'usa_state', 'usa_state_latitude', 'usa_state_longitude']]
    .dropna()
)

# Train incidents: parse the date column and force both casualty counts to int.
te = pd.read_csv('./data/train_final.csv')[['DATE', 'TYPE', 'STATE', 'CASKLDRR', 'CASINJRR']].assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE']),
    CASKLDRR=lambda frame: frame['CASKLDRR'].astype(int),
    CASINJRR=lambda frame: frame['CASINJRR'].astype(int),
)

# Airplane incidents: the parsed date goes into a new 'DATE' column (the raw 'Date' is kept);
# missing ground fatalities are treated as 0 before the int cast.
ap = pd.read_csv('./data/airplanes_final.csv')[['Date', 'Fatalities', 'Ground', 'State']].assign(
    DATE=lambda frame: pd.to_datetime(frame['Date']),
    Fatalities=lambda frame: frame['Fatalities'].astype(int),
    Ground=lambda frame: frame['Ground'].fillna(0).astype(int),
)

In [3]:
# Give the coordinate table short column names; 'STATE' is the join key used below.
geo = geo.rename(
    columns={
        'usa_state_code': 'STATE',
        'usa_state': 'STATE_NAME',
        'usa_state_latitude': 'LAT',
        'usa_state_longitude': 'LON',
    }
)

# Railroad casualties summed per (year, state), then joined to the state coordinates.
te_agg = (
    te.groupby([te['DATE'].dt.year, te['STATE']])[['CASKLDRR', 'CASINJRR']]
    .sum()
    .reset_index()
    .rename(columns={'DATE': 'YEAR'})
)
te_merged = te_agg.merge(geo, how='inner', on='STATE').sort_values('YEAR')
te_options = ['CASKLDRR', 'CASINJRR']

# Airplane fatalities summed per (year, state), renamed to the short codes used in `names`.
ap_agg = (
    ap.groupby([ap['DATE'].dt.year, ap['State']])[['Fatalities', 'Ground']]
    .sum()
    .reset_index()
    .rename(columns={'DATE': 'YEAR', 'State': 'STATE', 'Fatalities': 'KLDAP', 'Ground': 'GKLDAP'})
)
ap_merged = ap_agg.merge(geo, how='inner', on='STATE').sort_values('YEAR')
ap_options = ['KLDAP', 'GKLDAP']

# Human-readable label for every metric column.
names = dict(
    CASKLDRR="Railroad Deaths",
    CASINJRR="Railroad Injuries",
    KLDAP="Plane Aircraft Deaths",
    GKLDAP="Plane Ground Deaths",
)

# Plot Choropleths

In [7]:
def create_choropleth(df, names, option, color):
    """Build, display and return a per-year animated choropleth of US states.

    Parameters
    ----------
    df : DataFrame with 'YEAR', 'STATE', 'STATE_NAME' and `option` columns.
    names : mapping from metric column name to its display label.
    option : name of the metric column used to colour the states.
    color : continuous colour scale passed to Plotly Express.

    Returns
    -------
    The Plotly figure (it is also shown via ``fig.show()`` as a side effect).
    """
    name = names[option]
    years = sorted(df['YEAR'].unique())
    # Fix the colour range over the whole data set so every frame shares one scale.
    min_value = df[option].min()
    max_value = df[option].max()

    fig = px.choropleth(
        df,
        locations="STATE",
        color=option,
        color_continuous_scale=color,
        hover_name="STATE_NAME",
        hover_data={option: True},
        animation_frame="YEAR",
        locationmode="USA-states",
        scope="usa",
        labels={option: name},
        title=f"{name} from {years[0]} to {years[-1]}",
        range_color=(min_value, max_value)
    )

    # Tune the animation controls. They may be absent (e.g. when the data holds
    # a single year and there is nothing to animate), so only touch them if present.
    if fig.layout.updatemenus:
        fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 500
        fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 0
    if fig.layout.sliders:
        fig.layout.sliders[0].pad.t = 10

    fig.show()
    return fig

## Trains

In [30]:
# Railroad deaths (te_options[0] is 'CASKLDRR') on the "Greens" colour scale.
rr_death = create_choropleth(te_merged, names, te_options[0], "Greens")

In [31]:
# Railroad injuries (te_options[1] is 'CASINJRR') on the "Greens" colour scale.
rr_inj = create_choropleth(te_merged, names, te_options[1], "Greens")

## Airplanes

In [32]:
# Aircraft fatalities (ap_options[0] is 'KLDAP') on the "Blues" colour scale.
ap_airdeaths = create_choropleth(ap_merged, names, ap_options[0], "Blues")

In [33]:
# Ground fatalities (ap_options[1] is 'GKLDAP') on the "Blues" colour scale.
ap_gdeaths = create_choropleth(ap_merged, names, ap_options[1], "Blues")

# Save Imgs as HTML

In [11]:
# Export each choropleth figure as a standalone HTML file named after its metric column.
base_path = "./img/choropleth/"
imgs = [rr_death, rr_inj, ap_airdeaths, ap_gdeaths]
img_names = ['CASKLDRR', 'CASINJRR', 'KLDAP', 'GKLDAP']

# Make sure the target directory exists before writing into it.
os.makedirs(base_path, exist_ok=True)

for img_name, img in zip(img_names, imgs):
    pio.write_html(img, file=f'{base_path}{img_name}.html')

# Folium Plots

In [12]:
# Look up "United States" once via Nominatim and keep the first result's
# coordinates as the centre for the folium base maps.
nominatim = Nominatim()
country_loc = nominatim.query("United States").toJSON()[0]
country_lat, country_lon = country_loc['lat'], country_loc['lon']

# Shared folium settings: initial zoom level and maximum popup width.
default_zoom, max_width = 4, 300

[nominatim] downloading data: search


In [34]:
def create_folium_map(base_map, df, options, color, type):
    """Add one clustered marker per row of `df` that has a positive count.

    `options` holds the two metric column names; rows are visited in
    descending order of those columns, and a row is skipped unless at least
    one of the two values is greater than zero. Each marker is placed at the
    row's ('LAT', 'LON'), drawn with an icon of colour `color`, and carries a
    popup with year, state and both counts (labelled according to `type`,
    either "Railroad" or "Airplane"). Popup width comes from the module-level
    `max_width`. Markers are added to a MarkerCluster attached to `base_map`;
    nothing is returned.
    """
    first_metric, second_metric = options[0], options[1]
    ordered = df.sort_values(by=[first_metric, second_metric], ascending=False)

    cluster = MarkerCluster().add_to(base_map)

    for _, record in ordered.iterrows():
        # Skip rows where nobody was injured or killed.
        if not (record[first_metric] > 0 or record[second_metric] > 0):
            continue

        header = f"<b>Year:</b> {record['YEAR']}<br><b>State:</b> {record['STATE_NAME']}<br>"

        # Pick the count labels that match the incident type.
        if type == "Railroad":
            details = f"<b>Deaths:</b> {record[first_metric]}<br><b>Injuries:</b> {record[second_metric]}"
        elif type == "Airplane":
            details = f"<b>Aircraft Deaths:</b> {record[first_metric]}<br><b>Ground Deaths:</b> {record[second_metric]}"
        else:
            details = ""

        marker = folium.Marker(
            location=[record['LAT'], record['LON']],
            icon=folium.Icon(color=color),
            popup=folium.Popup(header + details, max_width=max_width),
        )
        marker.add_to(cluster)

## Trains

In [30]:
# Create the base map centred on the looked-up country coordinates.
train_map = folium.Map(location=[country_lat, country_lon], zoom_start=default_zoom)

# Add green railroad markers (deaths / injuries per year and state) to the map.
create_folium_map(train_map, te_merged, te_options, "green", "Railroad")
# Bare expression: displays the map as the notebook cell output.
train_map

## Airplanes

In [29]:
# Create the base map centred on the looked-up country coordinates.
plane_map = folium.Map(location=[country_lat, country_lon], zoom_start=default_zoom)

# Add blue airplane markers (aircraft / ground deaths per year and state) to the map.
create_folium_map(plane_map, ap_merged, ap_options, "blue", "Airplane")
# Bare expression: displays the map as the notebook cell output.
plane_map

In [16]:
# Save both folium maps as HTML next to the choropleth exports.
# The loop variable is deliberately not called `map`: at notebook top level that
# would shadow the builtin `map` for every later cell.
for name, folium_map in (("folium_train", train_map), ("folium_plane", plane_map)):
    folium_map.save(f'{base_path}{name}.html')

# Scattergeo ("innovative")

In [75]:
# Inspired by the Avocado Project (https://marckvaisman.georgetown.domains/anly503/project/avocados/website/index.html)

def create_scattergeo(df, names, option, year, min_scale, max_scale):
    """Render one year of an emoji "bubble" map and return it as a PIL image.

    Every state with a non-zero `option` value in `year` is drawn as an emoji
    whose font size grows with the square root of value / column maximum,
    mapped onto [min_scale, max_scale]. A hand-built legend on the right shows
    the icon size at 100 %, 75 %, 50 % and 25 % of the column maximum.

    Parameters
    ----------
    df : DataFrame with 'YEAR', 'LAT', 'LON' and `option` columns (all years).
    names : mapping from metric column name to its display label.
    option : metric column to plot. Membership in the module-level
        `te_options` selects the train icon and view; anything else gets the
        airplane icon and view.
    year : the single year to draw.
    min_scale, max_scale : smallest / largest icon font size.

    Returns
    -------
    PIL.Image.Image opened from the figure's PNG bytes (``fig.to_image``).
    """
    name = names[option]
    years = sorted(df['YEAR'].unique())

    # Icon and camera depend on whether this is a railroad or an airplane metric.
    is_train = option in te_options
    icon = '🚆' if is_train else '✈️'
    zoom = 2 if is_train else 1.7
    lat = 50
    lon = -110 if is_train else -120

    # Only this year's rows, and no need to show rows that have no accidents.
    filtered_data = df[(df['YEAR'] == year) & (df[option] != 0)]

    # Sizes are relative to the maximum over ALL years so frames are comparable.
    max_val = df[option].max()
    scale_span = max_scale - min_scale

    def font_size(fraction):
        # Square-root scaling of a value expressed as a fraction of the maximum.
        return min_scale + np.sqrt(fraction) * scale_span

    fig = go.Figure(
        go.Scattergeo(
            lon=filtered_data['LON'],
            lat=filtered_data['LAT'],
            mode='text',
            text=icon,
            textfont_size=font_size(filtered_data[option] / max_val),
        )
    )

    fig.update_layout(
        title_text=f"<b>{name} from {years[0]} to {years[-1]}</b>",
        height=600,
        width=950,
        paper_bgcolor='#ADD8E6',
        plot_bgcolor='#ADD8E6',
        # Wide right margin leaves room for the hand-built legend.
        margin=dict(l=0, r=160, t=60, b=0, pad=0),
        geo=dict(
            scope='north america',
            landcolor='#d9d9d9',
            lakecolor='#ADD8E6',
            showocean=True,
            oceancolor='#ADD8E6',
            resolution=50,
            showsubunits=True,
            subunitcolor="Black",
            projection_scale=zoom,
            center=dict(lat=lat, lon=lon),
        )
    )

    # Legend title
    fig.add_annotation(
        x=1.18,
        y=0.85,
        xref='paper',
        yref='paper',
        text=name,
        font={'size': 15},
        showarrow=False
    )

    # Legend box. Its coordinates are paper coordinates like the annotations,
    # so the reference frame is stated explicitly instead of relying on defaults.
    fig.add_shape(
        type="rect",
        xref='paper',
        yref='paper',
        x0=1.02,
        y0=0.3,
        x1=1.19,
        y1=0.8,
        line=dict(
            color="Black",
            width=0.2
        ),
    )

    # Legend rows: (fraction of max, icon x, icon y, icon align, label y).
    # The icon size uses the fraction directly rather than max_val * f / max_val,
    # which would yield NaN (and a failing int cast) when the maximum is 0.
    legend_rows = (
        (1.0, 1.19, 0.8, 'center', 0.75),
        (0.75, 1.185, 0.59, 'center', 0.58),
        (0.5, 1.18, 0.46, 'right', 0.46),
        (0.25, 1.17, 0.36, 'right', 0.36),
    )
    for fraction, icon_x, icon_y, icon_align, label_y in legend_rows:
        fig.add_annotation(
            x=icon_x,
            y=icon_y,
            xref='paper',
            yref='paper',
            text=icon,
            font={'size': int(np.ceil(font_size(fraction)))},
            align=icon_align,
            showarrow=False
        )
        fig.add_annotation(
            x=1.07,
            y=label_y,
            xref='paper',
            yref='paper',
            text=str(int(max_val * fraction)),
            font={'size': 15},
            showarrow=False
        )

    # Year declaration
    fig.add_annotation(
        x=0.2,
        y=0.16,
        xref='paper',
        yref='paper',
        text=str(year),
        font={'size': 30},
        showarrow=False
    )

    # Render the figure to PNG bytes and load them as a PIL image.
    img_data = fig.to_image(format="png")
    img = Image.open(io.BytesIO(img_data))

    return img

In [76]:
def generate_gif(df, options):
    """Write one animated GIF per metric in `options`, one frame per year.

    Frames come from ``create_scattergeo`` (icon sizes between 4 and 48, labels
    from the module-level `names`) and are saved as
    './img/choropleth/innovative/<option>.gif', 700 ms per frame, looping
    forever. The output directory is created if needed. If `df` contains no
    years there is nothing to animate and no file is written.
    """
    output_dir = './img/choropleth/innovative/'
    os.makedirs(output_dir, exist_ok=True)

    # The list of years is the same for every option, so compute it once.
    years = sorted(df['YEAR'].unique())
    if not years:
        return

    for option in options:
        # generate frames for each year
        frames = [create_scattergeo(df, names, option, year, 4, 48) for year in years]

        # convert frames to gif
        frames[0].save(
            os.path.join(output_dir, f'{option}.gif'),
            save_all=True,
            append_images=frames[1:],
            duration=700,
            loop=0
        )

## Trains

In [77]:
# One GIF per railroad metric in te_options ('CASKLDRR', 'CASINJRR').
generate_gif(te_merged, te_options)

## Airplanes

In [78]:
# One GIF per airplane metric in ap_options ('KLDAP', 'GKLDAP').
generate_gif(ap_merged, ap_options)