# Imports

In [8]:
from OSMPythonTools.nominatim import Nominatim
import plotly.graph_objects as go
import plotly.io as pio
from PIL import Image
import pandas as pd
import kaleido
import imageio
import folium
import os
import io

# Default renderer: two renderer names combined with "+".
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default template is "none"; the figures built below pass template="plotly_white" explicitly.
pio.templates.default = "none"

# Data Transformation

In [2]:
# Source of the coordinates file:
# https://www.kaggle.com/datasets/paultimothymooney/latitude-and-longitude-for-every-country-and-state
# Keep only the USA state columns; dropna() removes rows missing any of them
# (presumably the country-only rows -- verify against the CSV).
geo = pd.read_csv('./data/world_country_and_usa_states_latitude_and_longitude_values.csv')
geo = geo[['usa_state_code', 'usa_state', 'usa_state_latitude', 'usa_state_longitude']].dropna()

# Railroad data. The typed columns are built with .assign on the column
# selection, so nothing is written into a slice of the raw frame (which is what
# triggers pandas' SettingWithCopyWarning).
te = (
    pd.read_csv('./data/train_final.csv')[['DATE', 'TYPE', 'STATE', 'CASKLDRR', 'CASINJRR']]
    .assign(
        DATE=lambda raw: pd.to_datetime(raw['DATE']),
        CASKLDRR=lambda raw: raw['CASKLDRR'].astype(int),
        CASINJRR=lambda raw: raw['CASINJRR'].astype(int),
    )
)

# Airplane data. The parsed timestamp goes into a new upper-case 'DATE' column
# (the original 'Date' column is kept); missing 'Ground' counts become 0 before
# the integer cast.
ap = (
    pd.read_csv('./data/airplanes_final.csv')[['Date', 'Fatalities', 'Ground', 'State']]
    .assign(
        DATE=lambda raw: pd.to_datetime(raw['Date']),
        Fatalities=lambda raw: raw['Fatalities'].astype(int),
        Ground=lambda raw: raw['Ground'].fillna(0).astype(int),
    )
)

In [3]:
# Rebind instead of renaming in place, so this cell never mutates an object
# created by an earlier cell.
geo = geo.rename(columns={'usa_state_code':'STATE', 'usa_state':'STATE_NAME', 'usa_state_latitude':'LAT', 'usa_state_longitude':'LON'})

# Railroad totals per (year, state). The year level comes out of the groupby
# named 'DATE' and is renamed to 'YEAR'.
te_agg = (
    te.groupby([te['DATE'].dt.year, te['STATE']])[['CASKLDRR', 'CASINJRR']]
    .sum()
    .reset_index()
    .rename(columns={'DATE': 'YEAR'})
)
# Inner join: rows whose STATE has no match in geo are dropped.
te_merged = pd.merge(te_agg, geo, how='inner', on='STATE')
te_options = ['CASKLDRR', 'CASINJRR']

# Airplane totals per (year, state), renamed to the same upper-case scheme.
ap_agg = (
    ap.groupby([ap['DATE'].dt.year, ap['State']])[['Fatalities', 'Ground']]
    .sum()
    .reset_index()
    .rename(columns={'DATE': 'YEAR', 'State':'STATE', 'Fatalities':'KLDAP', 'Ground':'GKLDAP'})
)
# Inner join: rows whose STATE has no match in geo are dropped.
ap_merged = pd.merge(ap_agg, geo, how='inner', on='STATE')
ap_options = ['KLDAP', 'GKLDAP']

# Display names for each metric column (also used as image sub-directory names).
names = {
    'CASKLDRR':"Railroad Deaths",
    'CASINJRR':"Railroad Injuries",
    'KLDAP':"Plane Aircraft Deaths",
    'GKLDAP':"Plane Ground Deaths"
}

# Plot Choropleths

In [4]:
def create_choropleth(df, options, color, type):
    """Show an interactive USA choropleth with a metric dropdown and a year slider.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns 'YEAR', 'STATE', 'STATE_NAME' and every
        column named in `options`.
    options : list of str
        Metric columns; one trace is created per entry and the dropdown
        switches which trace is visible. Each entry must be a key of the
        module-level `names` mapping.
    color : str
        Colorscale passed to every trace.
    type : str
        "Railroad" or "Airplane" selects the figure title; any other value
        leaves the title empty. (The name shadows the builtin but is kept so
        existing keyword callers keep working.)

    Raises
    ------
    ValueError
        If `df` has no rows, since there is no year to plot.
    """
    years = sorted(df['YEAR'].unique())
    if not years:
        raise ValueError("create_choropleth needs at least one row of data to plot")

    # Slice the frame once per year; reused for the initial traces and for
    # every slider step.
    frames_by_year = {year: df[df['YEAR'] == year] for year in years}
    first_year = frames_by_year[years[0]]

    fig = go.Figure()

    for i, option in enumerate(options):
        name = names[option]
        fig.add_trace(
            go.Choropleth(
                # Start on the first year so the map matches the slider's
                # initial position (active=0) rather than plotting every
                # year's rows at once.
                locations=first_year['STATE'],
                z=first_year[option],
                colorbar_title=name,
                visible=(i == 0),
                locationmode='USA-states',
                name="Count",
                text=first_year['STATE_NAME'],
                zmin=0,
                # Colour ceiling spans all years so the scale does not change
                # while sliding.
                zmax=df[option].max(),
                colorscale=color,
                hovertemplate="<b>%{text}</b><br>Count: %{z}<extra></extra>"
            )
        )

    # Dropdown: exactly one trace visible per metric.
    buttons = [
        dict(
            label=names[option],
            method="update",
            args=[{"visible": [False] * i + [True] + [False] * (len(options) - i - 1)}],
        ) for i, option in enumerate(options)
    ]

    # Slider: each step rewrites every trace (one list entry per option) so the
    # hidden traces stay in sync. `text` is updated together with `locations`
    # so hover names stay aligned with the data.
    steps = [
        dict(
            method="update",
            label=str(year),
            args=[{
                "z": [subset[option].tolist() for option in options],
                "locations": [subset['STATE'].tolist() for _ in options],
                "text": [subset['STATE_NAME'].tolist() for _ in options],
            }],
        ) for year, subset in frames_by_year.items()
    ]

    title = ""
    if type == "Railroad":
        title = f'{type}-related Injuries and Deaths from {years[0]} to {years[-1]}'  # first to last year
    elif type == "Airplane":
        title = f'{type}-related Aircraft and Ground Deaths from {years[0]} to {years[-1]}'

    fig.update_layout(
        updatemenus=[
            dict(
                buttons=buttons,
                direction="down",
                showactive=True,
                pad={"r": 10, "t": 10},
                x=0,
                y=1.2,
                xanchor="left",
                yanchor="top"
            )
        ],
        sliders=[
            dict(
                active=0,
                currentvalue={"prefix": "Year: "},
                pad={"t": 50},
                steps=steps
            )
        ],
        title=title,
        title_font=dict(size=24),
        title_x=0.45,
        geo_scope="usa",
        template="plotly_white"
    )

    fig.show()


## Trains

In [5]:
# Interactive railroad choropleth on the green colour scale.
create_choropleth(df=te_merged, options=te_options, color="Greens", type="Railroad")

## Airplanes

In [6]:
# Interactive airplane choropleth on the blue colour scale.
create_choropleth(df=ap_merged, options=ap_options, color="Blues", type="Airplane")

# Save Imgs + Create GIFs

In [None]:
def save_imgs(df, color, base_path, subdir, img_type):
    """Write one static choropleth PNG per year found in `df`.

    Files are written to `<base_path>/<subdir>/<img_type>_<year>.png`; the
    directory is created when missing.

    Parameters
    ----------
    df : DataFrame
        Needs the columns 'YEAR', 'STATE' and `img_type`.
    color : str
        Colorscale for the map.
    base_path, subdir : str
        Joined to form the output directory.
    img_type : str
        Metric column to plot; must be a key of the module-level `names`
        mapping. It also selects the title wording.
    """
    dir_path = os.path.join(base_path, subdir)
    os.makedirs(dir_path, exist_ok=True)

    # Title wording per metric; an unlisted metric leaves the title empty.
    title_stems = {
        "CASKLDRR": 'Railroad-related Deaths',
        "CASINJRR": 'Railroad-related Injuries',
        "KLDAP": 'Airplane-related Aircraft Deaths',
        "GKLDAP": 'Airplane-related Ground Deaths',
    }

    for year in df['YEAR'].unique():
        yearly = df[df['YEAR'] == year]

        stem = title_stems.get(img_type)
        title = f'{stem} in {year}' if stem is not None else ""

        fig = go.Figure(
            go.Choropleth(
                locations=yearly['STATE'],
                z=yearly[img_type],
                colorbar_title=names[img_type],
                locationmode='USA-states',
                name="Count",
                zmin=0,
                # Ceiling taken over all years so every image shares one scale.
                zmax=df[img_type].max(),
                colorscale=color
            )
        )
        fig.update_layout(
            title=title,
            title_font=dict(size=24),
            title_x=0.45,
            geo_scope="usa",
            template="plotly_white"
        )

        fig.write_image(f"{dir_path}/{img_type}_{year}.png")
    
    
base_path = "./img/choropleth/"

# Each metric column paired with the aggregated frame that holds it and the
# colour scale used for it.
img_sources = {
    'CASKLDRR': (te_agg, "Greens"),
    'CASINJRR': (te_agg, "Greens"),
    'KLDAP': (ap_agg, "Blues"),
    'GKLDAP': (ap_agg, "Blues"),
}
img_types = list(img_sources)

for img_type in img_types:
    frame, scale = img_sources[img_type]
    subdir = names[img_type]  # images are grouped in a folder named after the metric
    save_imgs(frame, scale, base_path, subdir, img_type)

In [None]:
def create_gif(input_folder, output_file, img_type, duration=0.5):
    """Assemble `<img_type>_<year>.png` files from a folder into one GIF.

    The years are read from the names of the files in `input_folder` that
    start with `img_type` (the text between the last underscore and the first
    following dot), then the frames are added in ascending year order.

    Parameters
    ----------
    input_folder : str
        Directory containing the per-year PNG files.
    output_file : str
        Path of the GIF to write.
    img_type : str
        File-name prefix identifying the metric.
    duration : float
        Passed through unchanged to `imageio.mimsave`.
    """
    candidates = (entry for entry in os.listdir(input_folder) if entry.startswith(img_type))
    ordered_years = sorted(
        int(entry.rsplit('_', 1)[-1].partition('.')[0]) for entry in candidates
    )

    frames = [
        imageio.imread(os.path.join(input_folder, f"{img_type}_{yr}.png"))
        for yr in ordered_years
    ]

    imageio.mimsave(output_file, frames, duration=duration)


output_path = "./gif/choropleth/"
os.makedirs(output_path, exist_ok=True)

# One GIF per metric, built from the folder its PNG files were saved to.
for img_type in img_types:
    create_gif(
        os.path.join(base_path, names[img_type]),
        os.path.join(output_path, f"{img_type}.gif"),
        img_type,
    )

# Folium Plots

In [4]:
nominatim = Nominatim()

# First Nominatim result for the query; its 'lat'/'lon' fields centre the maps.
country_loc = nominatim.query("United States").toJSON()[0]
country_lat, country_lon = country_loc['lat'], country_loc['lon']

# Initial zoom level for the maps and maximum popup width.
default_zoom, max_width = 4, 300

[nominatim] downloading data: search


In [5]:
def create_folium_map(base_map, df, options, color, type):
    """Add one popup marker to `base_map` for every row of `df` with a positive count.

    A row is plotted when either of its first two `options` columns is greater
    than zero. The popup always shows the year and state name; for `type`
    "Railroad" or "Airplane" it also shows both counts with matching labels.

    Parameters
    ----------
    base_map : folium.Map
        Map the markers are added to (modified in place).
    df : DataFrame
        Needs the columns 'YEAR', 'STATE_NAME', 'LAT', 'LON' and the first two
        entries of `options`.
    options : list of str
        Count columns; only the first two are read.
    color : str
        Marker icon colour.
    type : str
        Selects the popup labels; other values add no count lines.

    Popup width comes from the module-level `max_width`.
    """
    # Labels for the two counts, decided once from the dataset type.
    if type == "Railroad":
        count_labels = ("Deaths", "Injuries")
    elif type == "Airplane":
        count_labels = ("Aircraft Deaths", "Ground Deaths")
    else:
        count_labels = None

    for _, record in df.iterrows():
        # Skip rows where neither count is positive.
        if not (record[options[0]] > 0 or record[options[1]] > 0):
            continue

        html = f"<b>Year:</b> {record['YEAR']}<br><b>State:</b> {record['STATE_NAME']}<br>"
        if count_labels is not None:
            html += f"<b>{count_labels[0]}:</b> {record[options[0]]}<br><b>{count_labels[1]}:</b> {record[options[1]]}"

        marker = folium.Marker(
            location=[record['LAT'], record['LON']],
            icon=folium.Icon(color=color),
            popup=folium.Popup(html, max_width=max_width)
        )
        marker.add_to(base_map)

## Trains

In [6]:
# create base map
# Base map centred on the looked-up country coordinates, then railroad markers in green.
train_map = folium.Map(
    location=[country_lat, country_lon],
    zoom_start=default_zoom,
)
create_folium_map(base_map=train_map, df=te_merged, options=te_options, color="green", type="Railroad")
train_map

## Airplanes

In [7]:
# create base map
# Base map centred on the looked-up country coordinates, then airplane markers in blue.
plane_map = folium.Map(
    location=[country_lat, country_lon],
    zoom_start=default_zoom,
)
create_folium_map(base_map=plane_map, df=ap_merged, options=ap_options, color="blue", type="Airplane")
plane_map

In [11]:
output_path = "./img/choropleth/folium/"
os.makedirs(output_path, exist_ok=True)

# Explicit file-name -> map pairing. The loop variable is deliberately not
# called `map`, which would shadow the builtin for every later cell in the kernel.
folium_maps = {"folium_train": train_map, "folium_plane": plane_map}
for name, folium_map in folium_maps.items():
    folium_map.save(f'{output_path}{name}.html')