In [None]:
import kaleido
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

In [None]:
# Load the accident records from the project's data directory
train_df = pd.read_csv(filepath_or_buffer='../data/train_final.csv')

In [None]:
# Discard every row that has a missing value in at least one column
train_df = train_df.dropna(axis=0, how='any')

In [None]:
# Display the column labels of train_df
train_df.columns

In [None]:
# Parse the DATE column once, then derive calendar month and year columns from it
date_index = pd.DatetimeIndex(train_df['DATE'])
train_df['month'] = date_index.month
train_df['year'] = date_index.year

In [None]:
# Distinct values present in the WEATHER column, in order of first appearance
train_df.loc[:, 'WEATHER'].unique()

In [None]:
# Preview the first five rows
train_df.head(n=5)

In [None]:
# Bubble chart: one marker per accident record, positioned by DATE (x) and
# TEMP (y), sized by ACCDMG and coloured by the WEATHER category.
# Hovering a marker shows that record's DESCRIPTION.
fig = px.scatter(
    train_df,
    x="DATE",
    y="TEMP",
    size="ACCDMG",
    color="WEATHER",
    hover_name="DESCRIPTION",
    size_max=80,  # cap on the largest bubble's diameter, in pixels
    color_discrete_sequence=("#2ECC71", "#F7DC6F", "#F0B27A", "#99A3A4", "#C39BD3", "#5D6D7E"),
)

fig.update_layout(
    title="Weather Conditions and Temperature effect on Train Accidents <br><sup>Source: Federal Railroad Administration</sup>",
    xaxis_title="Time",
    yaxis_title="Temperature (°F)",
)

fig.show()

# Both exports are built from one shared path stem so the PNG and the HTML
# always carry the same name. The HTML used to be written under the file name
# of a different chart ("Number_of_Train_Accidents_by_State_for_Each_Year"),
# which mislabelled this figure and could overwrite that chart's export.
bubble_stem = '../img/bubble/Weather_Conditions_and_Temperature_effect_on_Accidents'

# save image
fig.write_image(bubble_stem + '.png')

# save html (auto_open=False: write the file without opening a browser tab)
pio.write_html(fig, file=bubble_stem + '.html', auto_open=False)

In [None]:
# Lookup table: two-letter state abbreviation -> full state name.
# Covers the 50 U.S. states only; there is no entry for the District of
# Columbia or for U.S. territories.
states = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming',
}

# Replace state abbreviations with full state names.
# NOTE(review): assumes STATE holds two-letter postal abbreviations — confirm.
# A bare .map(states) turns every value without a dictionary entry into NaN.
# That includes names already translated by an earlier run of this cell, so
# re-running it would blank the whole column. Falling back to the original
# value keeps unmapped entries intact and makes the cell safe to re-run.
train_df['STATE'] = train_df['STATE'].map(states).fillna(train_df['STATE'])

In [None]:
# Filter for the top 10 states by number of accidents.
# value_counts() orders labels by descending frequency, so the first ten index
# entries are the states with the most rows.
# NOTE: this narrows train_df itself, so any later cell sees only these states.
top_states = train_df['STATE'].value_counts().index[:10]
train_df = train_df[train_df['STATE'].isin(top_states)]

# Collapse to one row per state holding the summed ACCDMG. Feeding the raw
# per-accident rows to line_polar draws a single line through every record in
# frame order, zig-zagging between states instead of tracing one radar outline.
damage_by_state = train_df.groupby('STATE', as_index=False)['ACCDMG'].sum()

# Create a radar chart: one spoke per state, radius = total ACCDMG
fig = px.line_polar(damage_by_state, r='ACCDMG', theta='STATE', line_close=True)

# Set the title
fig.update_layout(title='Train Accidents Damage in States ', font=dict(family='Arial'))

# Show the chart
fig.show()