In [26]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [27]:
# Load the training data from a CSV file; the path is relative to the
# notebook's working directory.
train_df = pd.read_csv(filepath_or_buffer='../data/train_final.csv')

In [28]:
# Drop every row that has a missing value in at least one column.
# NOTE(review): this applies to all columns, so a row is discarded even if
# only one free-text field is empty -- confirm that this is intended.
train_df = train_df.dropna(axis=0, how='any')

In [29]:
# Inspect the column names available in the frame.
train_df.columns

Index(['DATE', 'TYPE', 'STATE', 'CAUSE', 'TITLE', 'CATEGORY', 'DESCRIPTION',
       'CASKLDRR', 'CASINJRR', 'CARSDMG', 'CARSHZD', 'TEMP', 'VISIBLTY',
       'WEATHER', 'TRNSPD', 'TONS', 'TYPEQ', 'TYPTRK', 'ACCDMG', 'NARR1',
       'NARR2', 'NARR3', 'NARR4', 'NARR5', 'NARR6', 'NARR7', 'NARR8', 'NARR9',
       'NARR10', 'NARR11', 'NARR12', 'NARR13', 'NARR14', 'NARR15', 'Latitude',
       'Longitud'],
      dtype='object')

In [30]:
# Extract month and year from DATE.
# The date strings are parsed once and the result reused for both columns,
# instead of parsing the whole column a second time for the year.
parsed_dates = pd.DatetimeIndex(train_df['DATE'])
train_df['month'] = parsed_dates.month
train_df['year'] = parsed_dates.year

In [31]:
# Show the distinct weather conditions, in order of first appearance.
pd.unique(train_df['WEATHER'])

array(['Cloudy', 'Clear', 'Rain', 'Sleet', 'Snow', 'Fog'], dtype=object)

In [32]:
# Preview the first five rows, including the derived month/year columns.
train_df.head(n=5)

Unnamed: 0,DATE,TYPE,STATE,CAUSE,TITLE,CATEGORY,DESCRIPTION,CASKLDRR,CASINJRR,CARSDMG,...,NARR10,NARR11,NARR12,NARR13,NARR14,NARR15,Latitude,Longitud,month,year
6425,4/6/2011,Derailment,IN,T314,"Rack, Roadbed and Structures","Frogs, Switches and Track Appliances",Switch point worn or broken,0,0,0,...,EAD TO 2S LEAD. THE CAUSE WAS DETERMINED TO BE...,IT HAD A EXISTING HORIZONTAL CRACK STARTING TW...,AS THE LOADED CARS WERE MOVING ACROSS IT. AS R...,TCH CROSSOVER 2S/14S AND SWITCH 18S SUSTAINED ...,TIES. THE RCO FOREMAN OBSERVED THE DERAILMENT ...,HELPER TO STOP THE MOVEMENT. THE DERAILED CARS...,0.0,0.0,4,2011
11212,10/6/2003,Other Impacts,MN,M599,Miscellaneous Causes Not Otherwise Listed,Other Miscellaneous,Other miscellaneous causes (Provide detailed d...,0,0,0,...,AD. A TOTAL OF NINE CARS HAD BEEN PULLED BY T...,R THE ENGINE HAD BEEN BY THE 32 SWITCH SEVERAL...,ON 32 TRACK AT THE TIME OF IMPACT. THE WEST C...,11110 A EMPTY BOXCAR AND DERAILED ONTO ITS SID...,"AT 0030, THE HUMP HAD HUMPED 14 LOADS OF GRAIN...","HIN SECONDS OR MINUTES OF EACH OTHER, CAME OUT...",0.0,0.0,10,2003
11213,10/6/2003,Other Impacts,MN,M599,Miscellaneous Causes Not Otherwise Listed,Other Miscellaneous,Other miscellaneous causes (Provide detailed d...,0,0,0,...,AD. A TOTAL OF NINE CARS HAD BEEN PULLED BY T...,R THE ENGINE HAD BEEN BY THE 32 SWITCH SEVERAL...,ON 32 TRACK AT THE TIME OF IMPACT. THE WEST C...,11110 A EMPTY BOXCAR AND DERAILED ONTO ITS SID...,"AT 0030, THE HUMP HAD HUMPED 14 LOADS OF GRAIN...","HIN SECONDS OR MINUTES OF EACH OTHER, CAME OUT...",0.0,0.0,10,2003
11390,7/27/2010,Derailment,WA,M599,Miscellaneous Causes Not Otherwise Listed,Other Miscellaneous,Other miscellaneous causes (Provide detailed d...,0,0,0,...,"& WHILE NO CONTRACTORS ARE PRESENT, THE DERAIL...",\r\nTHE DERAIL WAS PLACED FOR PROTECTION OF TH...,THE TRACKS DURING CONSTRUCTION OF A STREET OVE...,RAIL DURING THE TRAIN MOVEMENT.\r\nTHE MECHANI...,IF ONE OF THE PIECES TORN OFF THE RAILCAR OR I...,DERAIL. MARKS ON THE DERAIL ITSELF CONFIRM SO...,0.0,0.0,7,2010
11432,8/27/2011,Derailment,WV,M599,Miscellaneous Causes Not Otherwise Listed,Other Miscellaneous,Other miscellaneous causes (Provide detailed d...,0,0,0,...,X FOREMAN HEARD WAS THE GRAPPLE TRUCK OPERATOR...,ICKING UP SPEED. CSX FOREMAN DOES NOT RECALL H...,PERVISOR WHO WAS ON THE RAIL 2 MILES AHEAD OF ...,E RAIL AT MP BAH-29.8. HE WATCHED THE PROGRESS...,30 FEET PAST THE CROSSING WHERE IT DERAILED AT...,E. BOTH PROGRESS RAIL EMPLOYEES HAD JUMPED OUT...,39.473384,-79.075912,8,2011


In [33]:
# Bubble chart of temperature over time: bubble size is the reported damage
# (ACCDMG) and colour is the weather condition.
#
# DATE holds M/D/YYYY strings (e.g. '4/6/2011'). Plotly does not recognise
# that format as a date and would place the values on a categorical axis in
# row order. Parse the column on a copy so the x-axis is a real,
# chronologically ordered date axis while train_df itself stays unchanged.
scatter_df = train_df.assign(DATE=pd.to_datetime(train_df["DATE"]))

fig = px.scatter(
    scatter_df,
    x="DATE",
    y="TEMP",
    size="ACCDMG",
    color="WEATHER",
    hover_name="DESCRIPTION",
    size_max=80,
    template="plotly_dark",
    color_discrete_sequence=px.colors.sequential.BuPu,
)

fig.update_layout(title="Weather Conditions and Temperature effect on Accidents",
                  xaxis_title="Date",
                  yaxis_title="Temperature (°F)")

fig.show()

In [34]:
# Lookup from two-letter postal abbreviation to full state name.
states = {
    'AL': 'Alabama',
    'AK': 'Alaska',
    'AZ': 'Arizona',
    'AR': 'Arkansas',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'IA': 'Iowa',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'ME': 'Maine',
    'MD': 'Maryland',
    'MA': 'Massachusetts',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MS': 'Mississippi',
    'MO': 'Missouri',
    'MT': 'Montana',
    'NE': 'Nebraska',
    'NV': 'Nevada',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NY': 'New York',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VT': 'Vermont',
    'VA': 'Virginia',
    'WA': 'Washington',
    'WV': 'West Virginia',
    'WI': 'Wisconsin',
    'WY': 'Wyoming'
}

# Expand the abbreviations in STATE to full names.
# replace() is used rather than map() so that values without an entry in the
# lookup are left as they are instead of becoming NaN. This keeps any code not
# listed above, and makes the cell safe to re-run: names that were already
# expanded are not keys of the dict and therefore stay untouched.
train_df['STATE'] = train_df['STATE'].replace(states)

In [35]:
# Filter for the top 10 states by number of accidents
top_states = train_df['STATE'].value_counts().index[:10]
train_df = train_df[train_df['STATE'].isin(top_states)]

# Sum the damage per state so the radar chart gets exactly one point per
# state. Passing the raw rows would draw one spoke per accident record, all
# overlapping on the same ten angles.
damage_by_state = train_df.groupby('STATE', as_index=False)['ACCDMG'].sum()

# Create a radar chart
fig = px.line_polar(damage_by_state, r='ACCDMG', theta='STATE', line_close=True)

# Set the title
fig.update_layout(title='Train Accidents Damage in States ', font=dict(family='Arial'))

# Show the chart
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

