In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import kaleido

TRAIN HISTOGRAMS

In [None]:
# Load the train-accident CSV and preview its first rows
TRAIN_CSV = "../data/train_final.csv"
df = pd.read_csv(TRAIN_CSV)
df.head()

In [None]:
# Derive the accident year from the DATE column
df['YEAR'] = pd.to_datetime(df['DATE']).dt.year

# Count accidents per (state, year). The result is then sorted by year first:
# Plotly Express orders animation frames by first appearance in the data, so
# leaving the rows sorted by STATE could put the year slider out of order.
df_filtered = (
    df[['STATE', 'YEAR']]
    .groupby(['STATE', 'YEAR'])
    .size()
    .reset_index(name='ACCIDENTS')
    .sort_values(['YEAR', 'STATE'])
    .reset_index(drop=True)
)

# Keep the states in alphabetical order on the x-axis regardless of which
# states happen to appear in the earliest year
state_order = sorted(df_filtered['STATE'].unique())

# One bar per state, animated over the years. The y-range is pinned to the
# overall maximum so bar heights stay comparable from frame to frame.
fig = px.histogram(
    df_filtered,
    x="STATE",
    y="ACCIDENTS",
    color="STATE",
    animation_frame="YEAR",
    range_y=[0, df_filtered['ACCIDENTS'].max()],
    nbins=len(state_order),
    category_orders={"STATE": state_order},
    color_discrete_sequence=['darkseagreen'],
)

# Every bar shares one colour, so the per-state legend carries no information
fig.update_layout(showlegend=False)

# Titles and axis labels
fig.update_layout(
    title='Number of Train Accidents by State for Each Year <br><sup>Source: Federal Railroad Administration</sup>',
    xaxis_title='State',
    yaxis_title='Number of Accidents'
)

# Show the plot
fig.show()

# Save image as png
fig.write_image("../img/histogram/Number_of_Train_Accidents_by_State_for_Each_Year.png")



In [None]:
# Top 3 accident causes, broken down by state (one facet row per cause).
# plotly.express is already imported as `px` in the notebook's import cell.

# The three most frequent cause codes
top_causes = df['CAUSE'].value_counts().nlargest(3).index.tolist()

# Keep only the accidents attributed to those causes
filtered_df = df.loc[df['CAUSE'].isin(top_causes), ['STATE', 'CAUSE']]

# Number of accidents per (cause, state)
grouped_df = filtered_df.groupby(['CAUSE', 'STATE']).size().reset_index(name='ACCIDENTS')

# facet_col_wrap is omitted on purpose: Plotly Express ignores it whenever
# facet_row is set, so it never affected this figure.
fig = px.histogram(
    grouped_df,
    x='STATE',
    y='ACCIDENTS',
    color='CAUSE',
    facet_row='CAUSE',
    facet_row_spacing=0.2,
    color_discrete_sequence=["#005F00", "#008F00", "#00AF00"],
)

# Layout for the entire figure
fig.update_layout(
    title="Top 3 Causes of Train Accidents for All States <br><sup>Source: Federal Railroad Administration</sup>",
    xaxis_title='States',
    yaxis_title='Number of Accidents',
    width=800,   # plot width in pixels
    height=600,  # plot height in pixels
)

# update_layout only labels the first y-axis; label every facet row the same way
fig.update_yaxes(title='Number of Accidents', visible=True, showticklabels=True)

# Human-readable names for the cause codes. Codes missing from this mapping
# keep their raw code instead of raising KeyError, so the cell still runs if
# the top causes in the data change.
newnames = {'H702': 'Switch Improperly Lined', 'M399': 'Other Causes', 'T110': 'Wide Gage'}
fig.for_each_trace(
    lambda t: t.update(
        name=newnames.get(t.name, t.name),
        legendgroup=newnames.get(t.name, t.name),
        hovertemplate=t.hovertemplate.replace(t.name, newnames.get(t.name, t.name)),
    )
)

# Show the plot
fig.show()

# Save the plot as a PNG image
fig.write_image("../img/histogram/Top_3_Causes_of_Train_Accidents_for_All_States.png")


In [None]:
# Top 3 accident causes, aggregated by US region (one facet row per cause).
# plotly.express is already imported as `px` in the notebook's import cell.

# The three most frequent cause codes
top_causes = df['CAUSE'].value_counts().nlargest(3).index.tolist()

# Keep only the accidents attributed to those causes
filtered_df = df.loc[df['CAUSE'].isin(top_causes), ['STATE', 'CAUSE']]

# Number of accidents per (cause, state)
grouped_df = filtered_df.groupby(['CAUSE', 'STATE']).size().reset_index(name='ACCIDENTS')

# State abbreviation -> region (US Census Bureau regions; DC belongs to the South)
region_dict = {
    'AL': 'South',
    'AK': 'West',
    'AZ': 'West',
    'AR': 'South',
    'CA': 'West',
    'CO': 'West',
    'CT': 'Northeast',
    'DC': 'South',
    'DE': 'South',
    'FL': 'South',
    'GA': 'South',
    'HI': 'West',
    'ID': 'West',
    'IL': 'Midwest',
    'IN': 'Midwest',
    'IA': 'Midwest',
    'KS': 'Midwest',
    'KY': 'South',
    'LA': 'South',
    'ME': 'Northeast',
    'MD': 'South',
    'MA': 'Northeast',
    'MI': 'Midwest',
    'MN': 'Midwest',
    'MS': 'South',
    'MO': 'Midwest',
    'MT': 'West',
    'NE': 'Midwest',
    'NV': 'West',
    'NH': 'Northeast',
    'NJ': 'Northeast',
    'NM': 'West',
    'NY': 'Northeast',
    'NC': 'South',
    'ND': 'Midwest',
    'OH': 'Midwest',
    'OK': 'South',
    'OR': 'West',
    'PA': 'Northeast',
    'RI': 'Northeast',
    'SC': 'South',
    'SD': 'Midwest',
    'TN': 'South',
    'TX': 'South',
    'UT': 'West',
    'VT': 'Northeast',
    'VA': 'South',
    'WA': 'West',
    'WV': 'South',
    'WI': 'Midwest',
    'WY': 'West'
}

# Attach the region to each row. States missing from the mapping would become
# NaN and silently vanish from the chart, so they are labelled 'Other' instead
# and their accidents stay visible.
grouped_df['REGION'] = grouped_df['STATE'].map(region_dict).fillna('Other')

# The histogram sums ACCIDENTS over all states of a region.
# facet_col_wrap is omitted on purpose: Plotly Express ignores it whenever
# facet_row is set, so it never affected this figure.
fig = px.histogram(
    grouped_df,
    x='REGION',
    y='ACCIDENTS',
    color='CAUSE',
    facet_row='CAUSE',
    facet_row_spacing=0.2,
    color_discrete_sequence=["#81C784", "#4CAF50", "#558B2F"],
)

# Layout for the entire figure
fig.update_layout(
    title="Top 3 Causes of Train Accidents by Region <br><sup>Source: Federal Railroad Administration</sup>",
    xaxis_title='Region',
    yaxis_title='Number of Accidents',
    width=800,
    height=600
)

# update_layout only labels the first y-axis; label every facet row the same way
fig.update_yaxes(title='Number of Accidents', visible=True, showticklabels=True)

# Human-readable names for the cause codes. Codes missing from this mapping
# keep their raw code instead of raising KeyError, so the cell still runs if
# the top causes in the data change.
newnames = {'H702': 'Switch Improperly Lined', 'M399': 'Other Causes', 'T110': 'Wide Gage'}
fig.for_each_trace(
    lambda t: t.update(
        name=newnames.get(t.name, t.name),
        legendgroup=newnames.get(t.name, t.name),
        hovertemplate=t.hovertemplate.replace(t.name, newnames.get(t.name, t.name)),
    )
)

# Show the plot
fig.show()

# Save the plot as a PNG image
fig.write_image("../img/histogram/Top_3_Causes_of_Train_Accidents_by_Region.png")


In [None]:
# Share of each accident type, as a percentage of all accidents
accident_counts = df['TYPE'].value_counts(normalize=True) * 100

# Single bar trace; each bar is annotated with its percentage rounded to 2 decimals
fig = go.Figure(
    go.Bar(
        x=accident_counts.index,
        y=accident_counts.values,
        text=accident_counts.round(2),
        textposition='auto',
        marker_color='#355E3B',
    )
)

# Titles, axis labels and figure size
fig.update_layout(
    title="Percentage of Train Accidents by Type<br><sup>Source: Federal Railroad Administration</sup>",
    xaxis_title="Type of Accident",
    yaxis_title="% of Total Accidents",
    showlegend=False,
    height=500,
    width=800
)

fig.show()

# Save image as png
fig.write_image("../img/histogram/Percentage_of_Train_Accidents_by_Type.png")


In [None]:
# Count of train accidents per weather condition (one bar per WEATHER value)
fig = px.histogram(
    df,
    x="WEATHER",
    title="Number of Train Accidents by Weather Condition <br><sup>Source: Federal Railroad Administration</sup>",
    color_discrete_sequence=['#A9BA9D'],
)

# Axis labels; the single-series legend is hidden
fig.update_layout(
    xaxis_title="Weather Condition",
    yaxis_title='Number of Accidents',
    showlegend=False,
)
fig.show()

# Save image as png
fig.write_image("../img/histogram/Number_of_Train_Accidents_by_Weather_Condition.png")


In [None]:
# Histogram of train accidents with a dropdown that switches between
# "type of train" (TYPEQ) and "type of track" (TYPTRK).

# Full titles (including the source line) for each dropdown state, so picking
# a button never drops the source attribution
train_title = 'Number of Train Accidents by Type of Train <br><sup>Source: Federal Railroad Administration</sup>'
track_title = 'Number of Train Accidents by Type of Track <br><sup>Source: Federal Railroad Administration</sup>'

# INITIALIZE GRAPH OBJECT
fig = go.Figure()

# TRACE-1: accidents by type of train (shown initially)
trace1 = go.Histogram(
    x=df['TYPEQ'],
    name='Type of Train',
    marker_color='green'
)

# TRACE-2: accidents by type of track. Hidden at start so the initial view
# (and the exported PNG) matches the dropdown's active "Type of Train" entry
# instead of overlaying both histograms.
trace2 = go.Histogram(
    x=df['TYPTRK'],
    name='Type of Track',
    marker_color='#66CDAA',
    visible=False
)

# ADDING TRACES TO FIGURE
fig.add_trace(trace1)
fig.add_trace(trace2)

# Dropdown: each button toggles trace visibility and keeps the figure title
# and x-axis title in sync with the visible trace
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=[
                dict(label="Type of Train",
                     method="update",
                     args=[{"visible": [True, False]},
                           {"title.text": train_title,
                            "xaxis.title.text": "Type of Train"}]),
                dict(label="Type of Track",
                     method="update",
                     args=[{"visible": [False, True]},
                           {"title.text": track_title,
                            "xaxis.title.text": "Type of Track"}])
            ],
            direction="down",
            showactive=True,
            x=0.00,
            y=1.6
        )
    ]
)

# Initial layout (corresponds to the "Type of Train" dropdown entry)
fig.update_layout(
    title_text=train_title,
    xaxis_title_text='Type of Train',
    yaxis_title_text='Number of Accidents',
    barmode='overlay',
    bargap=0.2,
    bargroupgap=0.1
)

# Show the figure
fig.show()

# Save image as png
fig.write_image("../img/histogram/Number_of_Train_Accidents_by_Type_of_Train.png")


AIRPLANES HISTOGRAMS

In [None]:
# Load the airplane-crash CSV (this rebinds `df`, replacing the train data)
# and preview its first rows
AIRPLANES_CSV = "../data/airplanes_final.csv"
df = pd.read_csv(AIRPLANES_CSV)
df.head()

In [None]:
# Derive the crash year from the Date column
crash_dates = pd.to_datetime(df['Date'])
df['Year'] = crash_dates.dt.year

# Histogram of crashes over the Year column
fig = px.histogram(df, x='Year', color_discrete_sequence=['#7CB9E8'])

# Title and axis labels
layout_options = dict(
    title='Number of Flight Crashes by Year<br><sup>Source: Kaggle</sup>',
    xaxis_title='Year',
    yaxis_title='Number of Flight Crashes',
)
fig.update_layout(**layout_options)

# Render the figure in the notebook
fig.show()

# Export the figure as a PNG
fig.write_image("../img/histogram/Number_of_Flight_Crashes_by_Year.png")


In [None]:
# Derive the crash year from the Date column (repeated here so the cell does
# not depend on the previous one having been run)
df['Year'] = pd.to_datetime(df['Date']).dt.year

# Count crashes per (state, year). The result is then sorted by year first:
# Plotly Express orders animation frames by first appearance in the data, so
# leaving the rows sorted by State could put the year slider out of order.
df_filtered = (
    df[['State', 'Year']]
    .groupby(['State', 'Year'])
    .size()
    .reset_index(name='Crashes')
    .sort_values(['Year', 'State'])
    .reset_index(drop=True)
)

# Keep the states in alphabetical order on the x-axis regardless of which
# states happen to appear in the earliest year
state_order = sorted(df_filtered['State'].unique())

# One bar per state, animated over the years. The y-range is pinned to the
# overall maximum so bar heights stay comparable from frame to frame.
fig = px.histogram(
    df_filtered,
    x="State",
    y="Crashes",
    color="State",
    animation_frame="Year",
    range_y=[0, df_filtered['Crashes'].max()],
    nbins=len(state_order),
    category_orders={"State": state_order},
    color_discrete_sequence=['#72A0C1'],
)

# Every bar shares one colour, so the per-state legend carries no information
fig.update_layout(showlegend=False)

# Titles and axis labels
fig.update_layout(
    title='Number of Flight Accidents by State for Each Year<br><sup>Source: Kaggle</sup>',
    xaxis_title='State',
    yaxis_title='Number of Accidents'
)

# Show the plot
fig.show()

# Save image as png
fig.write_image("../img/histogram/Number_of_Flight_Accidents_by_State_for_Each_Year.png")


In [None]:
# Bar chart with a dropdown that switches between the 5 operators and the
# 5 aircraft types with the most recorded crashes.

# Full titles (including the source line) for each dropdown state, so picking
# a button never drops the source attribution
operators_title = 'Top 5 Operators with the Most Flight Crashes<br><sup>Source: Kaggle</sup>'
aircraft_title = 'Top 5 Aircrafts with the Most Flight Crashes<br><sup>Source: Kaggle</sup>'

# INITIALIZE GRAPH OBJECT
fig = go.Figure()

# Crashes per operator, counted as non-null Date entries.
# NOTE(review): the original cell grouped by 'Type' here as well, which made
# both traces identical; this assumes the dataset has an 'Operator' column -
# confirm against airplanes_final.csv.
operator_crashes = df.groupby('Operator').count()['Date'].reset_index(name='count')
operator_crashes = operator_crashes.sort_values(by='count', ascending=False)
top_5_operators = operator_crashes.iloc[:5]

# TRACE-1: top 5 operators (shown initially)
fig.add_trace(go.Bar(x=top_5_operators['Operator'], y=top_5_operators['count'],
                     name='Top 5 Operators', marker_color='#72A0C1'))

# Crashes per aircraft type, counted the same way
type_crashes = df.groupby('Type').count()['Date'].reset_index(name='count')
type_crashes = type_crashes.sort_values(by='count', ascending=False)
top_5_types = type_crashes.iloc[:5]

# TRACE-2: top 5 aircraft types. Hidden at start so the initial view (and the
# exported PNG) matches the dropdown's active "Top 5 Operators" entry.
fig.add_trace(go.Bar(x=top_5_types['Type'], y=top_5_types['count'],
                     name='Top 5 Aircrafts', marker_color="#13274F", visible=False))

# Dropdown: each button toggles trace visibility and keeps the figure title
# and x-axis title in sync with the visible trace
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=[
                dict(label="Top 5 Operators",
                     method="update",
                     args=[{"visible": [True, False]},
                           {"title.text": operators_title,
                            "xaxis.title.text": "Operator"}]),
                dict(label="Top 5 Aircrafts",
                     method="update",
                     args=[{"visible": [False, True]},
                           {"title.text": aircraft_title,
                            "xaxis.title.text": "Aircraft Type"}])
            ],
        )
    ]
)

# Initial layout (corresponds to the "Top 5 Operators" dropdown entry)
fig.update_layout(
    title=operators_title,
    xaxis_title='Operator',
    yaxis_title='Number of Crashes'
)

# Show the plot
fig.show()

# Save image as png
fig.write_image("../img/histogram/Top_5_Operators_with_the_Most_Flight_Crashes.png")


In [None]:
# Distribution of the Fatalities column, with nbinsx=20 as the bin-count setting
fatalities_trace = go.Histogram(x=df['Fatalities'], nbinsx=20, marker_color='#6495ED')
fig = go.Figure(data=[fatalities_trace])

# Title and axis labels
fig.update_layout(
    title="Fatalities in Flight Crashes<br><sup>Source: Kaggle</sup>",
    xaxis_title="Number of Fatalities",
    yaxis_title="Count",
)

# Render the figure in the notebook
fig.show()

# Export the figure as a PNG
fig.write_image("../img/histogram/Fatalities_in_Flight_Crashes.png")
