# In [None]:
from IPython import get_ipython
from IPython.display import display
from IPython.display import IFrame, display

# To view: http://127.0.0.1:8050/

# Install required packages
# !pip install dash pandas plotly
#!pip install jupyter-dash==0.4.2


import dash
from dash import dcc, html
from dash.dependencies import Input, Output  # Import for callbacks
import plotly.express as px
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings


# Create Dash app
app = dash.Dash(__name__)

# Directory holding the cleaned CSV exports. The path is relative, so it is
# resolved against the working directory the notebook/script is started from.
DATA_DIR = '../data/processed'

# NOTE(review): the NTSB and merged exports are read as ISO-8859-1 while the
# ASRS export relies on the pandas default encoding -- confirm this matches
# how each file was written.
ntsb_data = pd.read_csv(
    f'{DATA_DIR}/ntsb_data_cleaned.csv',
    encoding='ISO-8859-1',
    low_memory=False,
)
asrs_data = pd.read_csv(f'{DATA_DIR}/asrs_data_cleaned.csv')

# Load the merged dataset (the path previously contained a doubled '/').
merged_data = pd.read_csv(
    f'{DATA_DIR}/merged_asrs_ntsb.csv',
    encoding='ISO-8859-1',
    low_memory=False,
)

# Create a function for top states chart from NTSB data
def create_ntsb_top_states_chart(ntsb_data):
    """Build a bar chart of the ten most frequent values in the 'state' column.

    Args:
        ntsb_data: DataFrame that provides a 'state' column.

    Returns:
        A Plotly Express bar figure whose bars are coloured by their count.
    """
    # value_counts() sorts by descending frequency, so head(10) is the top ten.
    top_ten = ntsb_data['state'].value_counts().head(10)
    states = top_ten.index
    counts = top_ten.values

    return px.bar(
        x=states,
        y=counts,
        color=counts,
        color_continuous_scale='Viridis',
        title="Top 10 States by Frequency of Accidents (NTSB Data)",
        labels={'x': 'State', 'y': 'Number of Accidents'},
    )

# Create a function for injury counts chart from NTSB data
def create_ntsb_injury_counts_chart(ntsb_data):
    """Build a bar chart of the summed fatal, serious and minor injury counts.

    Args:
        ntsb_data: DataFrame expected to provide the 'fatalinjurycount',
            'seriousinjurycount' and 'minorinjurycount' columns.

    Returns:
        A Plotly figure. When all three columns are present it is a bar chart
        of their column sums; otherwise it is an empty figure carrying a
        message that names the missing columns.
    """
    import plotly.graph_objects as go

    injury_columns = ['fatalinjurycount', 'seriousinjurycount', 'minorinjurycount']
    chart_title = "Total Injury Counts in NTSB Data"

    missing = [column for column in injury_columns if column not in ntsb_data.columns]
    if missing:
        # Previously this case fell off the end of the function and returned
        # None, which was then handed to dcc.Graph as its figure. Return an
        # explicit placeholder so the dashboard still renders and says why.
        placeholder = go.Figure()
        placeholder.update_layout(title_text=chart_title)
        placeholder.add_annotation(
            text="Injury data unavailable (missing columns: " + ", ".join(missing) + ")",
            showarrow=False,
            xref='paper',
            yref='paper',
            x=0.5,
            y=0.5,
        )
        return placeholder

    injury_counts = ntsb_data[injury_columns].sum().reset_index()
    injury_counts.columns = ['Injury Type', 'Count']

    # Axis titles come from the column names themselves; the former
    # labels={'x': ..., 'y': ...} mapping matched no column and had no effect.
    return px.bar(
        injury_counts,
        x='Injury Type',
        y='Count',
        title=chart_title,
        color='Count',
        color_continuous_scale='Magma',
    )

# Create a function for weather condition comparison chart
def create_weather_comparison_chart(ntsb_data, asrs_data):
    """Build side-by-side pie charts of weather conditions for Cessna records.

    Args:
        ntsb_data: DataFrame providing 'make' and 'weathercondition' columns.
        asrs_data: DataFrame providing 'make_model_name' and
            'flight_conditions' columns.

    Returns:
        A Plotly figure with one row and two pie subplots: NTSB on the left
        and ASRS on the right, each showing the value counts of its weather
        column for the rows selected as Cessna.
    """
    from plotly.subplots import make_subplots
    import plotly.graph_objects as go

    # Select Cessna rows. na=False makes rows with a missing make/model count
    # as "not Cessna"; without it the mask contains NaN and pandas refuses to
    # use it for boolean indexing.
    ntsb_is_cessna = ntsb_data['make'] == 'CESSNA'
    asrs_is_cessna = asrs_data['make_model_name'].str.startswith('Cessna', na=False)

    # Count weather conditions per source.
    ntsb_weather_counts = ntsb_data.loc[ntsb_is_cessna, 'weathercondition'].value_counts()
    asrs_weather_counts = asrs_data.loc[asrs_is_cessna, 'flight_conditions'].value_counts()

    # The traces are built directly instead of via throwaway px.pie figures.
    # The per-chart titles that used to be lost with those figures are now
    # passed as subplot titles.
    # NOTE: the Blues/Oranges palettes set on the old px.pie figures never
    # reached the returned figure, so the default colours are kept here to
    # leave the rendered slices unchanged.
    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'pie'}, {'type': 'pie'}]],
        subplot_titles=(
            'NTSB Weather Conditions (Cessna Aircraft)',
            'ASRS Weather Conditions (Cessna Aircraft)',
        ),
    )
    fig.add_trace(
        go.Pie(labels=ntsb_weather_counts.index, values=ntsb_weather_counts.values),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Pie(labels=asrs_weather_counts.index, values=asrs_weather_counts.values),
        row=1,
        col=2,
    )
    fig.update_layout(title_text="Weather Conditions Comparison (Cessna Aircraft)")

    return fig

def create_accident_trends_chart(merged_data):
    """Build a line chart of the number of records per calendar year.

    Args:
        merged_data: DataFrame providing an 'eventdate' column. The input
            frame is not modified.

    Returns:
        A Plotly Express line figure (lines plus blue markers) of yearly
        record counts, ordered by year.
    """
    # Unparseable dates become NaT and are discarded together with missing ones.
    event_dates = pd.to_datetime(merged_data['eventdate'], errors='coerce').dropna()

    # Keep the series named 'Year' so the resulting counts carry the same
    # labels as a dedicated 'Year' column would.
    years = event_dates.dt.year.rename('Year')
    yearly_counts = years.value_counts().sort_index()

    trend_fig = px.line(
        x=yearly_counts.index,
        y=yearly_counts.values,
        labels={'x': 'Year', 'y': 'Number of Accidents'},
        title="Combined Accident Trends Over Time (ASRS + NTSB)",
    )
    trend_fig.update_traces(mode='lines+markers', marker=dict(color='blue'))
    return trend_fig

# Explanation text for graphs (modified to return sections)
def explanation_text_top_states(section=None):
    """Return the Markdown explanation text for the top-10-states chart.

    Args:
        section: Key of the section to return ('start' or 'a' through 'f').
            Any falsy value, including the default ``None``, selects every
            section.

    Returns:
        The Markdown for the requested section, an empty string when the key
        is unknown, or all sections separated by blank lines when no section
        is requested.
    """
    explanations = {
        'start': "**Overall Summary:** This graph visually represents the top 10 states in the United States with the highest frequency of airplane accidents. By identifying these high-risk areas, policymakers and safety organizations can focus resources and efforts on implementing preventative measures to enhance aviation safety in these specific regions.",
        'a': "**a) Statement of the Problem (Top States):** The problem is the disproportionate number of airplane accidents occurring in certain states, indicating potential safety concerns specific to these regions.",
        'b': "**b) Why Should People Care (Top States):** Identifying high-risk states for airplane accidents is crucial for public safety. By understanding where accidents are more frequent, we can advocate for improved safety measures, potentially saving lives and reducing injuries.",
        'c': "**c) Approach and Design (Top States):** We used a bar chart to visually compare accident frequencies across states. This design allows for easy identification of the states with the highest accident rates, making the data more accessible and understandable.",
        'd': "**d) Details of Your Process (Top States):** We analyzed data from the National Transportation Safety Board (NTSB) to identify the top 10 states with the most accidents. The data was cleaned and processed to ensure accuracy and consistency before creating the bar chart.",
        'e': "**e) What is Original About Your Research (Top States):** While previous studies have examined airplane accident trends, our research focuses specifically on identifying the top 10 states with the highest frequency of accidents. This provides a targeted perspective for safety improvements.",
        'f': "**f) Lessons Learned, Implications, Conclusions (Top States):** The findings highlight the need for targeted safety interventions in the identified high-risk states. This could involve infrastructure improvements, enhanced pilot training, or stricter safety regulations. Further research can investigate specific factors contributing to accidents in these states."
    }
    if section:
        return explanations.get(section, "")
    # A blank line between sections keeps each one its own Markdown paragraph
    # instead of fusing a bold heading onto the end of the previous sentence.
    return "\n\n".join(explanations.values())

def explanation_text_injury_counts(section=None):
    """Return the Markdown explanation text for the injury-counts chart.

    Args:
        section: Key of the section to return ('start' or 'a' through 'f').
            Any falsy value, including the default ``None``, selects every
            section.

    Returns:
        The Markdown for the requested section, an empty string when the key
        is unknown, or all sections separated by blank lines when no section
        is requested.
    """
    explanations = {
        'start': "**Overall Summary:** This graph provides a breakdown of the total number of injuries resulting from airplane accidents, categorized by severity: fatal, serious, and minor. By understanding the distribution of injury types, we can better assess the impact of accidents and develop strategies to mitigate the risk of severe injuries or fatalities.",
        'a': "**a) Statement of the Problem (Injury Counts):** The problem is the lack of understanding regarding the distribution and severity of injuries resulting from airplane accidents, hindering effective safety interventions.",
        'b': "**b) Why Should People Care (Injury Counts):** Analyzing injury data is crucial for public health and safety. By understanding the types and frequency of injuries, we can advocate for improvements in aircraft design, emergency response, and passenger safety practices.",
        'c': "**c) Approach and Design (Injury Counts):** We used a bar chart to compare the counts of fatal, serious, and minor injuries. This design provides a clear visual representation of the injury distribution, allowing for easy interpretation and comparison.",
        'd': "**d) Details of Your Process (Injury Counts):** We extracted injury data from the NTSB database and categorized injuries into fatal, serious, and minor categories. The data was then aggregated to calculate total injury counts for each category.",
        'e': "**e) What is Original About Your Research (Injury Counts):** Our research provides a specific breakdown of injury types within the context of airplane accidents. This detailed analysis can contribute to a better understanding of the risks and consequences associated with such accidents.",
        'f': "**f) Lessons Learned, Implications, Conclusions (Injury Counts):** The findings emphasize the importance of focusing on safety measures that minimize the severity of injuries. This could involve improving aircraft safety features, enhancing medical response protocols, or promoting passenger safety education."
    }
    if section:
        return explanations.get(section, "")
    # A blank line between sections keeps each one its own Markdown paragraph
    # instead of fusing a bold heading onto the end of the previous sentence.
    return "\n\n".join(explanations.values())

# Explanation text for the pie chart
def explanation_text_weather(section=None):
    """Return the Markdown explanation text for the weather comparison chart.

    Args:
        section: Key of the section to return ('start' or 'a' through 'f').
            Any falsy value, including the default ``None``, selects every
            section.

    Returns:
        The Markdown for the requested section, an empty string when the key
        is unknown, or all sections separated by blank lines when no section
        is requested.
    """
    explanations = {
        'start': "**Overall Summary:** This graph visually compares the weather conditions reported in airplane accidents involving Cessna aircraft, using data from both the NTSB and ASRS. By examining the distribution of weather conditions in each dataset, we can gain insights into potential weather-related factors that may contribute to Cessna accidents.",
        'a': "**a) Statement of the Problem (Weather Conditions):** The problem is to investigate whether specific weather conditions are more frequently associated with Cessna aircraft accidents, as reported in the NTSB and ASRS datasets.",
        'b': "**b) Why Should People Care (Weather Conditions):** Understanding the relationship between weather and Cessna accidents is crucial for improving aviation safety. By identifying weather-related risks, pilots and aviation authorities can take proactive measures to mitigate potential hazards.",
        'c': "**c) Approach and Design (Weather Conditions):** We used two pie charts to visually represent the distribution of weather conditions reported in Cessna accidents from the NTSB and ASRS datasets. Pie charts allow for easy comparison of the proportions of different weather conditions in each dataset.",
        'd': "**d) Details of Your Process (Weather Conditions):** We extracted data on weather conditions from both the NTSB and ASRS datasets, specifically focusing on accidents involving Cessna aircraft. The data was then aggregated to calculate the frequency of each weather condition, which was used to create the pie charts.",
        'e': "**e) What is Original About Your Research (Weather Conditions):** Our research provides a direct comparison of weather conditions reported in Cessna accidents from two distinct aviation safety databases, the NTSB and ASRS. This comparison offers a more comprehensive understanding of weather-related risks.",
        'f': "**f) Lessons Learned, Implications, Conclusions (Weather Conditions):** The findings can highlight potential weather-related factors that contribute to Cessna accidents. This information can inform pilot training, flight planning, and safety regulations to minimize weather-related risks.",
    }
    if section:
        return explanations.get(section, "")
    # A blank line between sections keeps each one its own Markdown paragraph
    # instead of fusing a bold heading onto the end of the previous sentence.
    return "\n\n".join(explanations.values())

# Explanation text for the accident trends line chart
def explanation_text_trends(section=None):
    """Return the Markdown explanation text for the accident trends chart.

    Args:
        section: Key of the section to return ('start' or 'a' through 'f').
            Any falsy value, including the default ``None``, selects every
            section.

    Returns:
        The Markdown for the requested section, an empty string when the key
        is unknown, or all sections separated by blank lines when no section
        is requested.
    """
    explanations = {
        'start': "**Overall Summary:** This graph illustrates the combined trend of airplane accidents over time, using data from both the NTSB and ASRS. By analyzing this trend, we can gain insights into the overall safety performance of the aviation industry and identify periods of improvement or potential concerns.",
        'a': "**a) Statement of the Problem (Accident Trends):** The problem is to understand the overall pattern of airplane accidents over time, considering data from both the NTSB and ASRS to obtain a comprehensive view.",
        'b': "**b) Why Should People Care (Accident Trends):** Analyzing accident trends is crucial for assessing the effectiveness of aviation safety regulations and initiatives. It helps us identify areas where further improvements are needed to enhance safety and prevent future accidents.",
        'c': "**c) Approach and Design (Accident Trends):** We used a line chart to visualize the combined accident trends over time. This design allows for easy observation of changes in accident frequency over the years.",
        'd': "**d) Details of Your Process (Accident Trends):** We merged data from the NTSB and ASRS databases, focusing on the 'eventdate' column to extract yearly accident counts. The data was then aggregated and plotted as a line chart.",
        'e': "**e) What is Original About Your Research (Accident Trends):** Our research provides a combined analysis of accident trends from two distinct aviation safety databases, the NTSB and ASRS. This comprehensive approach offers a broader perspective on aviation safety performance.",
        'f': "**f) Lessons Learned, Implications, Conclusions (Accident Trends):** The findings can reveal whether accident rates are generally increasing, decreasing, or remaining stable over time. This information can inform policy decisions, safety programs, and resource allocation to further enhance aviation safety.",
    }
    if section:
        return explanations.get(section, "")
    # A blank line between sections keeps each one its own Markdown paragraph
    # instead of fusing a bold heading onto the end of the previous sentence.
    return "\n\n".join(explanations.values())

# Create layout for the dashboard with tabs
# Section choices shared by every explanation dropdown. The values are the
# keys looked up by the explanation_text_* functions, and the labels mirror
# the headings of the text each key displays.
_EXPLANATION_SECTION_OPTIONS = [
    {'label': 'a) Statement of the Problem', 'value': 'a'},
    {'label': 'b) Why Should People Care', 'value': 'b'},
    {'label': 'c) Approach and Design', 'value': 'c'},
    {'label': 'd) Details of Your Process', 'value': 'd'},
    {'label': 'e) What is Original About Your Research', 'value': 'e'},
    {'label': 'f) Lessons Learned, Implications, Conclusions', 'value': 'f'},
]


def _explanation_dropdown(dropdown_id, start_label):
    """Return the section selector shown beneath a chart.

    Args:
        dropdown_id: Component id; the matching callback listens on it.
        start_label: Label of the first entry, which selects the overall
            summary ('start') and is preselected.
    """
    options = [{'label': start_label, 'value': 'start'}]
    # Copy the shared entries so no two components hold the same dict objects.
    options.extend(dict(option) for option in _EXPLANATION_SECTION_OPTIONS)
    return dcc.Dropdown(
        id=dropdown_id,
        options=options,
        value='start',  # Default to the overall summary
        placeholder="Select an explanation section",
        clearable=False,
    )


def _build_about_tab():
    """Return the 'About' tab: author details, project links and summary."""
    return dcc.Tab(label='About', children=[
        html.Div([
            html.Div([
                html.P([html.B("Author: "), " Mary Moor"]),
                html.P([html.B("Class: "), "Rowan University - Data Quality & Web/Text Mining"]),
                html.P([html.B("Professor: "), "Anthony Breitzman"]),
                html.P([html.B("Date: "), "December 11, 2024"]),
            ]),
            html.Hr(),

            html.B("GitHub Repository: "),
            html.A("Airplane Crash Analysis Project", href="https://github.com/MaryCMoor/Airplane-Crash-Analysis-Project/tree/main", target="_blank"),

            html.Br(),
            html.Br(),

            # Trailing space keeps the label from running into the link text.
            html.B("View Full Report (PDF): "),
            # NOTE(review): '/content/...' looks like a notebook-host file
            # path rather than a URL this app serves -- confirm the link
            # resolves when the dashboard runs.
            html.A(
                "Click Here",
                href='/content/Analyzing Contributing Factors in Airplane Crashes.pdf',  # Path to your uploaded PDF
                download='Analyzing Contributing Factors in Airplane Crashes.pdf',  # Filename for download
                target='_blank'  # Open in a new tab
            ),
            html.Hr(),

            html.H2("Project Summary"),
            html.Div([
                html.P([
                    """
                    This project aims to identify """, html.B("common factors contributing to airplane crashes"), """ using data mining techniques. By analyzing crash data, we aim to uncover trends or patterns related to accident causes, potentially leading to improved safety measures and reduced risks.
                    """
                ]),
                html.Br(),

                html.B("Project Goals:"),
                html.Ul([
                    html.Li("Identify common factors contributing to airplane crashes."),
                    html.Li("Uncover trends or patterns related to accident causes."),
                    html.Li("Contribute to improved aviation safety."),
                ]),
                html.Br(),

                html.B("Data Sources:"),
                html.Ul([
                    html.Li("National Transportation Safety Board (NTSB) data"),
                    html.Li("Aviation Safety Reporting System (ASRS) data"),
                ]),
                html.Br(),

                html.B("Methodology:"),
                html.Ul([
                    html.Li("Data cleaning and preprocessing"),
                    html.Li("Exploratory data analysis using visualizations"),
                    html.Li("Statistical analysis to identify significant factors"),
                ]),
                html.Br(),

                html.B("Originality:"),
                html.P("""
                    This project focuses specifically on identifying the top 10 states with the highest frequency of airplane accidents and provides a detailed breakdown of injury types, offering a targeted perspective for safety improvements.
                """),
                html.Br(),

                html.B("Implications:"),
                html.P("""
                    The findings of this project can guide policymakers and safety organizations in implementing preventative measures, enhancing pilot training, and improving aircraft design to mitigate the risks associated with airplane accidents.
                """),
                html.Br(),

                html.B("Lessons Learned:"),
                html.P("""
                    This project has provided valuable insights into the complexities of aviation safety and the importance of data-driven analysis in identifying areas for improvement.
                """),
                html.Br(),

                html.B("References:"),
                html.P("https://www.ntsb.gov/Pages/AviationQueryV2.aspx"),
                html.P("Canvas Professor Breitzman Weekly Lecture Notes"),
                html.P("https://www.geeksforgeeks.org/plot-a-pie-chart-in-python-using-matplotlib/"),
                html.P("https://jupyter-dashboards-layout.readthedocs.io/en/latest/using.html"),
                html.P("https://www.geeksforgeeks.org/data-visualization-in-jupyter-notebook/"),
                html.P("https://www.geeksforgeeks.org/creating-interactive-dashboard-from-jupyter-notebooks/"),
                html.P("https://asrs.arc.nasa.gov/search/database.html")
            ]),
        ])
    ])


def _build_analysis_tab():
    """Return the analysis tab: four charts, each with an explanation picker.

    Every chart is followed by a dropdown and an initially empty Div; the
    callbacks registered on the dropdown ids fill those Divs.
    """
    return dcc.Tab(label='NTSB & ASRS Data Analysis', children=[
        html.Div([
            # First graph: top 10 states by frequency of accidents
            html.H3("Top 10 States by Frequency of Accidents (NTSB Data)"),
            dcc.Graph(id='top-states-chart', figure=create_ntsb_top_states_chart(ntsb_data)),
            _explanation_dropdown(
                'top-states-explanation-dropdown',
                'Click this drop down to view the top 10 states explanations',
            ),
            html.Div(id='top-states-explanation-text'),

            html.Hr(),

            # Second graph: total injury counts in NTSB data
            html.H3("Total Injury Counts in NTSB Data"),
            dcc.Graph(id='injury-counts-chart', figure=create_ntsb_injury_counts_chart(ntsb_data)),
            _explanation_dropdown(
                'injury-counts-explanation-dropdown',
                'Click this drop down to view injury counts explanations',
            ),
            html.Div(id='injury-counts-explanation-text'),

            html.Hr(),

            # Third graph: weather condition pie charts
            html.H3("Weather Conditions Comparison (Cessna Aircraft)"),
            dcc.Graph(id='weather-comparison-chart', figure=create_weather_comparison_chart(ntsb_data, asrs_data)),
            _explanation_dropdown(
                'weather-explanation-dropdown',
                'Click this dropdown to view the Weather Conditions explanations',
            ),
            html.Div(id='weather-explanation-text'),

            html.Hr(),

            # Fourth graph: accident trends line chart
            html.H3("Accident Trends (1948-2024)"),
            dcc.Graph(id='accident_trends', figure=create_accident_trends_chart(merged_data)),
            _explanation_dropdown(
                'trends-explanation-dropdown',
                'Click this dropdown to view the Accident Trends explanations',
            ),
            html.Div(id='trends-explanation-text'),
        ])
    ])


app.layout = html.Div([
    html.H1("Analyzing Contributing Factors in Airplane Crashes"),
    html.H2("Data Dashboard"),

    dcc.Tabs([
        _build_about_tab(),
        _build_analysis_tab(),
    ]),
])

# Callbacks to update explanation text
@app.callback(
    Output('top-states-explanation-text', 'children'),
    Input('top-states-explanation-dropdown', 'value'),
)
def update_top_states_explanation(value):
    """Render the top-states explanation chosen in the dropdown as Markdown."""
    markdown_source = explanation_text_top_states(value)
    rendered = dcc.Markdown(markdown_source)
    return html.Div(rendered)

@app.callback(
    Output('injury-counts-explanation-text', 'children'),
    Input('injury-counts-explanation-dropdown', 'value'),
)
def update_injury_counts_explanation(value):
    """Render the injury-counts explanation chosen in the dropdown as Markdown."""
    markdown_source = explanation_text_injury_counts(value)
    rendered = dcc.Markdown(markdown_source)
    return html.Div(rendered)

@app.callback(
    Output('weather-explanation-text', 'children'),
    Input('weather-explanation-dropdown', 'value'),
)
def update_weather_explanation(value):
    """Render the weather explanation chosen in the dropdown as Markdown."""
    markdown_source = explanation_text_weather(value)
    rendered = dcc.Markdown(markdown_source)
    return html.Div(rendered)

@app.callback(
    Output('trends-explanation-text', 'children'),
    Input('trends-explanation-dropdown', 'value'),
)
def update_trends_explanation(value):
    """Render the accident-trends explanation chosen in the dropdown as Markdown."""
    markdown_source = explanation_text_trends(value)
    rendered = dcc.Markdown(markdown_source)
    return html.Div(rendered)

# Run the app
if __name__ == "__main__":
    # Dash 3 replaced ``app.run_server`` with ``app.run``, while older
    # releases may only offer ``run_server``. Prefer ``run`` and fall back
    # lazily, so the obsolete attribute is never touched when ``run`` exists.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)

# Output: <IPython.core.display.Javascript object>