In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import dash
from dash import dcc, html, Input, Output
import plotly.express as px

# Load the cleaned accident records and parse the ISO-formatted 'date' column.
df2 = pd.read_csv('cleaned_data/accidents.csv')
df2['date'] = pd.to_datetime(df2['date'], format="%Y-%m-%d")

# Derive the calendar year and month used for grouping in the dashboard callback.
df2 = df2.assign(
    Year=df2['date'].dt.year,
    Month=df2['date'].dt.month,
)

app = dash.Dash(__name__)

# One entry per filter dropdown:
# (component id, df2 column supplying the options, placeholder text).
_FILTER_SPECS = [
    ('city-filter', 'city', "Select cities"),
    ('source-filter', 'accident_source_category_description1', "Select accident sources"),
    ('sector-filter', 'industry_sector_description', "Select industry sectors"),
    ('place-filter', 'accident_place_description', "Select accident places"),
    ('province-filter', 'organization_province_code', "Select provinces"),
]


def _filter_dropdown(component_id, column, placeholder):
    """Build a multi-select dropdown listing the distinct non-null values of df2[column]."""
    choices = df2[column].dropna().unique()
    return dcc.Dropdown(
        id=component_id,
        options=[{'label': choice, 'value': choice} for choice in choices],
        multi=True,
        placeholder=placeholder,
    )


# Page structure: title, a row of filter dropdowns, then the three graphs
# that the callback fills in.
app.layout = html.Div([
    html.H1("Injury Predictions Dashboard"),
    html.Div(
        [_filter_dropdown(*spec) for spec in _FILTER_SPECS],
        style={"margin-bottom": "20px"},
    ),
    dcc.Graph(id='injury-predictions-graph'),
    dcc.Graph(id='monthly-injury-predictions-graph'),
    dcc.Graph(id='seasonal-variations-graph'),
])

@app.callback(
    [Output('injury-predictions-graph', 'figure'),
     Output('monthly-injury-predictions-graph', 'figure'),
     Output('seasonal-variations-graph', 'figure')],
    [Input('city-filter', 'value'),
     Input('source-filter', 'value'),
     Input('sector-filter', 'value'),
     Input('place-filter', 'value'),
     Input('province-filter', 'value')]
)
def update_graphs(selected_cities, selected_sources, selected_sectors, selected_places, selected_provinces):
    """Rebuild the three dashboard figures for the current filter selection.

    Each argument is the ``value`` of one multi-select dropdown. A falsy value
    (nothing selected) leaves the corresponding column unfiltered.

    Returns:
        A 3-tuple ``(yearly_fig, monthly_fig, seasonal_fig)``:
        * yearly_fig -- observed injuries per year plus a linear-regression
          forecast for the five years after the last observed year;
        * monthly_fig -- observed injuries per (year, month) plus a
          linear-regression forecast for the twelve months after the last
          observed month;
        * seasonal_fig -- bar chart of injury counts per calendar month.
        The two prediction figures fall back to an empty "no data" figure
        when the filters leave no rows.
    """
    # Boolean indexing returns new frames and nothing below mutates the data,
    # so the shared df2 can be used directly without a defensive copy.
    filtered_df = df2
    column_selections = (
        ('city', selected_cities),
        ('accident_source_category_description1', selected_sources),
        ('industry_sector_description', selected_sectors),
        ('accident_place_description', selected_places),
        ('organization_province_code', selected_provinces),
    )
    for column, selection in column_selections:
        if selection:
            filtered_df = filtered_df[filtered_df[column].isin(selection)]

    # Year and month aggregates for analysis
    injuries_by_year = filtered_df.groupby('Year').size()
    injuries_by_month = filtered_df.groupby('Month').size()
    monthly_data = filtered_df.groupby(['Year', 'Month']).size().reset_index(name='Count')

    # Yearly prediction
    if not injuries_by_year.empty:
        X_years = np.array(injuries_by_year.index).reshape(-1, 1)
        y_injuries = np.array(injuries_by_year.values)
        model = LinearRegression()
        model.fit(X_years, y_injuries)
        last_year = int(X_years.max())
        # Forecast the five years following the last observed year.
        future_years = np.arange(last_year + 1, last_year + 6).reshape(-1, 1)
        predicted_injuries = model.predict(future_years)

        prediction_fig = {
            'data': [
                {'x': injuries_by_year.index, 'y': injuries_by_year.values, 'type': 'scatter', 'name': 'Observed Injuries'},
                {'x': future_years.flatten(), 'y': predicted_injuries, 'type': 'scatter', 'name': 'Predicted Injuries'}
            ],
            'layout': {'title': 'Injury Predictions'}
        }
    else:
        prediction_fig = {
            'data': [],
            'layout': {'title': 'No data available for predictions'}
        }

    # Monthly prediction
    if not monthly_data.empty:
        # Represent every (Year, Month) bucket by the first day of that month.
        monthly_data['Date'] = pd.to_datetime(monthly_data[['Year', 'Month']].assign(Day=1))
        first_date = monthly_data['Date'].min()
        last_date = monthly_data['Date'].max()

        # Regress on days elapsed since the first observed month.
        X_months = np.array((monthly_data['Date'] - first_date).dt.days).reshape(-1, 1)
        y_months = monthly_data['Count'].values
        model = LinearRegression()
        model.fit(X_months, y_months)

        # date_range includes its start point, so generate 13 month starts and
        # drop the first: the forecast then covers the 12 months *after* the
        # last observed month instead of overlapping it.
        future_months = pd.date_range(last_date, periods=13, freq='MS')[1:]
        future_X = np.array((future_months - first_date).days).reshape(-1, 1)
        predicted_monthly_injuries = model.predict(future_X)

        monthly_prediction_fig = {
            'data': [
                {'x': monthly_data['Date'].to_list(), 'y': y_months, 'type': 'scatter', 'name': 'Observed Injuries'},
                {'x': future_months.to_list(), 'y': predicted_monthly_injuries, 'type': 'scatter', 'name': 'Monthly Predicted Injuries'}
            ],
            # Distinct title so this chart is not confused with the yearly one.
            'layout': {'title': 'Monthly Injury Predictions'}
        }
    else:
        monthly_prediction_fig = {
            'data': [],
            'layout': {'title': 'No data available for predictions'}
        }

    # Seasonal variations figure
    seasonal_fig = px.bar(x=injuries_by_month.index, y=injuries_by_month.values, title='Seasonal Injury Variations',
                          labels={'x': 'Month', 'y': 'Number of Injuries'},
                          color_discrete_sequence=['skyblue'])
    # Show month abbreviations instead of the numeric month on the x axis.
    seasonal_fig.update_layout(xaxis=dict(tickmode='array',
                                          tickvals=list(range(1, 13)),
                                          ticktext=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]))

    return prediction_fig, monthly_prediction_fig, seasonal_fig


# Start the Dash development server only when this file is executed directly.
# NOTE(review): newer Dash releases provide app.run in place of run_server --
# confirm against the installed Dash version before switching.
if __name__ == "__main__":
    app.run_server(debug=True)


[2025 2026 2027 2028 2029]
[2025 2026 2027 2028 2029]
[2025 2026 2027 2028 2029]
[2025 2026 2027 2028 2029]
