In [None]:
import os
from io import BytesIO

import dash
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.io as pio
from dash import dcc, html, Input, Output, State
from pptx import Presentation
from pptx.util import Inches

In [None]:
# Prefix used when writing report artefacts.
path = "report/merged"

# Load the merged accident records and parse the 'date' column; values that
# cannot be parsed become NaT because of errors='coerce'.
accidents = pd.read_csv("cleaned_data/merged.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'], errors='coerce')
)
accidents

In [None]:
def weighted_count(accident_sources_filtered):
    """Return per-row accident counts normalised by average worker count.

    Parameters
    ----------
    accident_sources_filtered : pandas.DataFrame
        Must provide the columns 'sector', 'month', 'count' and
        'avg_worker_count'.

    Returns
    -------
    pandas.DataFrame
        A new frame (same index as the input) with two columns:
        'description' ("<sector> month: <month>") and
        'Weighted Count (count/worker count)' (count / avg_worker_count, float).

    The input frame is left untouched: all derived columns are added to a
    copy, so callers passing a filtered slice no longer get their data mutated
    (or a SettingWithCopyWarning).
    """
    weighted_col = 'Weighted Count (count/worker count)'

    # Work on a private copy so the caller's DataFrame is never modified.
    frame = accident_sources_filtered.copy()
    frame['description'] = frame['sector'] + " month: " + frame['month'].astype(str)
    frame[weighted_col] = (frame['count'] / frame['avg_worker_count']).astype(float)

    # List-based column selection returns a fresh frame with only the outputs.
    return frame[['description', weighted_col]]

In [None]:
# Derive a "YYYY-MM" label for every accident from its parsed date.
accidents['Year-Month'] = accidents['date'].dt.to_period('M').astype(str)

# Count accidents per (Year-Month, sector, avg_worker_count) and pivot the
# innermost key into columns; combinations with no accidents are filled with 0.
# (A previous value_counts() result assigned to this same name was overwritten
# here without ever being used, so it has been dropped.)
# NOTE(review): unstack() pivots avg_worker_count, i.e. one column per distinct
# worker count -- confirm this is intended rather than pivoting on sector.
accident_sources_filtered = (
    accidents
    .groupby(['Year-Month', 'sector', 'avg_worker_count'])
    .size()
    .unstack(fill_value=0)
)
accident_sources_filtered

In [None]:
# Accident counts per (Year-Month, sector) as a clustered bar chart, with one
# bar series per avg_worker_count column of accident_sources_filtered.

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot(figsize=...) opens a second figure and
# leaves the first one empty.
fig, ax = plt.subplots(figsize=(14, 8))
accident_sources_filtered.plot(kind='bar', colormap='plasma', ax=ax)

# Labels describe what is actually plotted (the frame is indexed by
# Year-Month and sector, not by occupation).
ax.set_title("Accident Counts by Month and Sector", fontsize=14)
ax.set_xlabel("Year-Month, Sector", fontsize=12)
ax.set_ylabel("Frequency", fontsize=12)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=10)
fig.tight_layout()
plt.show()

In [None]:
# # accident_sources_filtered = accident_sources_filtered.dropna(subset='Weighted Count (count/worker count)')

# plt.figure(figsize=(10, 6))
# plt.hist(x=accident_sources_filtered23['description'].to_list(), data=accident_sources_filtered23, bins=40, edgecolor='black', alpha=0.7)
# plt.hist(x=accident_sources_filtered22['description'].to_list(), data=accident_sources_filtered22, bins=40, edgecolor='black', alpha=0.7)
# plt.title('Year 2023', fontsize=14)
# plt.xlabel('Sector Description', fontsize=12)
# plt.ylabel('Weighted Frequency', fontsize=12)
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# plt.xticks(rotation=45, ha='right', fontsize=10)
# # plt.savefig("report/dataset2/worker_age_distribution_cleaned.png")
# plt.show()
# # plt.close()

# # fig2 = px.bar(accident_sources_filtered, x='Sector', y='Weighted Count (count/worker count)', title='Sector Distribution', text_auto=True)

In [None]:
# Prefix used when writing report artefacts.
path = "report/merged"

# Reload the merged accident records, parse 'date' (unparseable values become
# NaT) and derive the "YYYY-MM" label used for the monthly trend.
accidents = (
    pd.read_csv("cleaned_data/merged.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date'], errors='coerce'))
    .assign(**{'Year-Month': lambda frame: frame['date'].dt.to_period('M').astype(str)})
)

# Distinct non-null values that populate the two dropdown filters.
unique_industries = pd.unique(accidents['sector'].dropna())
unique_accident_categories = pd.unique(accidents['accident_category_description'].dropna())

app = dash.Dash(__name__)

# Page layout: title, two multi-select filters, two graphs, and the export
# button together with the download component.
app.layout = html.Div(
    children=[
        html.H1(
            children="Workplace Accident Analysis Dashboard",
            style=dict(textAlign='center'),
        ),
        html.Label(children="Select Industry:"),
        dcc.Dropdown(
            id='industry-dropdown',
            options=[dict(label=industry, value=industry) for industry in unique_industries],
            multi=True,
        ),
        html.Label(children="Select Accident Category:"),
        dcc.Dropdown(
            id='accident-dropdown',
            options=[dict(label=category, value=category) for category in unique_accident_categories],
            multi=True,
        ),
        dcc.Graph(id='accident-trends-graph'),
        dcc.Graph(id='accident-source-bar'),
        html.Button(children="Export to PowerPoint", id='export-button', n_clicks=0),
        dcc.Download(id="download-ppt"),
    ],
    style=dict(width='48%', display='inline-block'),
)

@app.callback(
    [Output('accident-trends-graph', 'figure'),
     Output('accident-source-bar', 'figure')],
    [Input('industry-dropdown', 'value'),
     Input('accident-dropdown', 'value')]
)
def update_graphs(selected_industries, selected_accidents):
    """Rebuild both dashboard figures for the current dropdown selections.

    An empty or missing selection leaves the corresponding filter off.
    Returns a (monthly trend line figure, accident source bar figure) pair.
    """
    # Each (column, selection) pair narrows the data only when something is selected.
    active_filters = (
        ('sector', selected_industries),
        ('accident_category_description', selected_accidents),
    )
    subset = accidents.copy()
    for column, chosen in active_filters:
        if chosen:
            subset = subset[subset[column].isin(chosen)]

    # Number of accidents per Year-Month label.
    monthly_counts = subset.groupby('Year-Month').size().reset_index(name='Accident Count')
    trend_figure = px.line(
        monthly_counts,
        x='Year-Month',
        y='Accident Count',
        title='Monthly Workplace Accident Trends',
        markers=True,
    )

    # Frequency of each accident source category, most frequent first.
    source_counts = (
        subset['accident_source_category_description1']
        .value_counts()
        .rename_axis('Accident Source')
        .reset_index(name='Count')
    )
    source_figure = px.bar(
        source_counts,
        x='Accident Source',
        y='Count',
        title='Accident Sources Distribution',
        text_auto=True,
    )

    return trend_figure, source_figure

def _add_figure_slide(ppt, slide_layout, title, figure):
    """Append a slide titled `title` that shows `figure` rendered as a PNG."""
    slide = ppt.slides.add_slide(slide_layout)
    slide.shapes.title.text = title
    image_stream = BytesIO()
    pio.write_image(figure, image_stream, format='png')
    # Rewind so the picture is read from the start of the in-memory PNG.
    image_stream.seek(0)
    slide.shapes.add_picture(image_stream, Inches(1), Inches(1), Inches(7), Inches(5))


@app.callback(
    Output("download-ppt", "data"),
    Input("export-button", "n_clicks"),
    # The figures are State, not Input: the export must run only when the
    # button is clicked, not every time a filter change redraws a graph.
    State('accident-trends-graph', 'figure'),
    State('accident-source-bar', 'figure'),
    prevent_initial_call=True,
)
def export_to_ppt(n_clicks, fig1, fig2):
    """Export the two currently displayed figures to a PowerPoint file.

    Parameters
    ----------
    n_clicks : int or None
        Click count of the export button.
    fig1 : dict or None
        Current figure of the monthly trends graph.
    fig2 : dict or None
        Current figure of the accident sources bar graph.

    Returns
    -------
    The payload for the 'download-ppt' component so the browser downloads the
    generated file, or ``dash.no_update`` when there is nothing to export.

    Side effects: writes ``interactive_plt.pptx`` inside the ``path`` directory
    (created if missing) and prints the saved location.
    """
    if not n_clicks or fig1 is None or fig2 is None:
        return dash.no_update

    ppt = Presentation()
    slide_layout = ppt.slide_layouts[5]

    _add_figure_slide(ppt, slide_layout, "Monthly Workplace Accident Trends", fig1)
    _add_figure_slide(ppt, slide_layout, "Accident Sources Distribution", fig2)

    # Join with a real path separator; plain string concatenation produced
    # "report/mergedinteractive_plt.pptx".
    os.makedirs(path, exist_ok=True)
    ppt_file = os.path.join(path, "interactive_plt.pptx")
    ppt.save(ppt_file)
    print("Your powerpoint slides saved in", ppt_file)

    return dcc.send_file(ppt_file, filename="Accident_Report.pptx")

if __name__ == '__main__':
    # Dash.run is the supported entry point; run_server is a deprecated alias
    # of it and is not available in recent Dash releases.
    app.run(debug=True)
