In [None]:
%pip install pandas matplotlib seaborn plotly dash scikit-learn joblib
import pandas as pd
from pptx import Presentation
from pptx.util import Inches
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import dash
from dash import dcc, html, Input, Output
from dash.dependencies import Input, Output
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import re
from io import BytesIO
from flask import send_file
import base64
import plotly.io as pio
from datetime import datetime
import plotly.express as px

In [None]:
# Output directory prefix used when saving report figures for this dataset.
path = "report/dataset2/"
# Load the pre-cleaned accident records that the rest of the notebook works on.
accidents = pd.read_csv("cleaned_data/accidents.csv")

In [None]:
# Notebook cell: evaluating the bare name displays the loaded DataFrame.
accidents

In [None]:
# sns.scatterplot(
#     data=accidents, x='worker_age', y='program',
#     alpha=0.6, color='teal'
# )
# plt.title("Worker Experience vs Program", fontsize=14)
# plt.xlabel("Worker Age (Years)", fontsize=12)
# plt.ylabel("Program Categories", fontsize=12)
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# # plt.tight_layout()
# plt.savefig(path + "worker_age_vs_program.png")
# plt.show()
# plt.close()

In [None]:
# Build a condensed copy of the accident data in which several descriptive
# columns are merged into two summary columns: "how" and "where".
cleaned = accidents.copy()

# How: source category (level 1 - level 2) followed by the accident category.
# 'UNKNOWN' second-level source descriptions are blanked before concatenation.
# Assign through a single .loc call: the chained form df[col][mask] = value
# may write to a temporary object and does not update the frame when pandas
# Copy-on-Write is enabled.
unknown_source = cleaned['source_category_description2'] == 'UNKNOWN'
cleaned.loc[unknown_source, 'source_category_description2'] = ''
cleaned['source_category_description'] = cleaned['accident_source_category_description1'] + ' - ' + cleaned['source_category_description2']
cleaned['how'] = cleaned['source_category_description'] + ' - ' + cleaned['accident_category_description']
# Drop the component columns now that they are folded into "how".
cleaned = cleaned.drop(columns=['accident_source_category_description1', 'source_category_description2', 'source_category_description', 'accident_category_description'])

# Where: province - city - industry sector - occupation - accident place.
cleaned['where'] = cleaned['organization_province_code']+' - '+cleaned['city']+' - '+cleaned['industry_sector_description']+' - '+cleaned['occupation_description']+' - '+cleaned['accident_place_description']
# Drop the location/occupation columns, including the occupation category
# columns that are not part of the combined "where" value.
cleaned = cleaned.drop(columns=['organization_province_code', 'city', 'accident_place_description', 'occupation_description', 'occupation_category_code', 'occupation_category_description', 'industry_sector_description'])

In [None]:
# Frame backing the dashboard: a copy of the raw accidents data
# (alternative source: cleaned.copy()).
data = accidents.copy()
columns = data.columns.tolist()

app = dash.Dash(__name__)

# One dropdown entry per DataFrame column; multi-select drives 1D/2D/3D plots.
_column_dropdown = dcc.Dropdown(
    id='scatter-column-selector',
    options=[{'label': col, 'value': col} for col in columns],
    multi=True,
)

# Figure size inputs; the plot callback multiplies these values by 100.
_width_input = dcc.Input(
    id='fig-width', type='number', value=8, step=1, min=4, max=15,
    placeholder='Width',
)
_height_input = dcc.Input(
    id='fig-height', type='number', value=6, step=1, min=4, max=15,
    placeholder='Height',
)

_controls_panel = html.Div(
    [
        html.Label("Select Columns for Scatter Plot (2D or 3D):"),
        _column_dropdown,
        html.Label("Adjust Figure Size:"),
        _width_input,
        _height_input,
    ],
    style={'width': '48%', 'display': 'inline-block'},
)

# Page structure: title, controls, save button, then the graph itself.
# NOTE(review): no callback in the visible code is attached to 'save-button'.
app.layout = html.Div([
    html.H1("Interactive Scatter Plot Viewer"),
    _controls_panel,
    html.Button("Save Plot", id='save-button', n_clicks=0),
    dcc.Graph(id='scatter-plot'),
])

@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('scatter-column-selector', 'value'),
     Input('fig-width', 'value'),
     Input('fig-height', 'value')]
)
def update_plot(selected_cols, fig_width, fig_height):
    """Build the figure shown in the 'scatter-plot' graph.

    Args:
        selected_cols: Column names chosen in the dropdown (may be None or
            empty when nothing is selected).
        fig_width: Requested figure width; multiplied by 100 for the layout.
            May be None (e.g. when the numeric input is cleared).
        fig_height: Requested figure height; multiplied by 100 for the layout.
            May be None (e.g. when the numeric input is cleared).

    Returns:
        A Plotly figure:
        - no selection: an empty scatter carrying a prompt as its title;
        - one column: a bar chart of value counts (non-numeric columns are
          limited to the 20 most frequent values);
        - two columns: a 2D scatter plot;
        - three or more columns: a 3D scatter plot of the first three.
    """
    if not selected_cols:
        return px.scatter(title="Select valid columns for plotting")

    if len(selected_cols) == 1:
        col = selected_cols[0]
        counts = data[col].value_counts()
        # Categorical columns can have many distinct values; keep the top 20.
        if not pd.api.types.is_numeric_dtype(data[col]):
            counts = counts.head(20)
        fig = px.bar(
            x=counts.index,
            y=counts.values,
            title=f"{col}",
            labels={'x': col, 'y': 'Frequency'},
            color_discrete_sequence=['blue'],
        )
    elif len(selected_cols) == 2:
        fig = px.scatter(
            data,
            x=selected_cols[0],
            y=selected_cols[1],
            opacity=0.6,
            title=f"{selected_cols[0]} vs {selected_cols[1]}",
        )
    else:
        # Any columns beyond the third are ignored.
        fig = px.scatter_3d(
            data,
            x=selected_cols[0],
            y=selected_cols[1],
            z=selected_cols[2],
            opacity=0.7,
            size_max=0.2,
            title=f"{selected_cols[0]} vs {selected_cols[1]} vs {selected_cols[2]}",
        )

    # Only apply dimensions that were actually provided: multiplying None by
    # 100 would raise TypeError and break the callback. A missing dimension
    # is left to Plotly's default sizing.
    size = {}
    if fig_width is not None:
        size['width'] = fig_width * 100
    if fig_height is not None:
        size['height'] = fig_height * 100
    if size:
        fig.update_layout(**size)
    return fig

if __name__ == '__main__':
    # Start the Dash development server with debug tooling enabled.
    # `app.run` is the supported entry point; `app.run_server` is deprecated
    # and no longer available in current Dash releases.
    app.run(debug=True)
