In [None]:
%pip install pandas matplotlib seaborn plotly dash scikit-learn joblib
import pandas as pd
from pptx import Presentation
from pptx.util import Inches
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import dash
from dash import dcc, html, Input, Output
from dash.dependencies import Input, Output
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import re
from io import BytesIO
from flask import send_file
import base64
import plotly.io as pio
from datetime import datetime
import plotly.express as px

In [None]:
# Prefix for files written by this notebook; it is joined to file names by
# plain string concatenation, so the trailing slash is required.
path = "report/dataset2/"

# Cleaned accident records; the source frame for the plots below.
accidents = pd.read_csv(filepath_or_buffer="cleaned_data/accidents.csv")

In [None]:
# sns.scatterplot(
#     data=accidents, x='worker_age', y='program',
#     alpha=0.6, color='teal'
# )
# plt.title("Worker Age vs Program", fontsize=14)
# plt.xlabel("Worker Age (Years)", fontsize=12)
# plt.ylabel("Program Categories", fontsize=12)
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# # plt.tight_layout()
# plt.savefig(path + "worker_age_vs_program.png")
# plt.show()
# plt.close()

In [None]:
# Independent copy of the loaded frame; the dashboard callback reads from it.
data = accidents.copy()
columns = data.columns.tolist()

app = dash.Dash(__name__)

# Page structure: heading, a control panel (column picker + figure size),
# a save button, and the <img> element the callback fills with a PNG data URI.
app.layout = html.Div(children=[
    html.H1(children="Interactive Scatter Plot Viewer"),

    html.Div(
        children=[
            html.Label(children="Select Columns for Scatter Plot (2D or 3D):"),
            dcc.Dropdown(
                id='scatter-column-selector',
                options=[dict(label=name, value=name) for name in columns],
                multi=True,
            ),

            html.Label(children="Adjust Figure Size:"),
            dcc.Input(
                id='fig-width', type='number', value=8,
                step=1, min=4, max=15, placeholder='Width',
            ),
            dcc.Input(
                id='fig-height', type='number', value=6,
                step=1, min=4, max=15, placeholder='Height',
            ),
        ],
        style=dict(width='48%', display='inline-block'),
    ),

    html.Button(children="Save Plot", id='save-button', n_clicks=0),
    html.Img(id='scatter-image'),
])

# Fallback figure size (inches) used when a size input is cleared in the UI;
# mirrors the initial values of the 'fig-width' / 'fig-height' inputs.
_DEFAULT_FIG_WIDTH = 8
_DEFAULT_FIG_HEIGHT = 6

# Data URI with an empty payload, returned when nothing can be plotted.
_EMPTY_IMAGE_SRC = "data:image/png;base64,"


def _render_2d_png(x_col, y_col, width, height):
    """Render a seaborn 2D scatter of ``data[x_col]`` vs ``data[y_col]`` and return PNG bytes."""
    fig, ax = plt.subplots(figsize=(width, height))
    try:
        sns.scatterplot(data=data, x=x_col, y=y_col, alpha=0.6, color='teal', ax=ax)
        ax.set_title(f"2D Scatter Plot: {x_col} vs {y_col}")
        buf = BytesIO()
        fig.savefig(buf, format="png")
        return buf.getvalue()
    finally:
        # Always release the figure, even if plotting fails, so repeated
        # callbacks do not accumulate open matplotlib figures.
        plt.close(fig)


def _render_3d_png(x_col, y_col, z_col):
    """Render a plotly 3D scatter of the three given columns and return PNG bytes."""
    fig = px.scatter_3d(data,
                        x=x_col,
                        y=y_col,
                        z=z_col,
                        opacity=0.7,
                        size_max=.2,
                        title="3D Scatter Plot of Workplace Accidents")
    # NOTE(review): plotly's static image export relies on the optional
    # `kaleido` package, which the install cell does not list - confirm it is
    # available in the kernel environment.
    return fig.to_image(format="png")


def _save_button_triggered():
    """Return True only when the current callback run was fired by the save button."""
    return any(
        item["prop_id"].split(".")[0] == "save-button"
        for item in dash.callback_context.triggered
    )


@app.callback(
    [Output('scatter-image', 'src')],
    [Input('scatter-column-selector', 'value'),
     Input('fig-width', 'value'),
     Input('fig-height', 'value'),
     Input('save-button', 'n_clicks')]
)
def update_and_save_scatter_plot(selected_cols, fig_width, fig_height, n_clicks):
    """Render the selected columns as a scatter plot and optionally save it.

    Args:
        selected_cols: Column names chosen in the dropdown. Two names give a
            2D seaborn scatter (x, y); three give a 3D plotly scatter
            (x, y, z). Any other selection yields an empty image.
        fig_width: Figure width in inches for the 2D plot; ``None`` (cleared
            input) falls back to the default width.
        fig_height: Figure height in inches for the 2D plot; ``None`` falls
            back to the default height.
        n_clicks: Click count of the "Save Plot" button.

    Returns:
        A one-element list holding the ``src`` data URI for the image element.

    Side effects:
        When the save button fired this callback, the rendered PNG is written
        to ``path + "scatter_plot_<timestamp>.png"``.
    """
    if not selected_cols or len(selected_cols) not in (2, 3):
        return [_EMPTY_IMAGE_SRC]

    if len(selected_cols) == 2:
        png_bytes = _render_2d_png(
            selected_cols[0],
            selected_cols[1],
            fig_width or _DEFAULT_FIG_WIDTH,
            fig_height or _DEFAULT_FIG_HEIGHT,
        )
    else:
        # Plot the columns the user actually picked rather than fixed ones.
        png_bytes = _render_3d_png(*selected_cols)

    # `n_clicks` stays positive after the first click, so checking it alone
    # would write a new file on every later dropdown/size change. Save only
    # when the button itself triggered this run.
    if n_clicks and _save_button_triggered():
        # `datetime` is the class imported via `from datetime import datetime`.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_path = path + f"scatter_plot_{timestamp}.png"
        # Write the exact bytes that are displayed; this also covers the 3D
        # case, where there is no matplotlib figure for plt.savefig to use.
        with open(save_path, "wb") as out_file:
            out_file.write(png_bytes)

    encoded_image = base64.b64encode(png_bytes).decode('utf-8')
    return [f"data:image/png;base64,{encoded_image}"]

if __name__ == '__main__':
    # Start the development server. Newer Dash releases expose `app.run` and
    # Dash 3 removed `run_server`; only fall back to `run_server` on older
    # releases that do not have `run` (the `or` short-circuits otherwise).
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)


In [None]:
import pandas as pd

# Read the cleaned accident records from disk.
file_path = "cleaned_data/accidents.csv"  # Update the path if necessary
df2 = pd.read_csv(file_path)

# The three columns mapped to the x, y and z axes, in that order.
plot_axes = ["worker_age", "worker_experience_in_years", "occupation_category_code"]

# Coerce each axis column to a numeric dtype; values that cannot be parsed
# become NaN instead of raising.
for axis_name in plot_axes:
    df2[axis_name] = pd.to_numeric(df2[axis_name], errors="coerce")

# Keep only rows where all three axis values are present.
df_cleaned = df2.dropna(subset=plot_axes)

# Interactive 3D scatter of the cleaned rows.
fig = px.scatter_3d(
    df_cleaned,
    x=plot_axes[0],
    y=plot_axes[1],
    z=plot_axes[2],
    opacity=0.7,
    size_max=.2,
    title="3D Scatter Plot of Workplace Accidents",
)

# Zero out all four figure margins before displaying.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})
fig.show()