In [11]:
import pandas as pd
import plotly.express as px
import numpy as np 

In [16]:
import streamlit as st

In [7]:
import plotly.io as pio

In [13]:
# Read the dataset that feeds the dashboard filters and charts.
# The path is relative to the directory the notebook is run from.
file_path = "random_data_with_normalized_absences.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [24]:
# Sidebar filters: one dropdown per filter dimension.
def _sidebar_choice(label, column):
    """Render a sidebar dropdown offering the distinct values of df[column].

    Returns whatever value the user currently has selected.
    """
    return st.sidebar.selectbox(label, df[column].unique())


st.sidebar.header("Filters")
company = _sidebar_choice("Select Company", 'Company')
year = _sidebar_choice("Select Year", 'Year')
age_range = _sidebar_choice("Select Age Range", 'Age Range')
contract_type = _sidebar_choice("Select Contract Type", 'Contract Type')




In [25]:
# Keep only the rows that match all four sidebar selections at once.
selection_mask = (
    df['Company'].eq(company)
    & df['Year'].eq(year)
    & df['Age Range'].eq(age_range)
    & df['Contract Type'].eq(contract_type)
)
filtered_df = df[selection_mask]


In [26]:
# First chart data: reshape the 'Men' and 'Women' columns to long form, so
# every filtered row yields one record per gender column. The column name
# lands in 'Gender' and its value in 'Percentage'.
df_melted_1 = pd.melt(
    filtered_df,
    id_vars='Category',
    value_vars=['Men', 'Women'],
    var_name='Gender',
    value_name='Percentage',
)

In [27]:
# Grouped bar chart: one bar per gender within each job category, with the
# percentage value attached to each bar as its text label.
gender_share_bar_options = {
    'x': 'Category',
    'y': 'Percentage',
    'color': 'Gender',
    'text': 'Percentage',
    'barmode': 'group',
    'color_discrete_map': {'Men': 'gray', 'Women': 'pink'},
    'labels': {'Category': 'Job Category', 'Percentage': 'Percentage (%)'},
    'title': 'Percentage of Men and Women in Each Job Category',
}
fig1 = px.bar(df_melted_1, **gender_share_bar_options)


In [28]:

# Print each bar's value above it as a whole-number percentage.
fig1.update_traces(textposition='outside', texttemplate='%{text:.0f}%')
# Pin the y axis to the 0-100 range and set the axis and legend titles.
fig1.update_layout(
    legend_title_text='Gender',
    xaxis={'title': 'Job Category'},
    yaxis={'title': 'Percentage (%)', 'range': [0, 100]},
)


In [22]:
# Second chart: normalized absences, grouped by gender within each job category.
men_absence_column = 'Normalized Men Absence'
women_absence_column = 'Normalized Women Absence'
absence_value_label = 'Normalized Absences (Hours)'
absence_axis_title = 'Absences per Person (Normalized)'

# Long form: one record per filtered row and absence column. The 'Gender'
# column therefore holds the source column names, not 'Men'/'Women'.
df_melted_2 = pd.melt(
    filtered_df,
    id_vars='Category',
    value_vars=[men_absence_column, women_absence_column],
    var_name='Gender',
    value_name=absence_value_label,
)

absence_bar_options = {
    'x': 'Category',
    'y': absence_value_label,
    'color': 'Gender',
    'text': absence_value_label,
    'barmode': 'group',
    'color_discrete_map': {men_absence_column: 'gray', women_absence_column: 'pink'},
    'labels': {'Category': 'Job Category', absence_value_label: absence_axis_title},
    'title': 'Normalized Absences per Person by Job Category and Gender',
}
fig2 = px.bar(df_melted_2, **absence_bar_options)

# Print each bar's value above it with one decimal place.
fig2.update_traces(textposition='outside', texttemplate='%{text:.1f}')
fig2.update_layout(
    legend_title_text='Gender',
    xaxis={'title': 'Job Category'},
    yaxis={'title': absence_axis_title},
)


In [23]:

# Page body: a title, then one numbered section per chart.
st.title("Company Gender Equality Dashboard")

# Both charts are rendered with the same display options.
chart_options = {'use_container_width': True}

st.subheader("1. Percentage of Men and Women")
st.plotly_chart(fig1, **chart_options)

st.subheader("2. Normalized Absences")
st.plotly_chart(fig2, **chart_options)



DeltaGenerator()

In [None]:
# Make the system web browser the default Plotly renderer, so .show() opens
# a browser tab.
pio.renderers.default = 'browser'  # Force rendering in browser

# Show the two figures this notebook builds. The previous `fig.show()`
# referred to a name that is never defined here and raised NameError.
fig1.show()
fig2.show()

Generating the CSV

In [12]:

# Read the raw dashboard dataset.
file_path = "../../data/Random_Data_for_Dashboard.csv"
df = pd.read_csv(file_path)

# Working assumption: 1800 work hours per person per year on average.
average_hours_per_year = 1800

# Each derived column is built once per gender. Looping stage by stage keeps
# the new columns in Men-then-Women order within every stage.
genders = ('Men', 'Women')

# Work hours per gender: the percentage share as a fraction, times yearly hours.
for gender in genders:
    df[f'{gender} Work Hours'] = (df[gender] / 100) * average_hours_per_year

# Absence hours: a random 5-20 % of the work hours, drawn per row.
# The seed is fixed for reproducibility; the Men column is drawn before the
# Women column, so the order of this loop determines the generated values.
np.random.seed(42)
for gender in genders:
    absence_rate = np.random.uniform(5, 20, size=len(df)) / 100
    df[f'{gender} Absences (Hours)'] = df[f'{gender} Work Hours'] * absence_rate

# Absence hours per person: divide by the same fractional share used above.
for gender in genders:
    df[f'{gender} Absence per Person'] = df[f'{gender} Absences (Hours)'] / (df[gender] / 100)

# Normalization: weight each row by its share of the column's total.
for gender in genders:
    df[f'Normalized {gender} Absence'] = df[f'{gender} Absence per Person'] * (df[gender] / df[gender].sum())

# Write the enriched table to disk, without the index column.
updated_file_path = "random_data_with_normalized_absences.csv"
df.to_csv(updated_file_path, index=False)

# Preview the first rows.
print(df.head())

  Company  Year Age Range Contract Type   Category  Men  Women  \
0     EDF  2020     26-35           CDD     Cadres   78     22   
1     EDF  2020     26-35           CDD   Maîtrise   68     32   
2     EDF  2020     26-35           CDD  Exécution   54     46   
3     EDF  2020     26-35           CDI     Cadres   82     18   
4     EDF  2020     26-35           CDI   Maîtrise   47     53   

   Men Work Hours  Women Work Hours  Men Absences (Hours)  \
0          1404.0             396.0            149.078149   
1          1224.0             576.0            235.751147   
2           972.0             828.0            155.324717   
3          1476.0             324.0            206.342988   
4           846.0             954.0             62.098765   

   Women Absences (Hours)  Men Absence per Person  Women Absence per Person  \
0               76.518998              191.125832                347.813626   
1               92.554294              346.692863                289.232168   