In [None]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, Markdown, HTML


In [None]:
# Read the pre-cleaned gender pivot table (adjust the path if your copy lives
# elsewhere) and store the graduation year as an integer column.
pivot_df = (
    pd.read_csv("cleaned_gender_pivot.csv")
    .astype({'Graduation Year': int})
)


In [None]:
# Filter controls shared by the charts: one toggle-button group per categorical
# filter plus a year range slider.

# Build the option lists from non-null labels only: sorted() raises TypeError
# when a NaN (float) is mixed in with string labels.
field_options = ['All Fields'] + sorted(pivot_df['Field of Study'].dropna().unique())
region_options = ['All Regions'] + sorted(pivot_df['HSE Health Regions'].dropna().unique())

# min()/max() on an integer column return numpy scalars; cast them to plain
# Python ints before handing them to the slider.
year_min = int(pivot_df['Graduation Year'].min())
year_max = int(pivot_df['Graduation Year'].max())

# 'All Fields' is a sentinel option meaning "do not filter by field".
field_selector = widgets.ToggleButtons(
    options=field_options,
    value='All Fields',
    description='Field:',
    button_style=''
)

# 'All Regions' is a sentinel option meaning "do not filter by region".
region_selector = widgets.ToggleButtons(
    options=region_options,
    value='All Regions',
    description='Region:',
    button_style=''
)

# Starts with the full span of years present in the data selected.
# continuous_update=False: the value is only pushed once the handle is released,
# not on every intermediate position while dragging.
year_slider = widgets.IntRangeSlider(
    value=[year_min, year_max],
    min=year_min,
    max=year_max,
    step=1,
    description='Years:',
    continuous_update=False
)


In [None]:
def grouped_bar_chart(field, region, year_range):
    """Show a grouped bar chart of Female and Male graduate totals per region.

    Rows of ``pivot_df`` are kept when their graduation year lies inside
    ``year_range`` (both ends included) and they match the chosen field and
    region. The remaining rows are summed per 'HSE Health Regions' value.

    Args:
        field: A 'Field of Study' value, or 'All Fields' to skip that filter.
        region: An 'HSE Health Regions' value, or 'All Regions' to skip that filter.
        year_range: Indexable pair ``(first_year, last_year)``.
    """
    # Assemble one boolean mask for all active filters, then select once.
    years = pivot_df['Graduation Year']
    keep = (years >= year_range[0]) & (years <= year_range[1])
    if field != 'All Fields':
        keep = keep & (pivot_df['Field of Study'] == field)
    if region != 'All Regions':
        keep = keep & (pivot_df['HSE Health Regions'] == region)
    selection = pivot_df[keep]

    # One row per region holding the summed Female / Male counts.
    totals_by_region = selection.groupby('HSE Health Regions')[['Female', 'Male']].sum()
    totals_by_region = totals_by_region.reset_index()

    chart_title = f"Graduate Counts by Gender and Region ({field}, {region})"
    fig = px.bar(
        totals_by_region,
        x='HSE Health Regions',
        y=['Female', 'Male'],
        barmode='group',
        title=chart_title,
        height=500,
    )
    fig.update_layout(
        xaxis_title='Region',
        yaxis_title='Graduates',
        legend_title='Gender',
    )
    fig.show()

# Bind the bar chart function to the three filter controls; each control's
# current value is passed as the keyword argument of the same name.
bar_out = widgets.interactive_output(
    grouped_bar_chart,
    dict(field=field_selector, region=region_selector, year_range=year_slider),
)

# Section heading, then the controls, then the chart output area.
display(
    Markdown("## Chart 1: Gender Breakdown by Region"),
    field_selector,
    region_selector,
    year_slider,
    bar_out,
)


In [None]:
def donut_chart(field, region, year_range):
    """Show a donut chart of the overall Female / Male graduate share.

    ``pivot_df`` is filtered by the selected field, region and inclusive year
    range; the 'Female' and 'Male' columns of the remaining rows are summed and
    the two totals are drawn as a pie with a hole. When the selection contains
    no graduates at all (no matching rows, or only zero counts) a short message
    is printed instead, because a pie whose values are all zero has no slices
    to draw.

    Args:
        field: A 'Field of Study' value, or 'All Fields' to skip that filter.
        region: An 'HSE Health Regions' value, or 'All Regions' to skip that filter.
        year_range: Indexable pair ``(first_year, last_year)``; both ends inclusive.
    """
    # No defensive copy needed: the frame is only read, and the boolean
    # indexing below returns new frames.
    df = pivot_df
    if field != 'All Fields':
        df = df[df['Field of Study'] == field]
    if region != 'All Regions':
        df = df[df['HSE Health Regions'] == region]
    df = df[(df['Graduation Year'] >= year_range[0]) & (df['Graduation Year'] <= year_range[1])]

    total = df[['Female', 'Male']].sum()

    # A field/region combination may have no rows; the sum of an empty
    # selection is 0 for both genders.
    if total.sum() == 0:
        print(
            f"No graduates found for {field}, {region}, "
            f"{year_range[0]}-{year_range[1]}."
        )
        return

    fig = px.pie(
        names=['Female', 'Male'],
        values=total.values,
        hole=0.4,
        title='Overall Gender Share',
    )
    fig.update_traces(textinfo='label+percent+value')
    fig.show()

# Bind the donut chart to the same three filter controls used elsewhere in the
# notebook; the controls are not displayed again here, only the output area.
donut_out = widgets.interactive_output(
    donut_chart,
    dict(field=field_selector, region=region_selector, year_range=year_slider),
)

# Section heading followed by the chart output area.
display(
    Markdown("## Chart 2: Gender Share (Donut)"),
    donut_out,
)


In [None]:
def box_plot(field, region, year_range):
    """Show per-gender box plots of graduate counts with every point drawn.

    ``pivot_df`` is filtered by the selected field, region and inclusive year
    range, then reshaped to long form (one row per original row and gender) so
    that 'Female' and 'Male' counts can be compared side by side. Each point's
    hover text includes its field of study, graduation year and region so that
    an outlying point can be traced back to its source row.

    Args:
        field: A 'Field of Study' value, or 'All Fields' to skip that filter.
        region: An 'HSE Health Regions' value, or 'All Regions' to skip that filter.
        year_range: Indexable pair ``(first_year, last_year)``; both ends inclusive.
    """
    # No defensive copy needed: the frame is only read, and the boolean
    # indexing below returns new frames.
    df = pivot_df
    if field != 'All Fields':
        df = df[df['Field of Study'] == field]
    if region != 'All Regions':
        df = df[df['HSE Health Regions'] == region]
    df = df[(df['Graduation Year'] >= year_range[0]) & (df['Graduation Year'] <= year_range[1])]

    # Keep 'Field of Study' as an identifier: with 'All Fields' selected, year
    # and region alone may not say which row a point came from.
    # NOTE(review): presumably one row per field/region/year -- confirm.
    id_columns = ['Field of Study', 'Graduation Year', 'HSE Health Regions']
    df_long = df.melt(id_vars=id_columns, value_vars=['Female', 'Male'],
                      var_name='Gender', value_name='Graduates')

    fig = px.box(
        df_long,
        x='Gender',
        y='Graduates',
        points='all',  # draw every observation, not only those beyond the whiskers
        color='Gender',
        hover_data=id_columns,
        title='Outlier Detection by Gender'
    )
    fig.show()

# Bind the box plot to the same three filter controls used elsewhere in the
# notebook; the controls are not displayed again here, only the output area.
box_out = widgets.interactive_output(
    box_plot,
    dict(field=field_selector, region=region_selector, year_range=year_slider),
)

# Section heading followed by the chart output area.
display(
    Markdown("## Chart 3: Gender Outlier Detection"),
    box_out,
)
