In [1]:
import html

import ipywidgets as widgets
import pandas as pd
import plotly.express as px
from IPython.display import display, Markdown
from ipywidgets import interactive_output



# Load the cleaned gender pivot table; the path is relative to the working directory
pivot_df = pd.read_csv(filepath_or_buffer="cleaned_gender_pivot.csv")

In [2]:
# Headline facts about the loaded pivot table, keyed by the label shown to the reader.
summary_data = {
    'Rows': len(pivot_df),
    'Years': f"{pivot_df['Graduation Year'].min()}–{pivot_df['Graduation Year'].max()}",
    'Fields of Study': pivot_df['Field of Study'].nunique(),
    'Genders': ['Male', 'Female'],
    'Regions': pivot_df['HSE Health Regions'].nunique(),
    'Total Graduates': int(pivot_df['Female'].sum() + pivot_df['Male'].sum())
}

display(Markdown("### Dataset Summary"))
for k, v in summary_data.items():
    # Sequences are joined into readable text; interpolating them directly
    # would print a Python list repr (e.g. "['Male', 'Female']") in the Markdown.
    shown = ', '.join(map(str, v)) if isinstance(v, (list, tuple)) else v
    display(Markdown(f"**{k}**: {shown}"))

### Dataset Summary

**Rows**: 216

**Years**: 2010–2021

**Fields of Study**: 3

**Genders**: ['Male', 'Female']

**Regions**: 6

**Total Graduates**: 22626

In [3]:
# Field of Study selector: the catch-all entry comes first, then the fields A-Z
field_options = ['All Fields', *sorted(pivot_df['Field of Study'].unique().tolist())]

field_dropdown = widgets.Dropdown(
    description='Field:',
    options=field_options,
    value='All Fields',
)

# Year view selector: chooses which of the year widgets below is relevant
year_mode = widgets.ToggleButtons(
    description='Year View:',
    options=['All Years', 'Single Year', 'Year Range'],
    value='All Years',
)

# Single-year picker, initialised to the earliest year in the data
year_dropdown = widgets.Dropdown(
    description='Year:',
    options=sorted(pivot_df['Graduation Year'].unique()),
    value=pivot_df['Graduation Year'].min(),
)

# Year-range slider, initialised to span every year in the data
year_slider = widgets.IntRangeSlider(
    description='Years:',
    min=pivot_df['Graduation Year'].min(),
    max=pivot_df['Graduation Year'].max(),
    value=[pivot_df['Graduation Year'].min(), pivot_df['Graduation Year'].max()],
    step=1,
    continuous_update=False,
)

# Container to switch between dropdown/slider based on mode
year_widget_container = widgets.Output()

def get_year_widget(view, _dropdown=year_dropdown, _slider=year_slider):
    """Return the Chart 1 year widget matching the selected view mode.

    Parameters
    ----------
    view : str
        'Single Year' yields the year dropdown, 'Year Range' yields the range
        slider; any other value yields an empty HTML placeholder.
    _dropdown, _slider :
        Private. Captured as defaults when this cell runs so the function keeps
        using Chart 1's widgets even after later cells rebind the global name
        `year_slider`.
    """
    if view == 'Single Year':
        return _dropdown
    if view == 'Year Range':
        return _slider
    return widgets.HTML("")

def update_year_controls(change, _container=year_widget_container, _pick_widget=get_year_widget):
    """Observer callback: show the year widget for the newly selected mode.

    Parameters
    ----------
    change : dict
        Change notification; only its 'new' entry (the selected mode) is read.
    _container, _pick_widget :
        Private. Bound at definition time because later cells in this notebook
        reassign the globals `year_widget_container` and `get_year_widget`;
        looking them up at call time would redirect Chart 1's controls into
        another chart's container.
    """
    _container.clear_output()
    with _container:
        display(_pick_widget(change['new']))

year_mode.observe(update_year_controls, names='value')
# Render once up front so the container matches the initial mode
update_year_controls({'new': year_mode.value})


# Stack the Chart 1 controls vertically: field, year mode, then the mode-specific year widget
filter_panel = widgets.VBox(children=[field_dropdown, year_mode, year_widget_container])

# Section heading first, then the panel itself
display(Markdown("### Filters"), filter_panel)


import plotly.express as px

def update_grouped_bar_chart(field, view_mode, single_year, year_range):
    """Show grouped bars of Female/Male graduate totals per HSE health region.

    Reads the module-level `pivot_df`, filters it, sums the 'Female' and 'Male'
    columns per region and displays the figure. Returns None.

    Parameters
    ----------
    field : str
        A 'Field of Study' value, or 'All Fields' to skip the field filter.
    view_mode : str
        'Single Year' or 'Year Range' selects a year filter; any other value
        (e.g. 'All Years') applies none.
    single_year : int
        Year kept when `view_mode` is 'Single Year'.
    year_range : sequence of two ints
        Inclusive (start, end) bounds used when `view_mode` is 'Year Range'.
    """
    # Filtering only builds new frames, so no defensive copy of pivot_df is needed
    filtered = pivot_df
    if field != 'All Fields':
        filtered = filtered[filtered['Field of Study'] == field]

    # Apply the year filter and, alongside it, work out the text for the title
    # so the figure states which year(s) it shows rather than just the mode name.
    if view_mode == 'Single Year':
        filtered = filtered[filtered['Graduation Year'] == single_year]
        year_label = f"{single_year}"
    elif view_mode == 'Year Range':
        start, end = year_range
        filtered = filtered[(filtered['Graduation Year'] >= start) & (filtered['Graduation Year'] <= end)]
        year_label = f"{start}–{end}"
    else:
        year_label = view_mode

    # Group by region
    grouped = filtered.groupby('HSE Health Regions')[['Female', 'Male']].sum().reset_index()

    # Plot grouped bar chart (wide form: one bar series per gender column)
    fig = px.bar(
        grouped,
        x='HSE Health Regions',
        y=['Female', 'Male'],
        barmode='group',
        title=f"Graduate Counts by Gender and Region — {field} ({year_label})",
        # 'variable' is the wide-form series name; label it so hover text reads "Gender"
        labels={'value': 'Graduate Count', 'variable': 'Gender', 'HSE Health Regions': 'Region'},
        height=500
    )
    fig.update_layout(xaxis_title='Region', yaxis_title='Graduates', legend_title='Gender')
    fig.show()
# Connect the chart function to its widget inputs (keyword name -> widget)
chart_output = interactive_output(
    update_grouped_bar_chart,
    dict(
        field=field_dropdown,
        view_mode=year_mode,
        single_year=year_dropdown,
        year_range=year_slider,
    ),
)

# Section heading followed by the live chart area
display(Markdown("### Chart 1: Gender Breakdown by Region"), chart_output)

### Filters

VBox(children=(Dropdown(description='Field:', options=('All Fields', 'Medicine', 'Nursing and Midwifery', 'Soc…

### Chart 1: Gender Breakdown by Region

Output()

In [4]:
import ipywidgets as widgets
from IPython.display import display, Markdown
import plotly.graph_objects as go
from ipywidgets import interactive_output

# 1. Region selector: the catch-all entry first, then regions A-Z
region_options = ['All Regions', *sorted(pivot_df['HSE Health Regions'].unique().tolist())]
region_buttons = widgets.ToggleButtons(
    description='Region:',
    options=region_options,
    value='All Regions',
)

# 2. Field of Study dropdown
donut_field_dropdown = widgets.Dropdown(
    description='Field:',
    options=['All Fields', *sorted(pivot_df['Field of Study'].unique().tolist())],
    value='All Fields',
)

# 3. Year View Mode toggle
donut_year_mode = widgets.ToggleButtons(
    description='Year View:',
    options=['All Years', 'Single Year', 'Year Range'],
    value='All Years',
)

# 4. Single Year dropdown, initialised to the latest year in the data
donut_year_dropdown = widgets.Dropdown(
    description='Year:',
    options=sorted(pivot_df['Graduation Year'].unique()),
    value=pivot_df['Graduation Year'].max(),
)

# 5. Year Range slider, initialised to span every year in the data
donut_year_slider = widgets.IntRangeSlider(
    description='Years:',
    min=pivot_df['Graduation Year'].min(),
    max=pivot_df['Graduation Year'].max(),
    value=[pivot_df['Graduation Year'].min(), pivot_df['Graduation Year'].max()],
    step=1,
    continuous_update=False,
)

# 6. Container for dynamic year widget
donut_year_container = widgets.Output()

def get_year_widget(view, _dropdown=donut_year_dropdown, _slider=donut_year_slider):
    """Return the Chart 2 year widget matching the selected view mode.

    Parameters
    ----------
    view : str
        'Single Year' yields the donut year dropdown, 'Year Range' yields the
        donut range slider; any other value yields an empty HTML placeholder.
    _dropdown, _slider :
        Private. Captured as defaults when this cell runs, so the widgets used
        are always Chart 2's own.
    """
    if view == 'Single Year':
        return _dropdown
    if view == 'Year Range':
        return _slider
    return widgets.HTML("")

def update_donut_year_controls(change, _container=donut_year_container, _pick_widget=get_year_widget):
    """Observer callback: show the year widget for the newly selected mode.

    Parameters
    ----------
    change : dict
        Change notification; only its 'new' entry (the selected mode) is read.
    _container, _pick_widget :
        Private. Bound at definition time because the global name
        `get_year_widget` is defined again in other cells of this notebook;
        resolving it at call time would make this handler use whichever
        definition ran last instead of the one above.
    """
    _container.clear_output()
    with _container:
        display(_pick_widget(change['new']))

donut_year_mode.observe(update_donut_year_controls, names='value')
# Render once up front so the container matches the initial mode
update_donut_year_controls({'new': donut_year_mode.value})

# 7. Donut Chart Function
def update_donut_chart(field, region, year_mode, single_year, year_range):
    """Show a donut chart of the Female vs Male graduate totals for a selection.

    Reads the module-level `pivot_df`. `field` and `region` are skipped as
    filters when they equal 'All Fields' / 'All Regions'. `year_mode` of
    'Single Year' keeps rows whose year equals `single_year`; 'Year Range'
    keeps rows within the inclusive `year_range` bounds; other values apply
    no year filter. Displays the figure and returns None.
    """
    subset = pivot_df.copy()

    # Narrow by field and region unless the catch-all option is selected
    if field != 'All Fields':
        subset = subset[subset['Field of Study'] == field]
    if region != 'All Regions':
        subset = subset[subset['HSE Health Regions'] == region]

    # Narrow by year according to the chosen mode
    if year_mode == 'Single Year':
        subset = subset[subset['Graduation Year'] == single_year]
    elif year_mode == 'Year Range':
        first_year, last_year = year_range
        in_range = (subset['Graduation Year'] >= first_year) & (subset['Graduation Year'] <= last_year)
        subset = subset[in_range]

    # One slice per gender, sized by that column's total
    labels = ['Female', 'Male']
    values = [subset['Female'].sum(), subset['Male'].sum()]

    donut = go.Pie(
        labels=labels,
        values=values,
        hole=0.4,
        marker_colors=['#636EFA', '#EF553B'],
        textinfo='label+percent+value',
        texttemplate='%{label}: %{percent} (%{value:,})',
        hovertemplate='%{label}<br>Count: %{value:,}<br>Percent: %{percent}<extra></extra>',
        insidetextorientation='horizontal'
    )
    fig = go.Figure(data=[donut])

    # Year text for the title: the mode name for 'All Years', the year for
    # 'Single Year', otherwise the range bounds
    if year_mode == "All Years":
        title_year = f"{year_mode}"
    elif year_mode == "Single Year":
        title_year = f"{single_year}"
    else:
        title_year = f"{year_range[0]}–{year_range[1]}"

    title = f"Graduate Gender Share — {field}, {region} ({title_year})"

    # Caption placed at the centre of the donut hole
    centre_caption = {'text': 'Gender', 'x': 0.5, 'y': 0.5, 'font_size': 18, 'showarrow': False}
    fig.update_layout(title_text=title, annotations=[centre_caption], height=450)

    fig.show()

# 8. Link to widgets (keyword name of update_donut_chart -> widget supplying it)
donut_chart_output = interactive_output(
    update_donut_chart,
    dict(
        field=donut_field_dropdown,
        region=region_buttons,
        year_mode=donut_year_mode,
        single_year=donut_year_dropdown,
        year_range=donut_year_slider,
    ),
)

# 9. Display: heading, controls top to bottom, then the chart area
display(
    Markdown("### Chart 2: Gender Share by Field, Region, and Year"),
    region_buttons,
    donut_field_dropdown,
    donut_year_mode,
    donut_year_container,
    donut_chart_output,
)

### Chart 2: Gender Share by Field, Region, and Year

ToggleButtons(description='Region:', options=('All Regions', 'HSE Dublin and Midlands', 'HSE Dublin and North …

Dropdown(description='Field:', options=('All Fields', 'Medicine', 'Nursing and Midwifery', 'Social Care'), val…

ToggleButtons(description='Year View:', options=('All Years', 'Single Year', 'Year Range'), value='All Years')

Output()

Output()

In [5]:

# Field of Study buttons: the catch-all entry first, then the fields A-Z
field_options = ['All Fields', *sorted(pivot_df['Field of Study'].unique().tolist())]
field_buttons = widgets.ToggleButtons(
    description='Field:',
    options=field_options,
    value='All Fields',
)

# Year Mode toggle (this chart offers no 'Single Year' option)
year_mode = widgets.ToggleButtons(
    description='Year View:',
    options=['All Years', 'Year Range'],
    value='All Years',
)

# Year Range slider, initialised to span every year in the data
year_slider = widgets.IntRangeSlider(
    description='Years:',
    min=pivot_df['Graduation Year'].min(),
    max=pivot_df['Graduation Year'].max(),
    value=[pivot_df['Graduation Year'].min(), pivot_df['Graduation Year'].max()],
    step=1,
    continuous_update=False,
)

# Dynamic display of slider
year_widget_container = widgets.Output()

def get_year_widget(view, _slider=year_slider):
    """Return the Chart 3 year widget matching the selected view mode.

    Parameters
    ----------
    view : str
        'Year Range' yields the range slider; any other value (e.g.
        'All Years') yields an empty HTML placeholder.
    _slider :
        Private. Captured as a default when this cell runs so the slider used
        is the one created in this cell, whatever `year_slider` refers to later.
    """
    if view == 'Year Range':
        return _slider
    return widgets.HTML("")  # Show nothing for "All Years"

def update_year_controls(change, _container=year_widget_container, _pick_widget=get_year_widget):
    """Observer callback: show the year widget for the newly selected mode.

    Parameters
    ----------
    change : dict
        Change notification; only its 'new' entry (the selected mode) is read.
    _container, _pick_widget :
        Private. Bound at definition time because `year_widget_container` and
        `get_year_widget` are global names that other cells of this notebook
        also assign; re-running one of those cells would otherwise redirect
        this handler to another chart's container and widgets.
    """
    _container.clear_output()
    with _container:
        display(_pick_widget(change['new']))

year_mode.observe(update_year_controls, names='value')
# Render once up front so the container matches the initial mode
update_year_controls({'new': year_mode.value})

# Define box plot function
def update_box_plot(field, year_mode, year_range):
    """Show per-region box plots of graduate counts, split by gender.

    Reads the module-level `pivot_df`, filters it, melts the 'Female' and
    'Male' columns into long form and displays the figure. Returns None.

    Parameters
    ----------
    field : str
        A 'Field of Study' value, or 'All Fields' to skip the field filter.
    year_mode : str
        'Year Range' applies the year filter; any other value applies none.
    year_range : sequence of two ints
        Inclusive (start, end) bounds used when `year_mode` is 'Year Range'.
    """
    filtered = pivot_df.copy()

    # Filter by field
    if field != 'All Fields':
        filtered = filtered[filtered['Field of Study'] == field]

    # Filter by year range, and build the year text for the title so the
    # figure states the selected span rather than just the mode name
    if year_mode == 'Year Range':
        start, end = year_range
        filtered = filtered[
            (filtered['Graduation Year'] >= start) & (filtered['Graduation Year'] <= end)
        ]
        year_label = f"{start}–{end}"
    else:
        year_label = year_mode

    # Melt into long format for gender separation
    long_df = filtered.melt(
        id_vars=['Graduation Year', 'HSE Health Regions'],
        value_vars=['Female', 'Male'],
        var_name='Gender',
        value_name='Graduate Count'
    )

    # Create box plot.
    # custom_data is passed here (not via update_traces) so each gender trace
    # receives only its own rows. Assigning the whole frame to every trace made
    # points in the second trace read the first trace's rows, so the hover
    # showed the wrong gender.
    fig = px.box(
        long_df,
        x='HSE Health Regions',
        y='Graduate Count',
        color='Gender',
        points='outliers',
        custom_data=['Graduation Year', 'Gender'],
        title=f"Graduate Distribution by Gender per Region — {field} ({year_label})",
        height=500
    )

    # Enhanced hover with Gender and Year (indices follow the custom_data order above)
    fig.update_traces(
        hovertemplate=(
            "Region: %{x}<br>" +
            "Graduates: %{y}<br>" +
            "Gender: %{customdata[1]}<br>" +
            "Year: %{customdata[0]}<extra></extra>"
        )
    )

    fig.update_layout(
        xaxis_title='HSE Region',
        yaxis_title='Graduate Count',
        legend_title='Gender',
        boxmode='group'
    )

    fig.show()

# Link function to widgets (keyword name of update_box_plot -> widget supplying it)
box_plot_output = interactive_output(
    update_box_plot,
    dict(
        field=field_buttons,
        year_mode=year_mode,
        year_range=year_slider,
    ),
)

# Display everything: heading, controls top to bottom, then the chart area
display(
    Markdown("### Chart 3: Outlier Detection by Region and Gender"),
    field_buttons,
    year_mode,
    year_widget_container,
    box_plot_output,
)

### Chart 3: Outlier Detection by Region and Gender

ToggleButtons(description='Field:', options=('All Fields', 'Medicine', 'Nursing and Midwifery', 'Social Care')…

ToggleButtons(description='Year View:', options=('All Years', 'Year Range'), value='All Years')

Output()

Output()

In [6]:
import ipywidgets as widgets

# Generate summary stats (label -> value shown in the table)
summary_stats = {
    "Total Records": len(pivot_df),
    "Years Covered": f"{pivot_df['Graduation Year'].min()} – {pivot_df['Graduation Year'].max()}",
    "Fields of Study": ', '.join(sorted(pivot_df['Field of Study'].unique())),
    "Genders": "Female, Male",
    "Regions": ', '.join(sorted(pivot_df['HSE Health Regions'].unique()))
}

# Build HTML string: one table row per entry.
# Field and region names come from the CSV, so they are escaped before being
# embedded; a literal '&' or '<' in the data would otherwise corrupt the markup.
summary_rows = "".join(
    f"<tr>"
    f"<th style='text-align:left; padding-right:12px; padding-bottom:6px'>{html.escape(str(k))}</th>"
    f"<td style='padding-bottom:6px'>{html.escape(str(v))}</td>"
    f"</tr>"
    for k, v in summary_stats.items()
)
summary_html = (
    "<h3>Dataset Summary</h3><table style='border-collapse: collapse;'>"
    + summary_rows
    + "</table>"
)

# IMPORTANT: Use widgets.HTML, not IPython.display.HTML
summary_panel = widgets.HTML(value=summary_html)