In [11]:
#install, imports, load dataset
from pathlib import Path
import pandas as pd
import plotly.express as px
from dash import Dash, Input, Output, dcc, html, callback

# Resolve project root (notebook is in Data_Analytics_Project/notebooks/)
CWD = Path.cwd().resolve()
PROJECT_ROOT = CWD.parent
DATA_PATH = PROJECT_ROOT / "data" / "processed" / "analysis_dataset.csv"

if not DATA_PATH.exists():
    raise FileNotFoundError(f"Not found: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)


In [3]:
# df.dtypes

prize_id                   object
laureate_id                 int64
motivation                 object
share                     float64
portion                    object
sort_order                  int64
award_year                  int64
category                   object
category_full              object
date_awarded               object
prize_status              float64
prize_amount                int64
prize_amount_adjusted       int64
laureate_type              object
known_name                 object
full_name                  object
org_name                   object
gender                     object
birth_date                 object
birth_city                 object
birth_country_now          object
birth_country_original     object
death_date                 object
death_city                 object
death_country_now          object
wikipedia_en               object
wikidata_id                object
decade                      int64
birth_date_parsed          object
date_awarded_p

## Dashboard: Nobel Prizes per Year (Interactive Filters)

This Dash dashboard provides an interactive view of **how many unique Nobel prizes were awarded per year**.

### What this dashboard does
- Loads the minimum and maximum award year (`min_year`, `max_year`) from the dataset.
- Creates two dropdown filters:
  - **Category** (e.g., Peace, Physics, etc.)
  - **Birth country** (optional; based on `birth_country_now`)
- Updates the line chart automatically whenever you change a filter.

### (counting logic)
The dataset contains multiple rows per prize when a prize is shared among multiple laureates.  
To avoid double counting, the chart counts **unique prizes** using:

- `nunique(prize_id)` per year

### Output
- A line chart showing **unique Nobel prizes per year**
- A fallback “No data” message if a filter combination returns no rows


In [34]:
# 1) Create Dash app
app = Dash(__name__)

# 2) Dropdown options
categories = sorted(df["category"].dropna().unique())
countries  = sorted(df["birth_country_now"].dropna().unique())

default_cat = "Peace" if "Peace" in categories else categories[0]
default_country = "Germany" if "Germany" in countries else None

# 3) Layout (filters + chart)
app.layout = html.Div(
    style={"fontFamily": "Arial", "maxWidth": "1100px", "margin": "0 auto"},
    children=[
        html.H2("Nobel Prize Dashboard (1901–2025)", style={"textAlign": "center"}),

        html.Div(
            style={"display": "grid", "gridTemplateColumns": "1fr 1fr", "gap": "12px"},
            children=[
                dcc.Dropdown(
                    options=[{"label": c, "value": c} for c in categories],
                    value=default_cat,
                    multi=False,
                    id="cat_dd",
                    clearable=False,
                ),
                dcc.Dropdown(
                    options=[{"label": c, "value": c} for c in countries],
                    value=default_country,   # set to None if you want no default country filter
                    multi=False,
                    id="country_dd",
                    placeholder="Optional: select one birth country",
                    clearable=True,
                ),
            ],
        ),

        html.Br(),
        dcc.Graph(id="main_chart"),
    ],
)

# 4) Callback: update chart when filters change
@app.callback(
    Output("main_chart", "figure"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_chart(selected_cat, selected_country):
    d = df.copy()

    if selected_cat:
        d = d[d["category"] == selected_cat]

    if selected_country:
        d = d[d["birth_country_now"] == selected_country]

    # If filters return no data
    if d.empty:
        title = f"No data for: Category={selected_cat}, Country={selected_country}"
        fig = px.line(title=title)
        fig.update_layout(xaxis_title="Award year", yaxis_title="Number of prizes")
        return fig

    # Unique prizes per year (avoid double counting)
    s = (
        d.groupby("award_year")["prize_id"].nunique()
         .reset_index(name="num_prizes")
         .sort_values("award_year")
    )

    title = f"Prizes per Year (unique prizes) — {selected_cat}" + (f" | {selected_country}" if selected_country else "")
    fig = px.line(s, x="award_year", y="num_prizes", markers=True, title=title)
    fig.update_layout(xaxis_title="Award year", yaxis_title="Number of prizes")
    return fig

# 5) Run dashboard in notebook
app.run(jupyter_mode="inline", port=8050)


In [30]:
#for verification prizes,laureates, country
# d = df[(df["category"] == "Economic Sciences") & (df["birth_country_now"] == "Germany")].copy()

# total_unique_prizes = d["prize_id"].nunique()
# total_unique_laureates = d["laureate_id"].nunique()

# print("Germany (birth country) — Peace")
# print("Unique Peace prizes:", total_unique_prizes)
# print("Unique laureates:", total_unique_laureates)


Germany (birth country) — Peace
Unique Peace prizes: 1
Unique laureates: 1


In [31]:
#for verification prizes,laureates, country
# germany_peace_by_year = (
#     d.groupby("award_year")["prize_id"].nunique()
#      .reset_index(name="n_peace_prizes")
#      .sort_values("award_year")
# )

# display(germany_peace_by_year)


Unnamed: 0,award_year,n_peace_prizes
0,2005,1


## Annual Nobel Prize Trend (Unique Prizes per Year)

This chart shows how many **unique Nobel prizes** were awarded each year from **1901 to 2025**.  
Because the dataset contains one row per **prize–laureate** relationship (shared prizes create multiple rows), it counts **unique `prize_id`** to avoid double counting.

**How to use:**
- Select a **Category** and optionally a **Birth Country** to filter the trend.
- Click a year point to drill down and view the **laureates table** for that year.


In [33]:
# ------------------------------------------------------------
# Nobel Prize Dashboard 
# Features:
# - Dropdown filters (Category, Birth Country)
# - Line chart: Unique prizes per year
# - Click a year point -> Table of laureates for that year
# ------------------------------------------------------------

from pathlib import Path
import pandas as pd
import plotly.express as px
from dash import Dash, html, dcc, dash_table, Input, Output

# -------------------------
# 1) Load data
# -------------------------
PROJECT_ROOT = Path.cwd().resolve().parent  # notebook is in /notebooks
DATA_PATH = PROJECT_ROOT / "data" / "processed" / "analysis_dataset.csv"

df = pd.read_csv(DATA_PATH)

# Ensure required columns exist (guards if your build changes)
required_cols = [
    "award_year", "prize_id", "laureate_id", "category", "birth_country_now",
    "laureate_type", "gender", "known_name", "full_name", "org_name",
    "motivation", "share"
]
for c in required_cols:
    if c not in df.columns:
        df[c] = pd.NA

df["award_year"] = pd.to_numeric(df["award_year"], errors="coerce")

categories = sorted(df["category"].dropna().unique())
countries  = sorted(df["birth_country_now"].dropna().unique())

default_cat = "Peace" if "Peace" in categories else (categories[0] if categories else None)

# -------------------------
# 2) Build app
# -------------------------
app = Dash(__name__)

app.layout = html.Div(
    style={"fontFamily": "Arial", "maxWidth": "1100px", "margin": "0 auto"},
    children=[
        html.H2("Annual Nobel Prize Trend (Unique Prizes per Year)", style={"textAlign": "center"}),

        html.Div(
            style={"display": "grid", "gridTemplateColumns": "1fr 1fr", "gap": "12px"},
            children=[
                dcc.Dropdown(
                    options=[{"label": c, "value": c} for c in categories],
                    value=default_cat,          # default category
                    multi=False,                # single category (simpler)
                    id="cat_dd",
                    clearable=False,
                    searchable=True,
                    placeholder="Select category",
                ),
                dcc.Dropdown(
                    options=[{"label": c, "value": c} for c in countries],
                    value=None,                 # None = no country filter
                    multi=False,
                    id="country_dd",
                    clearable=True,
                    searchable=True,
                    placeholder="Optional: select one birth country",
                ),
            ],
        ),

        html.Br(),
        dcc.Graph(id="main_chart"),

        html.Hr(),
        html.H4("Click a year point → Laureates table"),
        html.Div(id="selected_year_text", style={"color": "#555", "marginBottom": "8px"}),

        dash_table.DataTable(
            id="laureates_table",
            columns=[
                {"name": "Name", "id": "name"},
                {"name": "Category", "id": "category"},
                {"name": "Award year", "id": "award_year"},
                {"name": "Birth country", "id": "birth_country_now"},
                {"name": "Gender", "id": "gender"},
                {"name": "Type", "id": "laureate_type"},
                {"name": "Prize ID", "id": "prize_id"},
                {"name": "Share", "id": "share"},
                {"name": "Motivation", "id": "motivation"},
            ],
            data=[],
            page_size=10,
            sort_action="native",
            filter_action="native",
            style_table={"overflowX": "auto"},
            style_cell={"textAlign": "left", "padding": "6px", "whiteSpace": "normal", "height": "auto"},
            style_header={"fontWeight": "bold"},
        ),
    ],
)

# -------------------------
# 3) Callback: update chart
# -------------------------
@app.callback(
    Output("main_chart", "figure"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_chart(selected_cat, selected_country):
    d = df.copy()

    if selected_cat:
        d = d[d["category"] == selected_cat]

    if selected_country:
        d = d[d["birth_country_now"] == selected_country]

    if d.empty:
        title = f"No data for: Category={selected_cat}, Country={selected_country}"
        fig = px.line(title=title)
        fig.update_layout(xaxis_title="Award year", yaxis_title="Number of prizes", clickmode="event+select")
        return fig

    s = (
        d.groupby("award_year")["prize_id"].nunique()
         .reset_index(name="num_prizes")
         .sort_values("award_year")
    )

    title = f"Unique Nobel Prizes per Year — {selected_cat}" + (f" | {selected_country}" if selected_country else "")
    fig = px.line(s, x="award_year", y="num_prizes", markers=True, title=title)
    fig.update_layout(xaxis_title="Award year", yaxis_title="Number of unique prizes", clickmode="event+select")
    return fig

# -------------------------
# 4) Callback: click year -> table
# -------------------------
@app.callback(
    Output("selected_year_text", "children"),
    Output("laureates_table", "data"),
    Input("main_chart", "clickData"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_table(clickData, selected_cat, selected_country):
    if clickData is None:
        return "Click a year point in the chart above to load laureates.", []

    # clicked year from Plotly
    year_clicked = int(float(clickData["points"][0]["x"]))

    d = df.copy()

    if selected_cat:
        d = d[d["category"] == selected_cat]
    if selected_country:
        d = d[d["birth_country_now"] == selected_country]

    d = d[d["award_year"] == year_clicked]

    if d.empty:
        return f"No laureates for year {year_clicked} with current filters.", []

    # readable name
    d = d.copy()
    d["name"] = d["org_name"].fillna(d["known_name"]).fillna(d["full_name"])

    table_df = (
        d[["name","category","award_year","birth_country_now","gender","laureate_type","prize_id","share","motivation"]]
        .drop_duplicates()
        .sort_values(["category", "name"])
    )

    msg = f"Selected year: {year_clicked} | Rows: {len(table_df)} | Unique laureates: {table_df['name'].nunique()}"
    return msg, table_df.to_dict("records")

# -------------------------
# 5) Run inside Jupyter
# -------------------------
app.run(jupyter_mode="inline", port=8050)


In [25]:
#sliders marks and options
category_options = sorted(df["category"].dropna().unique())
type_options = sorted(df["laureate_type"].dropna().unique())
gender_options = sorted(df["gender"].dropna().unique())
country_options = sorted(df["birth_country_now"].dropna().unique())

marks = {y: str(y) for y in range(min_year, max_year + 1, 10)}
marks[min_year] = str(min_year)
marks[max_year] = str(max_year)

In [None]:
#category_options, type_options, gender_options, country_options, marks

In [24]:
#filter and KPIs
def filter_df(base, year_range, categories, types_, genders, countries):
    d = base.copy()
    y0, y1 = year_range
    d = d[(d["award_year"] >= y0) & (d["award_year"] <= y1)]
    if categories: d = d[d["category"].isin(categories)]
    if types_:     d = d[d["laureate_type"].isin(types_)]
    if genders:    d = d[d["gender"].isin(genders)]
    if countries:  d = d[d["birth_country_now"].isin(countries)]
    return d

def kpis(d):
    n_prizes = int(d["prize_id"].nunique())
    n_laureates = int(d["laureate_id"].nunique())

    uniq_laureates = d[["laureate_id", "laureate_type"]].drop_duplicates()
    org_share = (uniq_laureates["laureate_type"].eq("organization").mean() * 100) if len(uniq_laureates) else 0.0

    persons_age = d[(d["laureate_type"]=="person") & (pd.to_numeric(d["age_at_award"], errors="coerce").notna())]
    median_age = persons_age["age_at_award"].median()

    return n_prizes, n_laureates, org_share, median_age



In [26]:
# fig functions
def fig_time(d):
    s = d.groupby("award_year")["prize_id"].nunique().reset_index(name="num_prizes").sort_values("award_year")
    return px.line(s, x="award_year", y="num_prizes", markers=True, title="Prizes per Year (unique prizes)")

def fig_category(d):
    s = d.groupby("category")["prize_id"].nunique().reset_index(name="num_prizes").sort_values("num_prizes", ascending=False)
    fig = px.bar(s, x="num_prizes", y="category", orientation="h", title="Prizes by Category (unique prizes)")
    fig.update_layout(yaxis={"categoryorder": "total ascending"})
    return fig

def fig_countries(d, top_n=15):
    persons = d[d["laureate_type"]=="person"]
    s = (persons.groupby("birth_country_now")["laureate_id"].nunique()
         .reset_index(name="num_laureates")
         .sort_values("num_laureates", ascending=False)
         .head(top_n))
    fig = px.bar(s, x="num_laureates", y="birth_country_now", orientation="h",
                 title=f"Top {top_n} Birth Countries (persons, unique laureates)")
    fig.update_layout(yaxis={"categoryorder": "total ascending"})
    return fig

def fig_sharing(d):
    lp = d.groupby("prize_id")["laureate_id"].nunique().reset_index(name="n_laureates")
    dist = lp["n_laureates"].value_counts().sort_index().reset_index()
    dist.columns = ["n_laureates_sharing_prize", "num_prizes"]
    return px.bar(dist, x="n_laureates_sharing_prize", y="num_prizes",
                  title="Prize Sharing Distribution (# laureates per prize)")


In [27]:
# create app and layout
app = Dash(__name__)

def kpi_card(title, value_id):
    return html.Div(
        [html.Div(title, style={"fontSize":"12px","color":"#666"}),
         html.Div(id=value_id, style={"fontSize":"22px","fontWeight":"700"})],
        style={"border":"1px solid #e6e6e6","borderRadius":"10px","padding":"12px","background":"white","flex":"1","minWidth":"170px"}
    )

app.layout = html.Div(
    style={"fontFamily":"Arial","background":"#f7f7f9","minHeight":"100vh","padding":"16px"},
    children=[
        html.H2("Nobel Prize Winners Dashboard (1901–2025)"),

        html.Div(
            style={"background":"white","border":"1px solid #e6e6e6","borderRadius":"12px","padding":"14px","marginBottom":"12px"},
            children=[
                html.Div("Filters", style={"fontWeight":"700","marginBottom":"10px"}),

                html.Div("Award year range", style={"fontSize":"12px","color":"#666"}),
                dcc.RangeSlider(id="year_range", min=min_year, max=max_year, step=1,
                                value=[min_year, max_year], marks=marks, allowCross=False),

                html.Br(),
                html.Div(style={"display":"grid","gridTemplateColumns":"1fr 1fr","gap":"10px"}, children=[
                    dcc.Dropdown(id="category_dd", options=[{"label":c,"value":c} for c in category_options],
                                 value=[], multi=True, placeholder="All categories"),
                    dcc.Dropdown(id="type_dd", options=[{"label":t,"value":t} for t in type_options],
                                 value=[], multi=True, placeholder="All laureate types"),
                    dcc.Dropdown(id="gender_dd", options=[{"label":g,"value":g} for g in gender_options],
                                 value=[], multi=True, placeholder="All genders"),
                    dcc.Dropdown(id="country_dd", options=[{"label":c,"value":c} for c in country_options],
                                 value=[], multi=True, placeholder="All birth countries"),
                ])
            ]
        ),

        html.Div(style={"display":"flex","gap":"12px","flexWrap":"wrap","marginBottom":"12px"}, children=[
            kpi_card("Unique prizes", "kpi_prizes"),
            kpi_card("Unique laureates", "kpi_laureates"),
            kpi_card("Organizations share", "kpi_org_share"),
            kpi_card("Median age at award (persons)", "kpi_median_age"),
        ]),

        html.Div(style={"display":"grid","gridTemplateColumns":"1fr 1fr","gap":"12px"}, children=[
            html.Div(style={"background":"white","border":"1px solid #e6e6e6","borderRadius":"12px","padding":"8px"},
                     children=[dcc.Graph(id="g_time")]),
            html.Div(style={"background":"white","border":"1px solid #e6e6e6","borderRadius":"12px","padding":"8px"},
                     children=[dcc.Graph(id="g_category")]),
            html.Div(style={"background":"white","border":"1px solid #e6e6e6","borderRadius":"12px","padding":"8px"},
                     children=[dcc.Graph(id="g_countries")]),
            html.Div(style={"background":"white","border":"1px solid #e6e6e6","borderRadius":"12px","padding":"8px"},
                     children=[dcc.Graph(id="g_sharing")]),
        ])
    ]
)


In [28]:
# callback (connect filters - > KPIs + charts)
@app.callback(
    Output("kpi_prizes","children"),
    Output("kpi_laureates","children"),
    Output("kpi_org_share","children"),
    Output("kpi_median_age","children"),
    Output("g_time","figure"),
    Output("g_category","figure"),
    Output("g_countries","figure"),
    Output("g_sharing","figure"),
    Input("year_range","value"),
    Input("category_dd","value"),
    Input("type_dd","value"),
    Input("gender_dd","value"),
    Input("country_dd","value"),
)
def update(year_range, categories, types_, genders, countries):
    d = filter_df(df, year_range, categories, types_, genders, countries)
    n_prizes, n_laureates, org_share, median_age = kpis(d)

    return (
        f"{n_prizes:,}",
        f"{n_laureates:,}",
        f"{org_share:.1f}%",
        "—" if pd.isna(median_age) else f"{median_age:.1f}",
        fig_time(d),
        fig_category(d),
        fig_countries(d, top_n=15),
        fig_sharing(d),
    )


In [29]:
app.run(jupyter_mode="inline", port=8050)


[2026-02-11 10:39:23,720] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\0_DA\Python310\lib\site-packages\dash\dash.py", line 1492, in _prepare_callback
    cb = self.callback_map[output]
KeyError: 'main_chart.figure'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\0_DA\Python310\lib\site-packages\flask\app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\0_DA\Python310\lib\site-packages\flask\app.py", line 919, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\0_DA\Python310\lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\0_DA\Python310\lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
  File "C:\0_DA\Python310\lib\site-p