## Dashboard: Annual Nobel Prize Trend (Unique Prizes per Year)

# Nobel Prize Dashboard (1901–2025)

This notebook runs a small **interactive Dash dashboard** using the prepared file:

- `data/processed/analysis_dataset.csv`

## What we can do
- Filter by **Category** and (optionally) **Birth country**
- See the trend: **Unique prizes per year** (counts distinct `prize_id` values per year, so a prize shared by several laureates is counted only once)
- **Click a year point** in the chart to display a **laureates table** for that year (name, category, birth country, gender, laureate type, prize ID, portion, motivation)

> Note: Country is based on `birth_country_now` (birth place), not citizenship or institution.


In [7]:
# Imports and dataset loading
from pathlib import Path
import pandas as pd
import plotly.express as px
from dash import Dash, Input, Output, dcc, html, callback, dash_table

# Locate the project root by looking for the processed dataset.
# The notebook normally lives in Data_Analytics_Project/notebooks/, so the parent
# of the working directory is tried first. The working directory itself and its
# remaining ancestors are fallbacks for kernels started somewhere else
# (for example from the project root).
CWD = Path.cwd().resolve()
DATA_RELATIVE_PATH = Path("data") / "processed" / "analysis_dataset.csv"

_root_candidates = [CWD.parent, CWD, *list(CWD.parents)[1:]]
PROJECT_ROOT = next(
    (root for root in _root_candidates if (root / DATA_RELATIVE_PATH).exists()),
    CWD.parent,  # default keeps the error below pointing at the conventional location
)
DATA_PATH = PROJECT_ROOT / DATA_RELATIVE_PATH

# Fail early with the resolved path so a missing build step is obvious.
if not DATA_PATH.exists():
    raise FileNotFoundError(f"Not found: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)


## Build the Dash app (layout + callbacks + run)

In [18]:
# ------------------------------------------------------------
# Nobel Prize Dashboard
# What this cell builds:
# - Two dropdown filters (Category, Birth Country)
# - A line chart of unique prizes per year
# - A laureates table filled by clicking a year point
# ------------------------------------------------------------


# 1) Schema guard: every expected column that the build did not produce
#    is added as an all-NA column so the code below can rely on it.
required_cols = [
    "award_year", "prize_id", "laureate_id", "category", "birth_country_now",
    "laureate_type", "gender", "known_name", "full_name", "org_name",
    "motivation", "portion"
]
missing_cols = [col for col in required_cols if col not in df.columns]
for col in missing_cols:
    df[col] = pd.NA

# Non-numeric years become NaN instead of raising.
df["award_year"] = pd.to_numeric(df["award_year"], errors="coerce")


def _distinct_sorted(column_name):
    """Return the sorted, non-null distinct values of one `df` column."""
    return sorted(df[column_name].dropna().unique())


categories = _distinct_sorted("category")
countries = _distinct_sorted("birth_country_now")

# Prefer "Peace" as the initial category; otherwise take the first one available.
if "Peace" in categories:
    default_cat = "Peace"
elif categories:
    default_cat = categories[0]
else:
    default_cat = None

# -------------------------
# 2) Build app
# -------------------------
app = Dash(__name__)

# Style dictionaries are named so the layout tree below stays short.
PAGE_STYLE = {"fontFamily": "Arial", "maxWidth": "1100px", "margin": "0 auto"}
FILTER_ROW_STYLE = {"display": "grid", "gridTemplateColumns": "1fr 1fr", "gap": "12px"}
KPI_BOX_STYLE = {
    "textAlign": "center",
    "padding": "10px",
    "border": "1px solid #ddd",
    "borderRadius": "8px",
    "backgroundColor": "#fafafa",
    "marginBottom": "10px",
}

# Category filter: always holds exactly one value (not clearable).
category_dropdown = dcc.Dropdown(
    id="cat_dd",
    options=[{"label": name, "value": name} for name in categories],
    value=default_cat,
    multi=False,
    clearable=False,
    searchable=True,
    placeholder="Select category",
)

# Birth-country filter: optional, None means "no country filter".
country_dropdown = dcc.Dropdown(
    id="country_dd",
    options=[{"label": name, "value": name} for name in countries],
    value=None,
    multi=False,
    clearable=True,
    searchable=True,
    placeholder="Optional: select one birth country",
)

# (header text, column id) pairs for the laureates table, in display order.
TABLE_COLUMNS = [
    ("Name", "name"),
    ("Category", "category"),
    ("Award year", "award_year"),
    ("Birth country", "birth_country_now"),
    ("Gender", "gender"),
    ("Type", "laureate_type"),
    ("Prize ID", "prize_id"),
    ("portion", "portion"),
    ("Motivation", "motivation"),
]

laureates_table = dash_table.DataTable(
    id="laureates_table",
    columns=[{"name": header, "id": column_id} for header, column_id in TABLE_COLUMNS],
    data=[],
    page_size=10,
    sort_action="native",
    filter_action="native",
    style_table={"overflowX": "auto"},
    style_cell={"textAlign": "left", "padding": "6px", "whiteSpace": "normal", "height": "auto"},
    style_header={"fontWeight": "bold"},
)

app.layout = html.Div(
    style=PAGE_STYLE,
    children=[
        html.H2("Annual Nobel Prize Trend (Unique Prizes per Year)", style={"textAlign": "center"}),
        html.Div(style=FILTER_ROW_STYLE, children=[category_dropdown, country_dropdown]),
        html.Br(),
        html.Div(id="kpi_summary", style=KPI_BOX_STYLE),
        dcc.Graph(id="main_chart"),
        html.Hr(),
        html.H4("Click a year point → Laureates table"),
        html.Div(id="selected_year_text", style={"color": "#555", "marginBottom": "8px"}),
        laureates_table,
    ],
)

# -------------------------
# 3) Callback: update chart
# -------------------------
@app.callback(
    Output("main_chart", "figure"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_chart(selected_cat, selected_country):
    """Build the "unique prizes per year" line chart for the active filters.

    Args:
        selected_cat: Value of the category dropdown; a falsy value applies no
            category filter.
        selected_country: Value of the birth-country dropdown; a falsy value
            applies no country filter.

    Returns:
        A Plotly figure. When the filters leave nothing to plot, an empty figure
        whose title reports the filters is returned instead.
    """
    # Boolean indexing returns a new frame, so no up-front copy of df is needed.
    d = df
    if selected_cat:
        d = d[d["category"] == selected_cat]
    if selected_country:
        d = d[d["birth_country_now"] == selected_country]

    # Readable labels so an unset filter never shows up as a literal "None".
    cat_label = selected_cat if selected_cat else "All categories"
    country_label = selected_country if selected_country else "All countries"

    s = None
    if not d.empty:
        # nunique counts each prize_id once per year, so shared prizes are not double counted.
        s = (
            d.groupby("award_year")["prize_id"].nunique()
             .reset_index(name="num_prizes")
             .sort_values("award_year")
        )

    # s can be empty even when d is not: groupby drops rows whose award_year is NaN.
    if s is None or s.empty:
        title = f"No data for: Category={cat_label}, Country={country_label}"
        fig = px.line(title=title)
        fig.update_layout(xaxis_title="Award year", yaxis_title="Number of prizes", clickmode="event+select")
        return fig

    title = f"Unique Nobel Prizes per Year — {cat_label}" + (f" | {selected_country}" if selected_country else "")
    fig = px.line(s, x="award_year", y="num_prizes", markers=True, title=title)
    # clickmode is set so that clicking a marker emits clickData for the table callback.
    fig.update_layout(xaxis_title="Award year", yaxis_title="Number of unique prizes", clickmode="event+select")
    return fig

# -------------------------
# 4) Callback: click year -> table
# -------------------------
@app.callback(
    Output("selected_year_text", "children"),
    Output("laureates_table", "data"),
    Input("main_chart", "clickData"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_table(clickData, selected_cat, selected_country):
    """Fill the laureates table for the year clicked in the chart.

    Args:
        clickData: Click payload of the chart; None until a point is clicked.
        selected_cat: Value of the category dropdown; falsy applies no filter.
        selected_country: Value of the birth-country dropdown; falsy applies no filter.

    Returns:
        A tuple ``(message, records)``: a status line for the page and the table
        rows as a list of dicts (empty when there is nothing to show).
    """
    # Treat a missing payload and a payload without points the same way.
    points = (clickData or {}).get("points")
    if not points:
        return "Click a year point in the chart above to load laureates.", []

    # The x value is parsed via float so a value such as 1901.0 is accepted.
    year_clicked = int(float(points[0]["x"]))

    # Boolean indexing returns new frames, so df itself is never modified.
    d = df
    if selected_cat:
        d = d[d["category"] == selected_cat]
    if selected_country:
        d = d[d["birth_country_now"] == selected_country]
    d = d[d["award_year"] == year_clicked]

    if d.empty:
        return f"No laureates for year {year_clicked} with current filters.", []

    table_cols = [
        "name", "category", "award_year", "birth_country_now", "gender",
        "laureate_type", "prize_id", "portion", "motivation",
    ]
    table_df = (
        d.assign(
            # Display name: organisation name first, then known name, then full name.
            name=d["org_name"].fillna(d["known_name"]).fillna(d["full_name"]),
            # Every remaining row has this year; storing the int avoids a float
            # rendering (e.g. 1901.0) when the coerced column is float-typed.
            award_year=year_clicked,
        )[table_cols]
        .drop_duplicates()
        .sort_values(["category", "name"])
    )

    # Count laureates by id, as the KPI summary does; fall back to display names
    # when no laureate_id values are available.
    n_laureates = d["laureate_id"].nunique() or table_df["name"].nunique()

    msg = f"Selected year: {year_clicked} | Rows: {len(table_df)} | Unique laureates: {n_laureates}"
    return msg, table_df.to_dict("records")
    

# -------------------------
# 5) Callback: update the KPI summary
# -------------------------
@app.callback(
    Output("kpi_summary", "children"),
    Input("cat_dd", "value"),
    Input("country_dd", "value"),
)
def update_kpis(selected_cat, selected_country):
    """Summarise the active filters as a one-line KPI string.

    Args:
        selected_cat: Value of the category dropdown; falsy applies no filter.
        selected_country: Value of the birth-country dropdown; falsy applies no filter.

    Returns:
        A string with the number of unique prizes, the number of unique
        laureates and the covered year range, or a "No data" message when the
        filters match no rows.
    """
    # Boolean indexing returns a new frame, so no up-front copy of df is needed.
    d = df
    if selected_cat:
        d = d[d["category"] == selected_cat]
    if selected_country:
        d = d[d["birth_country_now"] == selected_country]

    country_label = selected_country if selected_country else "All countries"
    cat_label = selected_cat if selected_cat else "All categories"

    if d.empty:
        return f"No data for {cat_label} | {country_label}"

    n_prizes = d["prize_id"].nunique()
    n_laureates = d["laureate_id"].nunique()

    # award_year may contain NaN after numeric coercion; int(NaN) would raise,
    # so the range is built from the valid years only.
    years = d["award_year"].dropna()
    if years.empty:
        years_label = "n/a"
    else:
        years_label = f"{int(years.min())}–{int(years.max())}"

    return (
        f"{cat_label} | {country_label}  —  "
        f"Unique prizes: {n_prizes}  |  Unique laureates: {n_laureates}  |  "
        f"Years: {years_label}"
    )

# -------------------------
# 6) Run inside Jupyter
# -------------------------
# The dashboard is rendered inline in the notebook output.
DASH_PORT = 8050
app.run(jupyter_mode="inline", port=DASH_PORT)


In [None]:
# Verification: unique prizes and unique laureates for one category / birth-country combination
# d = df[(df["category"] == "Economic Sciences") & (df["birth_country_now"] == "Germany")].copy()

# total_unique_prizes = d["prize_id"].nunique()
# total_unique_laureates = d["laureate_id"].nunique()

# print("Germany (birth country) — Peace")
# print("Unique Peace prizes:", total_unique_prizes)
# print("Unique laureates:", total_unique_laureates)

In [None]:
#for verification prizes,laureates, country
# germany_peace_by_year = (
#     d.groupby("award_year")["prize_id"].nunique()
#      .reset_index(name="n_peace_prizes")
#      .sort_values("award_year")
# )

# display(germany_peace_by_year)


In [None]:
# df = df.copy()
# df["name"] = df["org_name"].fillna(df["known_name"]).fillna(df["full_name"])


In [None]:
# df.loc[df["prize_id"]=="1988_chemistry", ["name","share","portion","motivation"]].head(10)
