In [5]:
import pandas as pd
from pathlib import Path
import re
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import altair as alt
from vega_datasets import data



# Directory layout, expressed relative to the current working directory.
BASE_DIR = Path("..")
ORIGINAL_DATA_DIR = BASE_DIR.joinpath("original_data")
CLEAN_DATA_DIR = BASE_DIR.joinpath("clean_data")

# Input CSV files, both located in the clean-data directory.
FILE_PATH_TERM = CLEAN_DATA_DIR.joinpath("nsf_terminations_airtable.csv")
FILE_PATH_UNI = CLEAN_DATA_DIR.joinpath("heis_usa.csv")

In [8]:
# Read both input CSVs into DataFrames: terminations first, universities second.
terminations, universities = (
    pd.read_csv(csv_path) for csv_path in (FILE_PATH_TERM, FILE_PATH_UNI)
)

In [4]:
# First idea: choropleth of the number of terminated grants per state.

us_map = alt.topo_feature(data.us_10m.url, feature="states")

# Normalise the state code BEFORE grouping. Cleaning it after the aggregation
# would leave variants such as "ca" or " CA" as separate groups, which then
# collapse into duplicate "CA" rows: the state's count is split and the map
# lookup below receives several rows for the same key.
terminated_by_state = (
    terminations
    .assign(org_state=lambda df: df["org_state"].str.upper().str.strip())
    .groupby("org_state")
    .size()
    .reset_index(name="terminated_count")
)

# State name -> numeric id, taken from the vega_datasets sample table.
# The id is the key the map shapes are looked up by further down.
state_names = data.population_engineers_hurricanes()
state_abbrev = state_names[["state", "id"]].drop_duplicates()
state_abbrev.columns = ["state_name", "id"]

# State abbreviation -> full state name (fetched over the network).
state_codes = pd.read_csv(
    "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
)[["Abbreviation", "State"]]
state_codes.columns = ["org_state", "state_name"]

# Chain the two tables: abbreviation -> name -> id, then attach the id to the
# per-state counts. Left joins keep every counted state even when no id is found.
state_fips = pd.merge(state_codes, state_abbrev, on="state_name", how="left")
terminated_map = pd.merge(terminated_by_state, state_fips, on="org_state", how="left")

choropleth = (
    alt.Chart(us_map)
    .mark_geoshape()
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(terminated_map, "id", ["org_state", "terminated_count"])
    )
    .encode(
        color=alt.Color(
            "terminated_count:Q",
            title="Nombre de grants terminades",
            scale=alt.Scale(scheme="reds")
        ),
        tooltip=["org_state:N", "terminated_count:Q"]
    )
    .project("albersUsa")
    .properties(
        title="Grants terminades per estat (NSF)",
        width=700,
        height=400
    )
)

choropleth



In [9]:
# Second idea: add the number of universities per state to the map tooltip.


# ---------------------------
# 1. Count terminated grants per state
# ---------------------------
# Normalise the state code BEFORE grouping. Cleaning it after the aggregation
# would leave variants such as "ca" or " CA" as separate groups, which then
# collapse into duplicate "CA" rows and split the state's count.
terminated_by_state = (
    terminations
    .assign(org_state=lambda df: df["org_state"].str.upper().str.strip())
    .groupby("org_state")
    .size()
    .reset_index(name="terminated_count")
)

# ---------------------------
# 2. Count universities / centres per state
# ---------------------------
# Same reasoning: strip whitespace first so a padded region name is not
# counted as a separate state (which would also duplicate rows in the merges).
universities_by_state = (
    universities
    .assign(region=lambda df: df["region"].str.strip())
    .groupby("region")
    .size()
    .reset_index(name="universities_count")
)

# ---------------------------
# 3. State code table (abbreviations and full names)
# ---------------------------
state_codes = pd.read_csv(
    "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
)[["Abbreviation", "State"]]
state_codes.columns = ["org_state", "state_name"]

# Convert the full state name (region) into its abbreviation (org_state)
universities_by_state = universities_by_state.merge(
    state_codes, left_on="region", right_on="state_name", how="left"
)

# ---------------------------
# 4. Attach the numeric state ids used as the map key
# ---------------------------
state_names = data.population_engineers_hurricanes()
state_abbrev = state_names[["state", "id"]].drop_duplicates()
state_abbrev.columns = ["state_name", "id"]

state_fips = pd.merge(state_codes, state_abbrev, on="state_name", how="left")

# ---------------------------
# 5. Combine terminations + universities
# ---------------------------
# Left join: every state with terminations is kept; universities_count is
# missing (NaN) where no university row maps to that abbreviation.
combined = pd.merge(
    terminated_by_state,
    universities_by_state[["org_state", "universities_count"]],
    on="org_state",
    how="left"
)

# Join with the state ids for the map
terminated_map = pd.merge(combined, state_fips, on="org_state", how="left")

# ---------------------------
# 6. Build the choropleth map
# ---------------------------
us_map = alt.topo_feature(data.us_10m.url, feature="states")

choropleth = (
    alt.Chart(us_map)
    .mark_geoshape(stroke="white", strokeWidth=0.5)
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(
            terminated_map,
            "id",
            ["org_state", "terminated_count", "universities_count"]
        )
    )
    .encode(
        color=alt.Color(
            "terminated_count:Q",
            title="Grants terminades",
            scale=alt.Scale(scheme="reds")
        ),
        tooltip=[
            alt.Tooltip("org_state:N", title="Estat"),
            alt.Tooltip("terminated_count:Q", title="Grants terminades"),
            alt.Tooltip("universities_count:Q", title="Universitats / Centres recerca")
        ]
    )
    .project("albersUsa")
    .properties(
        title="Grants terminades i universitats/centres de recerca per estat (NSF)",
        width=700,
        height=400
    )
)

choropleth

In [10]:
# Third idea: ratio of terminated grants to number of universities per state.

# ---------------------------
# 1. Count terminated grants per state
# ---------------------------
# Normalise the state code BEFORE grouping. Cleaning it after the aggregation
# would leave variants such as "ca" or " CA" as separate groups, which then
# collapse into duplicate "CA" rows, each carrying only part of the state's
# count and therefore a wrong ratio.
terminated_by_state = (
    terminations
    .assign(org_state=lambda df: df["org_state"].str.upper().str.strip())
    .groupby("org_state")
    .size()
    .reset_index(name="terminated_count")
)

# ---------------------------
# 2. Count universities / centres per state
# ---------------------------
# Same reasoning: strip whitespace first so a padded region name is not
# counted as a separate state, which would understate the denominator.
universities_by_state = (
    universities
    .assign(region=lambda df: df["region"].str.strip())
    .groupby("region")
    .size()
    .reset_index(name="universities_count")
)

# ---------------------------
# 3. State code table
# ---------------------------
state_codes = pd.read_csv(
    "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
)[["Abbreviation", "State"]]
state_codes.columns = ["org_state", "state_name"]

# Convert full state names into abbreviations
universities_by_state = universities_by_state.merge(
    state_codes, left_on="region", right_on="state_name", how="left"
)

# ---------------------------
# 4. Attach the numeric state ids used as the map key
# ---------------------------
state_names = data.population_engineers_hurricanes()
state_abbrev = state_names[["state", "id"]].drop_duplicates()
state_abbrev.columns = ["state_name", "id"]

state_fips = pd.merge(state_codes, state_abbrev, on="state_name", how="left")

# ---------------------------
# 5. Combine terminations + universities
# ---------------------------
# Left join: every state with terminations is kept; universities_count is
# missing (NaN) where no university row maps to that abbreviation.
combined = pd.merge(
    terminated_by_state,
    universities_by_state[["org_state", "universities_count"]],
    on="org_state",
    how="left"
)

# Compute the ratio (terminated grants per university). A missing
# universities_count propagates as NaN; the count is a group size, so it is
# never zero.
combined["terminated_per_university"] = (
    combined["terminated_count"] / combined["universities_count"]
)

# Join with the state ids for the map
terminated_map = pd.merge(combined, state_fips, on="org_state", how="left")

# ---------------------------
# 6. Build the choropleth map
# ---------------------------
us_map = alt.topo_feature(data.us_10m.url, feature="states")

choropleth = (
    alt.Chart(us_map)
    .mark_geoshape(stroke="white", strokeWidth=0.5)
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(
            terminated_map,
            "id",
            ["org_state", "terminated_per_university", "terminated_count", "universities_count"]
        )
    )
    .encode(
        color=alt.Color(
            "terminated_per_university:Q",
            title="Grants terminades per universitat",
            scale=alt.Scale(scheme="reds")
        ),
        tooltip=[
            alt.Tooltip("org_state:N", title="Estat"),
            alt.Tooltip("terminated_count:Q", title="Grants terminades"),
            alt.Tooltip("universities_count:Q", title="Universitats"),
            alt.Tooltip("terminated_per_university:Q", title="Rati (Grants/Universitat)", format=".2f")
        ]
    )
    .project("albersUsa")
    .properties(
        title="Rati de grants terminades per universitat / centre de recerca (NSF)",
        width=700,
        height=400
    )
)

choropleth
