# SCI → Mapping → Nodes: lettura, copertura e costruzione nodi

Questo notebook:
1) legge il TSV SCI e il mapping livelli;
2) fa controlli e un mini-report di copertura;
3) costruisce la lista nodi (NUTS3 / GADM1 / contee USA) con lat/lon;
4) salva `node_list.csv`.

> Imposta prima i percorsi in **Cella 1**.


In [1]:
from __future__ import annotations

from typing import Iterable, Dict, Optional, Tuple, List
import re

import pandas as pd
import geopandas as gpd
import fiona
from IPython.display import display

# === Input paths (edit as needed) ===
# Tab-separated SCI table and the CSV that assigns a level type to each location code.
TSV_PATH = "data/gadm1_nuts3_counties-gadm1_nuts3_counties - FB Social Connectedness Index - October 2021.tsv"
MAP_PATH = "data/gadm1_nuts3_counties_levels.csv"

# Geodata files used to derive node coordinates
NUTS_GEOJSON_PATH   = "data/NUTS_RG_60M_2016_4326_LEVL_3.geojson"
US_COUNTIES_PATH    = "data/us-county-boundaries.geojson"
GADM_GPKG_PATH      = "data/gadm_410.gpkg"   # typical layer name: "gadm_410"

# SCI read parameters: the columns to load and the dtype applied to each one
SCI_COLS  = ["user_loc", "fr_loc", "scaled_sci"]
DTYPE_MAP = {"user_loc": "string", "fr_loc": "string", "scaled_sci": "float64"}


In [2]:
def _normalize_location_code(s: pd.Series) -> pd.Series:
    """
    Normalizza i codici location: stringa, strip, upper (non tocca NaN).
    """
    return s.astype("string").str.strip().str.upper()

def _ensure_columns(df: pd.DataFrame, required: Iterable[str], where: str = "") -> None:
    """
    Verifica che il DataFrame contenga le colonne richieste.
    """
    missing = [c for c in required if c not in df.columns]
    if missing:
        loc = f" in {where}" if where else ""
        raise ValueError(f"Mancano colonne{loc}: {missing}")

def _preview(df: pd.DataFrame, name: str = "DataFrame", n: int = 5) -> None:
    """
    Show a quick look at ``df``: its first ``n`` rows, the column dtypes and
    the total number of rows. ``name`` is used as the title of the preview.
    """
    first_rows = df.head(n)
    print(f"== {name} – prime righe ==")
    display(first_rows)

    print("\nSchema:")
    print(df.dtypes)

    row_count = len(df)
    print(f"\nNumero righe: {row_count:,}")


In [16]:
def load_sci_tsv(
    path: str,
    sci_cols: Iterable[str] = ("user_loc", "fr_loc", "scaled_sci"),
    dtype_map: Optional[Dict[str, str]] = None,
    low_memory: bool = False
) -> pd.DataFrame:
    """
    Read the tab-separated SCI file and return it with normalized location codes.

    Parameters
    ----------
    path : path of the TSV file.
    sci_cols : names of the columns to load.
    dtype_map : column -> dtype mapping passed to ``pandas.read_csv``; when
        ``None`` the location columns are read as ``"string"`` and
        ``scaled_sci`` as ``"float64"``.
    low_memory : forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    DataFrame restricted to ``sci_cols``; ``user_loc`` / ``fr_loc`` (when
    loaded) are stripped and upper-cased.

    Raises
    ------
    ValueError
        If any requested column is missing from the file header.
    """
    # Materialize once: `sci_cols` may be a one-shot iterable and is needed
    # both for validation and for the actual read.
    cols = list(sci_cols)

    if dtype_map is None:
        dtype_map = {"user_loc": "string", "fr_loc": "string", "scaled_sci": "float64"}

    # Check the header BEFORE the full read. With `usecols`, read_csv itself
    # fails on a missing column, so a check placed after the read could never
    # report the problem.
    header = pd.read_csv(path, sep="\t", nrows=0)
    _ensure_columns(header, cols, where="SCI TSV")

    df = pd.read_csv(
        path,
        sep="\t",
        usecols=cols,
        dtype=dtype_map,
        low_memory=low_memory
    )

    # Normalize only the code columns that were actually requested.
    for code_col in ("user_loc", "fr_loc"):
        if code_col in df.columns:
            df[code_col] = _normalize_location_code(df[code_col])

    return df

def load_levels_mapping(
    path: str,
    usecols: Iterable[str] = ("key", "level"),
    rename_map: Optional[Dict[str, str]] = None,
) -> pd.DataFrame:
    """
    Read the CSV that assigns a level type to each location code.

    Parameters
    ----------
    path : path of the mapping CSV.
    usecols : names of the columns to read from the file.
    rename_map : source-name -> target-name renaming applied after reading;
        when ``None`` it defaults to ``{"key": "location_code",
        "level": "level_type"}``.

    Returns
    -------
    DataFrame with all columns read as ``"string"``; ``location_code`` is
    stripped and upper-cased.

    Raises
    ------
    ValueError
        If, after renaming, ``location_code`` or ``level_type`` is missing.
    """
    # A dict literal as default would be shared across calls; build it per call.
    if rename_map is None:
        rename_map = {"key": "location_code", "level": "level_type"}

    cols = list(usecols)
    df = pd.read_csv(path, usecols=cols, dtype="string").rename(columns=rename_map)
    _ensure_columns(df, ["location_code", "level_type"], where="Mapping livelli")
    df["location_code"] = _normalize_location_code(df["location_code"])
    return df

def quick_read_inputs(
    tsv_path: str,
    map_path: str,
    sci_cols: Iterable[str] = ("user_loc", "fr_loc", "scaled_sci"),
    dtype_map: Optional[Dict[str, str]] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load the SCI table and the level mapping, printing a preview of each one
    right after it is loaded.

    Returns the pair ``(sci_table, level_mapping)``.
    """
    sci_table = load_sci_tsv(tsv_path, sci_cols=sci_cols, dtype_map=dtype_map)
    _preview(sci_table, name="SCI (TSV)")

    level_mapping = load_levels_mapping(map_path)
    _preview(level_mapping, name="Mapping livelli")

    return sci_table, level_mapping

# Load both inputs (each one is previewed as it is read)
df_sci, df_map = quick_read_inputs(
    tsv_path=TSV_PATH,
    map_path=MAP_PATH,
    sci_cols=SCI_COLS,
    dtype_map=DTYPE_MAP,
)

# Optional: number of mapping rows per level type
print("\n== level_type value_counts ==")
display(df_map["level_type"].value_counts())


== SCI (TSV) – prime righe ==


Unnamed: 0,user_loc,fr_loc,scaled_sci
0,ABW,ABW,11264841.0
1,ABW,AGO1,38.0
2,ABW,AGO10,34.0
3,ABW,AGO11,32.0
4,ABW,AGO12,23.0



Schema:
user_loc      string[python]
fr_loc        string[python]
scaled_sci           float64
dtype: object

Numero righe: 63,824,121
== Mapping livelli – prime righe ==


Unnamed: 0,location_code,level_type
0,AND,country
1,ATG,country
2,ABW,country
3,BHS,country
4,BRB,country



Schema:
location_code    string[python]
level_type       string[python]
dtype: object

Numero righe: 8,008

== level_type value_counts ==


level_type
county     3229
gadm1      1839
nuts3      1522
gadm2      1370
country      48
Name: count, dtype: Int64

## Coverage of the dataset: how many SCI nodes and edges are covered by the level mapping

In [None]:
def get_unique_location_codes(df_sci: pd.DataFrame) -> pd.Index:
    """
    Return the distinct, normalized location codes that appear in either
    ``user_loc`` or ``fr_loc``, in order of first appearance.

    Missing values are excluded, matching ``get_mapped_codes``; otherwise a
    missing entry would be counted as one more (always unmapped) code.

    Raises
    ------
    ValueError
        If ``user_loc`` or ``fr_loc`` is not a column of ``df_sci``.
    """
    _ensure_columns(df_sci, ["user_loc", "fr_loc"], where="SCI")

    # Deduplicate the raw values first so the string normalization runs on the
    # distinct codes only, not on every row of the table.
    raw_codes = pd.concat(
        [df_sci["user_loc"].drop_duplicates(), df_sci["fr_loc"].drop_duplicates()],
        ignore_index=True,
    )
    codes = _normalize_location_code(raw_codes).dropna()

    # Two raw spellings can normalize to the same code, hence the final unique().
    return pd.Index(codes).unique()

def get_mapped_codes(df_map: pd.DataFrame) -> pd.Series:
    """
    Return the distinct, normalized, non-missing ``location_code`` values of
    the mapping (the value returned is whatever ``Series.unique()`` yields).

    Raises ``ValueError`` when ``location_code`` is not a column of ``df_map``.
    """
    _ensure_columns(df_map, ["location_code"], where="Mapping livelli")
    codes = _normalize_location_code(df_map["location_code"])
    present = codes.dropna()
    return present.unique()

def compute_node_coverage(df_sci: pd.DataFrame, df_map: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, float]]:
    """
    Measure how many distinct SCI location codes are present in the mapping.

    Returns
    -------
    (unmapped, summary)
        ``unmapped`` is a one-column frame (``location_code``) with the SCI
        codes absent from the mapping, sorted; ``summary`` holds the total,
        mapped and unmapped counts plus the mapped percentage (0.0 when there
        are no codes at all).
    """
    sci_codes = pd.Series(get_unique_location_codes(df_sci), name="location_code")
    map_codes = pd.Series(get_mapped_codes(df_map), name="location_code")

    n_total = sci_codes.size
    is_mapped = sci_codes.isin(set(map_codes))
    n_mapped = int(is_mapped.sum())

    unmapped = sci_codes[~is_mapped].to_frame()
    unmapped = unmapped.drop_duplicates()
    unmapped = unmapped.sort_values("location_code")
    unmapped = unmapped.reset_index(drop=True)

    if n_total:
        coverage_pct = n_mapped / n_total * 100.0
    else:
        coverage_pct = 0.0

    summary = {
        "total_unique_codes": int(n_total),
        "mapped_unique_codes": int(n_mapped),
        "unmapped_unique_codes": int(n_total - n_mapped),
        "node_coverage_pct": coverage_pct,
    }
    return unmapped, summary

def compute_edge_coverage(df_sci: pd.DataFrame, df_map: pd.DataFrame) -> Dict[str, float]:
    """
    Count the SCI rows whose two endpoints are both present in the mapping.

    Returns a dict with the total number of rows, the number of rows with both
    codes mapped, and that share as a percentage (0.0 for an empty table).
    """
    _ensure_columns(df_sci, ["user_loc", "fr_loc"], where="SCI")
    known_codes = set(get_mapped_codes(df_map))

    source_ok = _normalize_location_code(df_sci["user_loc"]).isin(known_codes)
    friend_ok = _normalize_location_code(df_sci["fr_loc"]).isin(known_codes)

    n_rows = int(len(df_sci))
    n_valid = int((source_ok & friend_ok).sum())

    if n_rows:
        coverage_pct = n_valid / n_rows * 100.0
    else:
        coverage_pct = 0.0

    return {
        "total_rows": n_rows,
        "valid_rows_both_mapped": n_valid,
        "edge_coverage_pct": coverage_pct,
    }

def compute_country_coverage(
    df_sci: pd.DataFrame,
    df_map: pd.DataFrame,
    iso_col: str = "country_ISO3",
    keep_only_intra_country: bool = True
) -> Optional[pd.DataFrame]:
    """
    Summarize, per country, how many SCI edges and distinct nodes are mapped.

    Parameters
    ----------
    df_sci : SCI table with ``user_loc`` and ``fr_loc``.
    df_map : mapping with ``location_code`` and the country column ``iso_col``.
    iso_col : name of the mapping column holding the country identifier.
    keep_only_intra_country : when true, only rows whose two endpoints share
        the same country are kept.

    Returns
    -------
    ``None`` (after printing a warning) when ``iso_col`` is absent from the
    mapping; otherwise a frame with ``iso_col``, ``edges``, ``nodes`` and
    ``edges_pct``, sorted by edges then nodes in descending order.
    """
    if iso_col not in df_map.columns:
        print(f"[AVVISO] Colonna '{iso_col}' assente nel mapping: copertura per paese non calcolata.")
        return None

    _ensure_columns(df_sci, ["user_loc", "fr_loc"], where="SCI")

    # code -> country lookup; incomplete rows are dropped and the first row
    # wins when a code is listed more than once.
    lookup_rows = df_map[["location_code", iso_col]].copy()
    lookup_rows["location_code"] = _normalize_location_code(lookup_rows["location_code"])
    lookup_rows = lookup_rows.dropna().drop_duplicates("location_code")
    code2iso = lookup_rows.set_index("location_code")[iso_col]

    pairs = df_sci[["user_loc", "fr_loc"]].copy()
    pairs["user_loc"] = _normalize_location_code(pairs["user_loc"])
    pairs["fr_loc"] = _normalize_location_code(pairs["fr_loc"])
    pairs["iso_from"] = pairs["user_loc"].map(code2iso)
    pairs["iso_to"] = pairs["fr_loc"].map(code2iso)

    # Keep only rows where both endpoints resolved to a country.
    resolved = pairs.dropna(subset=["iso_from", "iso_to"]).copy()
    if keep_only_intra_country:
        same_country = resolved["iso_from"] == resolved["iso_to"]
        resolved = resolved[same_country].copy()

    # Edges are attributed to the country of the `user_loc` endpoint.
    edges_by_country = resolved.groupby("iso_from", as_index=False).agg(edges=("user_loc", "size"))
    edges_by_country = edges_by_country.rename(columns={"iso_from": iso_col})

    # Stack both endpoints as (country, code) pairs, then count distinct codes.
    from_side = resolved[["iso_from", "user_loc"]].rename(columns={"iso_from": iso_col, "user_loc": "loc"})
    to_side = resolved[["iso_to", "fr_loc"]].rename(columns={"iso_to": iso_col, "fr_loc": "loc"})
    endpoints = pd.concat([from_side, to_side], ignore_index=True).drop_duplicates()
    nodes_by_country = endpoints.groupby(iso_col, as_index=False).agg(nodes=("loc", "nunique"))

    country_cov = edges_by_country.merge(nodes_by_country, on=iso_col, how="outer").fillna(0)
    for count_col in ("edges", "nodes"):
        country_cov[count_col] = country_cov[count_col].astype(int)

    total_edges = int(country_cov["edges"].sum()) if len(country_cov) else 0
    if total_edges:
        country_cov["edges_pct"] = 100.0 * country_cov["edges"] / total_edges
    else:
        country_cov["edges_pct"] = 0.0

    ranked = country_cov.sort_values(["edges", "nodes"], ascending=False)
    return ranked.reset_index(drop=True)

def coverage_report(df_sci: pd.DataFrame, df_map: pd.DataFrame, top_n: int = 10) -> None:
    """
    Print the node, edge and per-country coverage of the SCI table.

    Up to 10 unmapped codes are shown as examples; ``top_n`` limits the rows
    displayed in the per-country table.
    """
    def _print_items(summary) -> None:
        # One "- key: value" line per summary entry.
        for key, value in summary.items():
            print(f"- {key}: {value}")

    unmapped_nodes, node_summary = compute_node_coverage(df_sci, df_map)
    print("== Copertura NODI ==")
    _print_items(node_summary)
    if len(unmapped_nodes) > 0:
        shown = min(10, len(unmapped_nodes))
        print(f"\nEsempi codici NON mappati ({shown}):")
        display(unmapped_nodes.head(10))

    edge_summary = compute_edge_coverage(df_sci, df_map)
    print("\n== Copertura ARCHI ==")
    _print_items(edge_summary)

    country_cov = compute_country_coverage(df_sci, df_map)
    has_country_rows = country_cov is not None and len(country_cov)
    if not has_country_rows:
        print("\n== Copertura PER PAESE ==")
        print("Mapping senza 'country_ISO3' → se vuoi questa sezione, aggiungi la colonna a df_map.")
    else:
        print("\n== Copertura PER PAESE (prime righe) ==")
        display(country_cov.head(top_n))

# Run the coverage report on the loaded inputs
coverage_report(df_sci=df_sci, df_map=df_map)


In [17]:
def _representative_points(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Reproject ``gdf`` to CRS 4326, replace each geometry with its
    representative point and add ``latitude`` / ``longitude`` columns taken
    from that point's y / x coordinates.

    The frame returned by ``to_crs`` is the one modified and returned.
    """
    projected = gdf.to_crs(4326)
    points = projected["geometry"].representative_point()
    projected["geometry"] = points
    projected["latitude"] = projected.geometry.y
    projected["longitude"] = projected.geometry.x
    return projected

def load_nuts3_points(nuts_geojson_path: str, code_col: Optional[str] = None) -> pd.DataFrame:
    """
    Load the NUTS regions and return one representative point per code.

    Parameters
    ----------
    nuts_geojson_path : path of the NUTS geodata file.
    code_col : name of the column holding the NUTS code; when omitted the
        first of ``NUTS_ID``, ``nuts_id``, ``ID``, ``id`` found in the file
        is used.

    Returns
    -------
    DataFrame with ``code`` (normalized), ``latitude`` and ``longitude``,
    one row per code.

    Raises
    ------
    ValueError
        If ``code_col`` is given but absent from the file, or if no candidate
        code column can be found.
    """
    gdf = gpd.read_file(nuts_geojson_path)

    # If the file carries a level column, keep level-3 rows only.
    levl_col = next((c for c in ["LEVL_CODE", "LEVL", "LEVEL"] if c in gdf.columns), None)
    if levl_col is not None:
        gdf = gdf[gdf[levl_col].astype(str) == "3"].copy()

    if code_col:
        # Fail early and clearly: without this check a wrong name would only
        # surface later as a KeyError on the renamed 'code' column.
        if code_col not in gdf.columns:
            raise ValueError(
                f"La colonna {code_col!r} non esiste in {nuts_geojson_path}. "
                f"Colonne trovate: {list(gdf.columns)}"
            )
        cc = code_col
    else:
        cc = next((c for c in ["NUTS_ID", "nuts_id", "ID", "id"] if c in gdf.columns), None)
        if cc is None:
            raise ValueError("Non trovo una colonna codice per NUTS (es. 'NUTS_ID'). Passa code_col=...")

    gdf = gdf.rename(columns={cc: "code"})
    gdf = _representative_points(gdf[["code", "geometry"]].dropna())
    gdf["code"] = _normalize_location_code(gdf["code"])
    return gdf[["code", "latitude", "longitude"]].drop_duplicates("code")

def load_gadm2_points(gadm_gpkg_path: str, layer: str = "gadm_410", code_col: str = "GID_2") -> pd.DataFrame:
    """
    Load a GADM layer and return one representative point per ADM2 code.

    Parameters
    ----------
    gadm_gpkg_path : path of the GADM GeoPackage.
    layer : name of the layer to read.
    code_col : column holding the ADM2 identifier.

    Returns
    -------
    DataFrame with ``code`` (stripped, upper-cased), ``latitude`` and
    ``longitude``, one row per code.

    Raises
    ------
    ValueError
        If ``code_col`` is not a column of the layer.
    """
    gdf = gpd.read_file(gadm_gpkg_path, layer=layer)
    if code_col not in gdf.columns:
        raise ValueError(f"La colonna {code_col!r} non esiste nel layer {layer}. Colonne trovate: {list(gdf.columns)}")

    units = gdf.loc[gdf[code_col].notna(), [code_col, "geometry"]].rename(columns={code_col: "code"})
    units["code"] = _normalize_location_code(units["code"])
    # A blank identifier cannot name a unit; drop such rows before grouping.
    units = units[units["code"].str.len() > 0]

    # When a code spans several rows (e.g. a layer storing one feature per
    # lower-level area), merge them first. Otherwise the point kept for the
    # code would come from whichever row happens to be first, not from the
    # whole unit.
    if units["code"].duplicated().any():
        units = units.dissolve(by="code", as_index=False)

    units = _representative_points(units)

    out = units[["code", "latitude", "longitude"]].drop_duplicates("code")
    print(f"[GADM2] Caricate {len(out):,} unità ADM2 uniche da {gadm_gpkg_path}")
    return out

def load_us_counties_points(counties_geojson_path: str, code_col: Optional[str] = None) -> pd.DataFrame:
    """
    Load the US county boundaries and return one representative point per county.

    Parameters
    ----------
    counties_geojson_path : path of the counties geodata file.
    code_col : name of the column holding the county code; when omitted the
        first of ``GEOID``, ``geoid``, ``FIPS``, ``fips`` found in the file
        is used.

    Returns
    -------
    DataFrame with ``code`` (left-padded with zeros to 5 characters, then
    normalized), ``latitude`` and ``longitude``, one row per code.

    Raises
    ------
    ValueError
        If ``code_col`` is given but absent from the file, or if no candidate
        code column can be found.
    """
    gdf = gpd.read_file(counties_geojson_path)

    if code_col:
        # Fail early and clearly instead of a later KeyError on 'code'.
        if code_col not in gdf.columns:
            raise ValueError(
                f"La colonna {code_col!r} non esiste in {counties_geojson_path}. "
                f"Colonne trovate: {list(gdf.columns)}"
            )
        cc = code_col
    else:
        cc = next((c for c in ["GEOID", "geoid", "FIPS", "fips"] if c in gdf.columns), None)
        if cc is None:
            raise ValueError("Non trovo una colonna codice per US counties (es. 'GEOID'). Passa code_col=...")

    gdf = gdf.rename(columns={cc: "code"})

    # Drop incomplete rows BEFORE casting to str: casting first would turn a
    # missing code into the text "nan" (padded to "00nan"), which dropna()
    # can no longer remove.
    gdf = gdf[["code", "geometry"]].dropna()
    gdf = gdf.assign(code=gdf["code"].astype(str).str.zfill(5))

    gdf = _representative_points(gdf)
    gdf["code"] = _normalize_location_code(gdf["code"])
    return gdf[["code", "latitude", "longitude"]].drop_duplicates("code")

def load_gadm1_points(gadm_gpkg_path: str, layer: str = "gadm_410", code_col: str = "GID_1") -> pd.DataFrame:
    """
    Load a GADM layer and return one representative point per ADM1 code.

    Parameters
    ----------
    gadm_gpkg_path : path of the GADM GeoPackage.
    layer : name of the layer to read.
    code_col : column holding the ADM1 identifier.

    Returns
    -------
    DataFrame with ``code`` (stripped, upper-cased), ``latitude`` and
    ``longitude``, one row per code.

    Raises
    ------
    ValueError
        If ``code_col`` is not a column of the layer.
    """
    gdf = gpd.read_file(gadm_gpkg_path, layer=layer)
    if code_col not in gdf.columns:
        raise ValueError(f"Colonna {code_col!r} assente in {layer}. Colonne: {list(gdf.columns)}")

    units = gdf.loc[gdf[code_col].notna(), [code_col, "geometry"]].rename(columns={code_col: "code"})
    units["code"] = _normalize_location_code(units["code"])
    # A blank identifier cannot name a unit; drop such rows before grouping.
    units = units[units["code"].str.len() > 0]

    # When a code spans several rows (e.g. a layer storing one feature per
    # lower-level area), merge them first. Otherwise the point kept for the
    # code would come from whichever row happens to be first, not from the
    # whole ADM1 unit.
    if units["code"].duplicated().any():
        units = units.dissolve(by="code", as_index=False)

    units = _representative_points(units)
    return units[["code", "latitude", "longitude"]].drop_duplicates("code")





In [22]:
def select_target_codes(
    df_sci: pd.DataFrame,
    df_map: pd.DataFrame,
    level_types: Iterable[str] = ("NUTS3", "GADM1", "COUNTY"),
    sci_required: Iterable[str] = ("user_loc", "fr_loc"),
    map_required: Iterable[str] = ("location_code", "level_type"),
) -> pd.DataFrame:
    """
    Return the mapping rows whose level type is one of ``level_types``
    (compared case-insensitively) and whose code occurs in the SCI table.

    The result is deduplicated on (``location_code``, ``level_type``) and an
    info line with the number of selected codes is printed.

    Raises ``ValueError`` if the required columns are missing from either input.
    """
    _ensure_columns(df_sci, sci_required, "SCI")
    _ensure_columns(df_map, map_required, "Mapping")

    # Every code seen in the SCI table, on either side of a pair
    user_codes = pd.Index(_normalize_location_code(df_sci["user_loc"]))
    friend_codes = pd.Index(_normalize_location_code(df_sci["fr_loc"]))
    sci_codes = user_codes.append(friend_codes).unique()

    # Work on a normalized copy so the caller's mapping is left untouched
    mapping = df_map.copy()
    mapping["location_code"] = _normalize_location_code(mapping["location_code"])
    mapping["level_type"] = mapping["level_type"].astype("string")

    wanted_types = set()
    for level in level_types:
        wanted_types.add(level.upper())
    has_wanted_type = mapping["level_type"].str.upper().isin(wanted_types)
    mapping = mapping[has_wanted_type]

    # Keep only the codes that the SCI table actually uses
    used_in_sci = mapping["location_code"].isin(set(sci_codes))
    target = mapping[used_in_sci]
    target = target.drop_duplicates(subset=["location_code", "level_type"])
    target = target.reset_index(drop=True)

    print(f"[INFO] Target codes selezionati: {len(target):,} "
          f"(tipi: {sorted(target['level_type'].str.upper().unique())})")
    return target



def normalize_gadm1_mapping_code(code: Optional[str]) -> Optional[str]:
    """
    Convert a mapping-style GADM1 code to the dotted form ``ISO3.N_1``.

    The input is stripped and upper-cased, and must be exactly three letters
    followed by digits (e.g. ``'BGD1'`` -> ``'BGD.1_1'``); leading zeros in
    the numeric part are discarded. ``None`` is returned for missing values
    and for anything that does not match that pattern.
    """
    if code is None or pd.isna(code):
        return None

    cleaned = str(code).strip().upper()
    match = re.fullmatch(r'([A-Z]{3})(\d+)', cleaned)
    if match is None:
        return None

    country = match.group(1)
    number = match.group(2)
    return f"{country}.{int(number)}_1"

def normalize_county_mapping_code(code: Optional[str]) -> Optional[str]:
    """
    Convert a mapping-style county code to a 5-digit string,
    e.g. ``'USA06091'`` -> ``'06091'``.

    The input is stripped and upper-cased, a leading ``USA`` is removed, and
    every non-digit character is discarded. One to five remaining digits are
    left-padded with zeros to five; anything else (no digits, more than five,
    or a missing value) yields ``None``.
    """
    if code is None or pd.isna(code):
        return None

    text = str(code).strip().upper()
    if text.startswith("USA"):
        text = text[3:]

    digits = re.sub(r'\D', '', text)
    if not 1 <= len(digits) <= 5:
        return None
    return digits.zfill(5)

def build_nodes_for_type_with_transform(
    target_codes: pd.DataFrame,
    type_name: str,
    source_df_points: pd.DataFrame,
    transform_fn=None
) -> pd.DataFrame:
    """
    Join the target codes of one level type with their coordinates.

    Parameters
    ----------
    target_codes : frame with ``location_code`` and ``level_type``.
    type_name : level type to select (compared case-insensitively).
    source_df_points : frame with ``code``, ``latitude`` and ``longitude``.
    transform_fn : optional function applied to each ``location_code`` to get
        the key used for the join; codes for which it returns a missing value
        are dropped. Without it the code itself is the key.

    Returns
    -------
    Frame with ``nodeLabel``, ``latitude``, ``longitude`` and ``level_type``
    (the upper-cased ``type_name``), one row per label.

    NOTE(review): ``nodeLabel`` is the join key (i.e. the transformed code),
    not the original ``location_code`` - confirm this is the label wanted
    downstream.
    """
    level = type_name.upper()

    is_level = target_codes["level_type"].str.upper() == level
    wanted = target_codes.loc[is_level, ["location_code"]].copy()
    wanted["location_code"] = wanted["location_code"].astype("string").str.strip().str.upper()

    if transform_fn is None:
        wanted["code"] = wanted["location_code"]
    else:
        wanted["code"] = wanted["location_code"].map(transform_fn)

    wanted = wanted.dropna(subset=["code"])
    wanted = wanted.drop_duplicates("code")

    # Normalize the keys on the coordinates side too, on a copy
    points = source_df_points.copy()
    points["code"] = points["code"].astype("string").str.strip().str.upper()

    joined = wanted.merge(points, on="code", how="inner")
    joined = joined.rename(columns={"code": "nodeLabel"})
    joined["level_type"] = level

    result = joined[["nodeLabel", "latitude", "longitude", "level_type"]]
    return result.drop_duplicates("nodeLabel")

def build_nodes_for_type(
    target_codes: pd.DataFrame,
    type_name: str,
    source_df_points: pd.DataFrame
) -> pd.DataFrame:
    """
    Build the nodes of one level type using the location codes themselves as
    join keys (no key transformation).
    """
    return build_nodes_for_type_with_transform(target_codes, type_name, source_df_points, None)

def assemble_node_list(
    nuts_nodes: Optional[pd.DataFrame] = None,
    gadm_nodes: Optional[pd.DataFrame] = None,   # GADM1 nodes are passed here
    county_nodes: Optional[pd.DataFrame] = None
) -> pd.DataFrame:
    """
    Stack the per-type node frames into the final node list.

    ``None`` and empty frames are skipped. Labels are deduplicated (the first
    occurrence is kept), sorted, and numbered from 1 in ``nodeID``.

    Returns a frame with ``nodeID``, ``nodeLabel``, ``latitude``, ``longitude``.

    Raises ``ValueError`` when no non-empty frame is supplied.
    """
    frames = []
    for candidate in (nuts_nodes, gadm_nodes, county_nodes):
        if candidate is not None and len(candidate):
            frames.append(candidate)

    if not frames:
        raise ValueError("Nessun nodo fornito.")

    nodes = pd.concat(frames, ignore_index=True)
    nodes = nodes.drop_duplicates("nodeLabel")
    nodes = nodes.sort_values("nodeLabel")
    nodes = nodes.reset_index(drop=True)

    node_ids = range(1, len(nodes) + 1)
    nodes.insert(0, "nodeID", node_ids)
    return nodes[["nodeID", "nodeLabel", "latitude", "longitude"]]


# build_nodes_for_type is already defined earlier in this cell with an identical
# body. The redundant second definition that used to sit here silently shadowed
# the first one and has been removed, so the helper has a single definition.



## Optional checks: layers and columns of the geodata files

In [None]:
# List the layers stored in the GADM GeoPackage (useful to find the right layer name)
layers = fiona.listlayers(GADM_GPKG_PATH)
print(f"Layer trovati in {GADM_GPKG_PATH}:")
for i, lyr in enumerate(layers, 1):
    print(f"{i:>2}. {lyr}")


def _read_sample(path):
    """
    Read a 5-row preview of ``path``; if ``read_file`` rejects the ``rows``
    argument with a ``TypeError``, read the file without it instead.
    """
    try:
        return gpd.read_file(path, rows=5)
    except TypeError:
        return gpd.read_file(path)


nuts_sample = _read_sample(NUTS_GEOJSON_PATH)
print("\nColonne disponibili in NUTS3 GeoJSON:")
print(list(nuts_sample.columns))
display(nuts_sample.head())

counties_sample = _read_sample(US_COUNTIES_PATH)
print("\nColonne disponibili in US counties GeoJSON:")
print(list(counties_sample.columns))
display(counties_sample.head())


## Optional checks: inspect the GADM layer and its ADM2 code column

In [None]:
def inspect_gadm2(gpkg_path: str, layer: str = "gadm_410", prefer_code_cols=("GID_2","ID_2","GID2")):
    """
    Print a diagnostic overview of a GADM layer, focused on its ADM2 codes.

    The ADM2 code column is the first entry of ``prefer_code_cols`` that
    matches an upper-cased column name; failing that, the first column whose
    upper-cased name ends with ``_2`` or contains ``GID``.

    Returns
    -------
    (gdf, gdf_adm2, code_col)
        The full layer, a copy of the rows with a non-missing ADM2 code, and
        the name of the code column that was picked.

    Raises
    ------
    ValueError
        If no candidate code column exists in the layer.
    """
    gdf = gpd.read_file(gpkg_path, layer=layer)
    print(f"[INFO] Caricato layer='{layer}' con {len(gdf):,} feature")
    print(f"[INFO] CRS: {gdf.crs}")
    print("[INFO] Colonne disponibili:")
    print(list(gdf.columns))

    # Upper-cased name -> original name, for case-insensitive matching
    by_upper = {c.upper(): c for c in gdf.columns}
    code_col = next((by_upper[p] for p in prefer_code_cols if p in by_upper), None)
    if code_col is None:
        fallback = [orig for up, orig in by_upper.items() if up.endswith("_2") or "GID" in up]
        if not fallback:
            raise ValueError("Non trovo una colonna codice per ADM2 (tipo 'GID_2' o 'ID_2').")
        code_col = fallback[0]

    print(f"[INFO] Colonna codice ADM2 individuata: {code_col!r}")
    has_code = gdf[code_col].notna()
    gdf_adm2 = gdf[has_code].copy()
    print(f"[INFO] Righe ADM2 (non-NaN su {code_col}): {len(gdf_adm2):,}")

    normalized_codes = gdf_adm2[code_col].astype("string").str.strip().str.upper()
    unique_codes = normalized_codes.nunique()
    print(f"[INFO] Codici ADM2 unici: {unique_codes:,}")

    geom_types = gdf_adm2.geom_type.value_counts().to_dict()
    print(f"[INFO] Geometrie (conteggio per tipo): {geom_types}")

    # Name/identifier columns that exist in this layer
    common_cols = [c for c in ["NAME_0","NAME_1","NAME_2","GID_0","GID_1","GID_2"] if c in gdf_adm2.columns]
    # Code column first, then the common ones, without repeats and keeping order
    show_cols = list(dict.fromkeys([code_col] + common_cols))

    print("\n== Anteprima ADM2 (prime 5 righe) ==")
    display(gdf_adm2[show_cols].head())

    if common_cols:
        print("\n== Missing count su colonne comuni ==")
        display(gdf_adm2[common_cols].isna().sum().to_frame("missing"))

    return gdf, gdf_adm2, code_col

# Run the diagnostic on the GADM layer
gdf_full, gdf_adm2, gadm_code_col = inspect_gadm2(gpkg_path=GADM_GPKG_PATH, layer="gadm_410")


In [23]:
# 0) Keep only the codes present in both the SCI table and the mapping (NUTS3 / GADM1 / COUNTY)
target = select_target_codes(df_sci, df_map, level_types=("NUTS3", "GADM1", "COUNTY"))

# 1) Load the points for each level type
nuts_pts = load_nuts3_points(nuts_geojson_path=NUTS_GEOJSON_PATH)
county_pts = load_us_counties_points(counties_geojson_path=US_COUNTIES_PATH)
gadm1_pts = load_gadm1_points(gadm_gpkg_path=GADM_GPKG_PATH, layer="gadm_410", code_col="GID_1")

print("NUTS pts:", len(nuts_pts), "— US counties pts:", len(county_pts), "— GADM1 pts:", len(gadm1_pts))

# 2) Join with the points; GADM1 and county codes get their keys transformed first
gadm1_nodes = build_nodes_for_type_with_transform(
    target_codes=target,
    type_name="GADM1",
    source_df_points=gadm1_pts,
    transform_fn=normalize_gadm1_mapping_code,
)
county_nodes = build_nodes_for_type_with_transform(
    target_codes=target,
    type_name="COUNTY",
    source_df_points=county_pts,
    transform_fn=normalize_county_mapping_code,
)
nuts_nodes = build_nodes_for_type_with_transform(
    target_codes=target,
    type_name="NUTS3",
    source_df_points=nuts_pts,
    transform_fn=None,
)

print("NUTS3 selezionati:", len(nuts_nodes))
print("GADM1 selezionati:", len(gadm1_nodes))
print("COUNTY selezionati:", len(county_nodes))


[INFO] Target codes selezionati: 6,566 (tipi: ['COUNTY', 'GADM1', 'NUTS3'])


Skipping field geo_point_2d: unsupported OGR type: 3


NUTS pts: 1522 — US counties pts: 3233 — GADM1 pts: 3662
NUTS3 selezionati: 1522
GADM1 selezionati: 1738
COUNTY selezionati: 3225


In [24]:
# 3) Final assembly (the 'gadm_nodes' argument receives the GADM1 nodes)
node_list = assemble_node_list(
    nuts_nodes=nuts_nodes,
    gadm_nodes=gadm1_nodes,
    county_nodes=county_nodes,
)

display(node_list.head())
print(f"Totale nodi: {len(node_list):,}")

# 4) Save the node list to CSV
OUTPUT_CSV = "node_list.csv"
node_list.to_csv(OUTPUT_CSV, index=False)
print(f"✅ Salvato: {OUTPUT_CSV}")


Unnamed: 0,nodeID,nodeLabel,latitude,longitude
0,1,1001,32.507734,-86.65123
1,2,1003,30.732698,-87.762633
2,3,1005,31.882808,-85.392945
3,4,1007,33.039116,-87.096651
4,5,1009,34.012895,-86.533567


Totale nodi: 6,485
✅ Salvato: node_list.csv
