## 0. Setting

In [1]:
# 1. Standard libraries
import os
import gc
import glob
from typing import List

# 2. Third-party libraries
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.ops import nearest_points

# 3. Visualization libraries
import matplotlib.image as mimg
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
from matplotlib import font_manager as fm
from matplotlib.collections import LineCollection
from matplotlib.colors import TwoSlopeNorm
from matplotlib.lines import Line2D
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib.ticker import ScalarFormatter
from matplotlib.ticker import LogLocator
from matplotlib.ticker import FuncFormatter
import seaborn as sns

# --- Set current working directory ---
# Every path in this notebook is relative to the project root, so step up one
# level when the kernel was started inside the `notebooks/` folder.
# (`os` is already imported at the top of this cell.)
if os.getcwd().endswith('notebooks'):
    os.chdir('..')

# --- Matplotlib font configuration for Korean characters ---
# 1) Specify font file paths (use absolute paths)
font_regular = os.path.abspath("assets/malgun.ttf")   # 'Malgun Gothic' Regular
font_bold = os.path.abspath("assets/malgunbd.ttf")    # 'Malgun Gothic' Bold

# 2) Register the fonts and apply global font settings.
#    Registration happens inside the `try` on purpose: `addfont` is the call
#    that fails when a font file is missing, and a missing font should only
#    degrade the rendering of Korean labels, not abort the notebook.
try:
    for font_path in (font_regular, font_bold):
        fm.fontManager.addfont(font_path)

    fam = fm.FontProperties(fname=font_regular).get_name()  # e.g., 'Malgun Gothic'
    plt.rcParams.update({
        "font.family": fam,
        "axes.unicode_minus": False,  # Prevent minus sign from being displayed incorrectly
    })
except Exception as e:
    print(f"Error setting font: {e}")


## 2. Data Preparation

In [2]:
# --- 1. Load and merge SGG Shapefiles ---
print("Loading and merging SGG Shapefiles...")
gdfs = []
# Sorted so the row order of the merged layer does not depend on the
# file-system enumeration order returned by glob.
shapefile_dirs = sorted(glob.glob('data/raw/원본파일(시도별)/*/'))
for s_dir in shapefile_dirs:
    shp_path = os.path.join(s_dir, 'TL_SCCO_SIG.shp')
    if os.path.exists(shp_path):
        try:
            gdf = gpd.read_file(shp_path, encoding='cp949')
            gdfs.append(gdf)
        except Exception as e:
            print(f"Error loading file: {shp_path}, Error: {e}")

# Fail with an explicit message instead of pandas' "No objects to concatenate".
if not gdfs:
    raise FileNotFoundError(
        "No 'TL_SCCO_SIG.shp' could be loaded from 'data/raw/원본파일(시도별)/*/'. "
        "Check the working directory and the raw data folder."
    )

# Merge all SGG datasets into a single GeoDataFrame
SGG_map = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
SGG_map = SGG_map[['SIG_CD', 'SIG_KOR_NM', 'geometry']]

# --- 2. Dissolve subdivided municipalities ---
# (4-digit SIG_CD prefix, city name): every polygon whose code starts with the
# prefix is merged into one polygon coded `prefix + '0'`.
cities_to_dissolve = [
    ('4111', '수원시'), ('4113', '성남시'), ('4117', '안양시'), ('4119', '부천시'),
    ('4127', '안산시'), ('4128', '고양시'), ('4146', '용인시'), ('4311', '청주시'),
    ('4413', '천안시'), ('4711', '포항시'), ('4812', '창원시'), ('5211', '전주시')
]

print("Dissolving subdivided administrative units...")
dissolved_geometries = []
prefixes_to_remove = []
for prefix, name in cities_to_dissolve:
    prefixes_to_remove.append(prefix)
    subset = SGG_map[SGG_map['SIG_CD'].str.startswith(prefix)]
    if not subset.empty:
        unified_geom = subset.union_all()
        dissolved_geometries.append({
            'SIG_CD': prefix + '0',
            'SIG_KOR_NM': name,
            'geometry': unified_geom
        })

# Remove original subdivided polygons
sgg_filtered = SGG_map[~SGG_map['SIG_CD'].str.startswith(tuple(prefixes_to_remove))]

# Append dissolved polygons
if dissolved_geometries:
    dissolved_gdf = gpd.GeoDataFrame(dissolved_geometries, crs=SGG_map.crs)
    SGG_map = pd.concat([sgg_filtered, dissolved_gdf], ignore_index=True)


# 2-digit province (SIDO) code -> province name
SIDO_NM_map = {
    '11': '서울특별시', '26': '부산광역시', '27': '대구광역시', '28': '인천광역시',
    '29': '광주광역시', '30': '대전광역시', '31': '울산광역시', '36': '세종특별자치시',
    '41': '경기도', '51': '강원특별자치도', '43': '충청북도', '44': '충청남도',
    '52': '전북특별자치도', '46': '전라남도', '47': '경상북도', '48': '경상남도',
    '50': '제주특별자치도'
}
SGG_map['SIDO_CD'] = SGG_map['SIG_CD'].str[:2]
SGG_map['SIDO_NM'] = SGG_map['SIDO_CD'].map(SIDO_NM_map)

# Prefix district names of special/metropolitan cities with the two-character
# city name (for example "중구" becomes "서울 중구").
# `na=False`: a province code missing from SIDO_NM_map gives a NaN name, and a
# mask containing NaN cannot be used with `.loc`.
mask_dii = SGG_map['SIDO_NM'].str.endswith(('특별시','광역시'), na=False)
SGG_map.loc[mask_dii, 'SIG_KOR_NM'] = (
    SGG_map.loc[mask_dii, 'SIDO_NM'].str[:2] + " " + SGG_map.loc[mask_dii, 'SIG_KOR_NM']
)

# --- 3. Set final CRS and save file ---
# `set_crs` only labels coordinates and raises if a different CRS is already
# attached, so label when the CRS is missing and reproject otherwise.
# NOTE(review): labelling assumes shapefiles without a .prj are already in
# EPSG:5179 — confirm against the data source.
if SGG_map.crs is None:
    SGG_map = SGG_map.set_crs(epsg=5179)
else:
    SGG_map = SGG_map.to_crs(epsg=5179)

os.makedirs('data/processed/map', exist_ok=True)
SGG_map.to_file('data/processed/map/SGG_map.gpkg', driver='GPKG')

print("SGG boundary data preparation completed. -> 'data/processed/map/SGG_map.gpkg'")

Loading and merging SGG Shapefiles...
Dissolving subdivided administrative units...
SGG boundary data preparation completed. -> 'data/processed/map/SGG_map.gpkg'


## 3. Prepare Analysis

### 3.1. Load Network Data

In [3]:
# --- 1. Get list of CSV files ---
# Sorted so the loading log is reproducible between runs.
all_files = sorted(glob.glob("data/processed/deal_network/deal_by/network_by_*.csv"))

# --- 2. Create dictionary to store data ---
nw_dict_raw = {}

print("Starting network data loading...")
for file in all_files:
    # "network_by_<key>.csv" -> "<key>"
    key = os.path.basename(file).replace("network_by_", "").replace(".csv", "")
    try:
        nw = pd.read_csv(
            file,
            dtype={
                '14_시군구코드_buyer': str,
                '8_시군구코드_seller': str,
                '1_기준연도': int
            }
        )

        # Remove invalid codes ('9999').
        # `na=False` keeps the masks strictly boolean: without it a single
        # missing code makes `~mask` raise, and the broad `except` below would
        # then drop the entire file.
        buyer_invalid = nw['14_시군구코드_buyer'].str.startswith('9999', na=False)
        seller_invalid = nw['8_시군구코드_seller'].str.startswith('9999', na=False)
        nw = nw[~buyer_invalid & ~seller_invalid]

        nw_dict_raw[key] = nw
        print(f" - {key} loaded successfully")
    except Exception as e:
        print(f"Error loading file: {file}, Error: {e}")

# --- 3. Reorder dictionary as desired ---
desired_order = [
    'all', 'man', 'innovation',
    'urban_size_소상공인', 'urban_size_중소기업', 'urban_size_중견기업', 'urban_size_대기업',
    'urban_age_1년 미만', 'urban_age_1~5년 미만', 'urban_age_5~10년 미만', 'urban_age_10년 이상'
]
nw_dict = {key: nw_dict_raw[key] for key in desired_order if key in nw_dict_raw}

# Make silent drops visible: keys expected but not loaded, and files loaded
# but not listed in `desired_order` (those are excluded from `nw_dict`).
missing_keys = [key for key in desired_order if key not in nw_dict_raw]
unlisted_keys = [key for key in nw_dict_raw if key not in desired_order]
if missing_keys:
    print(f"[WARN] Expected networks not loaded: {missing_keys}")
if unlisted_keys:
    print(f"[WARN] Loaded networks not in desired_order (ignored): {unlisted_keys}")

print("\nNetwork data loading and reordering completed.")

Starting network data loading...
 - urban_size_중견기업 loaded successfully
 - urban_age_10년 이상 loaded successfully
 - all loaded successfully
 - urban_size_대기업 loaded successfully
 - urban_age_5~10년 미만 loaded successfully
 - man loaded successfully
 - urban_age_1~5년 미만 loaded successfully
 - urban_size_중소기업 loaded successfully
 - innovation loaded successfully
 - urban_size_소상공인 loaded successfully
 - urban_age_1년 미만 loaded successfully

Network data loading and reordering completed.


### 3.2. Common Utility Function Definitions

In [4]:
def extract_disparity_backbone(df: pd.DataFrame, weight_col: str, group_cols: List[str], alpha: float = 0.05) -> pd.DataFrame:
    """Keep only the statistically significant edges of a directed, weighted network.

    Significance is evaluated from the source side only: for an edge i -> j,
    ``p_ij`` is its share of the total weight leaving i and ``k`` is the number
    of edges leaving i; the edge is kept when ``(1 - p_ij) ** (k - 1) < alpha``.
    Edges of sources with a single outgoing edge get a score of 0.

    Args:
        df: Edge table with one row per edge.
        weight_col: Column holding the edge weight.
        group_cols: ``[source_column, target_column]``.
        alpha: Significance threshold.

    Returns:
        The rows of ``df`` (all original columns) whose (source, target) pair
        passed the filter.
    """
    source_col, target_col = group_cols[0], group_cols[1]

    # Work on a three-column copy with neutral column names.
    edges = df[[source_col, target_col, weight_col]].rename(
        columns={source_col: 'source', target_col: 'target', weight_col: 'weight'}
    )

    # Per-source out-strength (sum of weights) and out-degree (edge count).
    per_source = edges.groupby('source')['weight'].agg(s_out='sum', k_out='count')
    edges = edges.join(per_source, on='source', how='inner')

    # Share of the source's outgoing weight carried by each edge (0/0 -> 0).
    share = (edges['weight'] / edges['s_out']).fillna(0)

    exponent = (edges['k_out'] - 1).clip(lower=0)
    score = np.power(1.0 - share, exponent)
    # Sources with at most one outgoing edge are scored 0.
    score = score.mask(edges['k_out'] <= 1, 0.0)

    kept_pairs = edges.loc[score < alpha, ['source', 'target']].set_axis(
        [source_col, target_col], axis=1
    )

    # Inner merge restores every original column for the surviving pairs.
    return df.merge(kept_pairs, on=[source_col, target_col], how='inner')
    
def _ensure_rsi(df: pd.DataFrame, metric_col: str, group_cols: List[str]) -> pd.Series:
    """Ensure RSI (Relative Share Index) is calculated properly."""
    df = df[df[group_cols[0]] != df[group_cols[1]]]
    sum = df[metric_col].sum()
    rsi = (df[metric_col] / sum * 100) if sum > 0 else 0.0
    return rsi.fillna(0).clip(lower=0.0)

def _norm01(values: np.ndarray) -> np.ndarray:
    """Normalize the given array to the [0, 1] range."""
    v = np.asarray(values, dtype=float)
    vmin, vmax = np.nanmin(v), np.nanmax(v)
    if not (np.isfinite(vmin) and np.isfinite(vmax) and vmax > vmin):
        return np.full_like(v, 0.5)
    return (v - vmin) / (vmax - vmin)

def _compute_dii(df: pd.DataFrame, metric_col: str, group_cols: List[str]) -> pd.Series:
    """Compute the Dominance Index (DII)."""
    df = df[[group_cols[0], group_cols[1], metric_col]].copy()
    df = df[df[group_cols[0]] != df[group_cols[1]]]
    inflow = df.groupby(group_cols[1])[metric_col].sum()
    outflow = df.groupby(group_cols[0])[metric_col].sum()
    io = pd.concat([inflow, outflow], axis=1).fillna(0)
    s = io.sum(axis=1)
    m = s.mean()
    dii = (s / m) if m > 0 else s
    dii.index = dii.index.astype(str)
    return dii

def _curve_segments(p1: Point, p2: Point, curvature=0.1, nseg=15):
    """
    Generate a curved line between two points.
    The curve always bends to the right (clockwise) relative to the direction from p1 to p2.
    """
    # 1. Compute direction vector and distance from p1 to p2
    dx, dy = p2.x - p1.x, p2.y - p1.y
    dist = np.hypot(dx, dy)
    if dist == 0: return None, None

    # 2. Rotate the direction vector 90° clockwise to get a rightward perpendicular vector
    if p1.x < p2.x:
        px, py = dy / dist, -dx / dist
    else:
        px, py = -dy / dist, dx / dist

    # 3. Compute control point by shifting midpoint to the right
    mx, my = (p1.x + p2.x) / 2, (p1.y + p2.y) / 2
    cx, cy = mx + px * dist * curvature, my + py * dist * curvature
    
    # 4. Generate coordinates using quadratic Bézier curve
    t = np.linspace(0, 1, nseg + 1)
    bx = (1-t)**2*p1.x + 2*(1-t)*t*cx + t**2*p2.x
    by = (1-t)**2*p1.y + 2*(1-t)*t*cy + t**2*p2.y
    
    # 5. Return curve segments and color gradient for visualization
    segs = np.stack([np.column_stack([bx[:-1], by[:-1]]), np.column_stack([bx[1:], by[1:]])], axis=1)
    cols = np.linspace((1., 0., 0., 1.), (1., 1., 0., 1.), nseg)  # Red to Yellow
    
    return segs, cols

# Map elements
ARROW_FILE = "assets/north_arrow.png"  # image used for the north arrow
SCALE_LEN_M = 100_000                  # scale bar length

def add_north_arrow(ax, x, y, arrow_file, zoom=0.18):
    """Place the north-arrow image on ``ax`` at axes-fraction position (x, y).

    Does nothing when ``arrow_file`` does not exist.
    """
    if not os.path.exists(arrow_file):
        return

    image = mimg.imread(arrow_file)
    arrow_box = AnnotationBbox(
        OffsetImage(image, zoom=zoom),
        (x, y),
        xycoords='axes fraction',
        frameon=False,
    )
    ax.add_artist(arrow_box)

def add_scale_bar(ax, length, location=(0.06,0.03), linewidth=3, color='black'):
    """Draw a horizontal scale bar with a kilometre label on ``ax``.

    Args:
        ax: Axes to draw on; its current x/y limits position the bar.
        length: Bar length in data units (divided by 1000 for the "km" label).
        location: Left end of the bar as fractions of the x and y extents.
        linewidth: Line width of the bar.
        color: Line color of the bar.
    """
    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()

    # Left end of the bar in data coordinates
    bar_x = x_lo + (x_hi - x_lo) * location[0]
    bar_y = y_lo + (y_hi - y_lo) * location[1]

    ax.plot([bar_x, bar_x + length], [bar_y, bar_y], color=color, linewidth=linewidth)

    # Label centred above the bar
    label = f'{round(length/1000):,} km'
    ax.text(bar_x + length / 2, bar_y, label, va='bottom', ha='center', fontsize=10)
    


# --- Province code conversion dictionaries ---
# 2-digit MOIS province code -> 5-digit MOIS province code (code + "000").
MOIS2_TO_MOIS5 = {
    code: code + "000"
    for code in (
        "11", "26", "27", "28", "29", "30", "31", "36",
        "41", "43", "44", "46", "47", "48", "50", "51", "52",
    )
}

# 2-digit KOSIS province code -> 5-digit MOIS province code.
# NOTE(review): "09" deliberately maps to None; what that code stands for is
# not visible in this notebook — confirm.
KOSIS2_TO_MOIS5 = {
    "11": "11000",
    "21": "26000",
    "22": "27000",
    "23": "28000",
    "24": "29000",
    "25": "30000",
    "26": "31000",
    "29": "36000",
    "31": "41000",
    "32": "51000",
    "33": "43000",
    "34": "44000",
    "35": "52000",
    "36": "46000",
    "37": "47000",
    "38": "48000",
    "39": "50000",
    "09": None,
}

# --- Code conversion ---
def to_mois5_from_sido(x):
    """Convert a province code from the shapefile to a 5-digit MOIS code.

    The value is stringified, left-padded to two characters, and its first two
    characters are looked up in ``KOSIS2_TO_MOIS5``. Returns None for None
    input or an unknown code.
    """
    if x is None:
        return None
    prefix = str(x).zfill(2)[:2]
    return KOSIS2_TO_MOIS5.get(prefix)

def to_mois5_from_df(x):
    """Convert an SGG code from a dataframe to its 5-digit province code.

    The first two characters are tried as a MOIS code first and as a KOSIS
    code second, so a prefix present in both tables resolves to the MOIS
    meaning. Returns None for None, blank, or unknown input.
    """
    if x is None:
        return None
    code = str(x).strip()
    if code == "":
        return None
    prefix = code[:2].zfill(2)
    if prefix in MOIS2_TO_MOIS5:
        return MOIS2_TO_MOIS5[prefix]
    return KOSIS2_TO_MOIS5.get(prefix)

# --- Geometry utilities ---
def _label_point(geom):
    """Return a safe label point inside a polygon."""
    c = geom.centroid
    return c if geom.contains(c) else geom.representative_point()


# --- Manual label offsets for some provinces ---
MANUAL_OFFSET = {"28000": (20000, -20000), "41000": (10000, -30000)}
def _apply_offset(row):
    """Apply manual offset to label coordinates if specified."""
    p = row["rep_pt"]
    off = MANUAL_OFFSET.get(row["MOIS5"])
    return Point(p.x + off[0], p.y + off[1]) if off else p




## 4. DII / RSI Tables and Histograms

In [5]:
# ===== Output directories =====
# TABLE_DIR receives the per-network DII / RSI CSV tables and is created here.
# HIST_DIR is the root for the histogram images; its sub-folders are created
# later, right before each figure is saved.
TABLE_DIR = "outputs/tables/cities_DII_RSI"
HIST_DIR = "outputs/figures/histograms_cities_DII_RSI"
os.makedirs(TABLE_DIR, exist_ok=True)


# ===== Function to generate log-spaced bins =====
def make_log_bins(xmax, n_bins=30, xmin=1e-8):
    """Return ``n_bins`` logarithmically spaced bin edges from ``xmin`` to ``xmax``.

    Note that ``n_bins`` is the number of edges, so the resulting histogram
    has ``n_bins - 1`` bins. Both bounds must be positive.
    """
    log_lo = np.log10(xmin)
    log_hi = np.log10(xmax)
    return np.logspace(log_lo, log_hi, n_bins)


# Plain-decimal tick formatter.
# NOTE(review): `formatter` is not referenced by the code visible in this
# notebook section (the histograms install their own FuncFormatter) — confirm
# whether it is still needed.
formatter = ScalarFormatter(useMathText=False)
formatter.set_scientific(False)   # disable scientific notation
formatter.set_powerlimits((0, 0)) # NOTE(review): power limits only matter while scientific notation is on — confirm this call is needed


# ===== Global axis ranges =====
# Fixed ranges shared by every histogram of the same kind. Keys are
# "<level>_<metric>" with level SGG or SIDO and metric DII or RSI.
# NOTE(review): the values are hard-coded; the trailing comments suggest they
# were taken from the urban_size_large network — re-derive if the data changes.

# Upper x-axis bound (last histogram bin edge)
XLIMS = {
    "SGG_DII": 23.933,   # urban_size_large
    "SGG_RSI": 1.860,    # urban_size_large
    "SIDO_DII": 10.800,  # urban_size_large
    "SIDO_RSI": 42.382   # urban_size_large
}

# Lower x-axis bound (first histogram bin edge); must be > 0 for the log scale
XMIN = {
    "SGG_DII": 0.000793657,   # ≈ 7.94e-04
    "SGG_RSI": 0.00000215601, # ≈ 2.16e-06
    "SIDO_DII": 0.024856,     # ≈ 2.49e-02
    "SIDO_RSI": 0.000132753   # ≈ 1.33e-04
}

# Upper y-axis bound in percent; taller bars are clipped and annotated
YLIMS = {
    "SGG_DII": 30.1,
    "SGG_RSI": 30.1,
    "SIDO_DII": 40.1,
    "SIDO_RSI": 40.1
}

# --- Load mapping data ---
# Reloaded from disk so this cell only needs the saved GeoPackage, not the
# in-memory result of the data-preparation cell.
SGG_map = gpd.read_file("data/processed/map/SGG_map.gpkg").to_crs(epsg=5179)
# 2-digit province (SIDO) code -> province name
SIDO_NM_map = {
    '11': '서울특별시', '26': '부산광역시', '27': '대구광역시', '28': '인천광역시',
    '29': '광주광역시', '30': '대전광역시', '31': '울산광역시', '36': '세종특별자치시',
    '41': '경기도', '51': '강원특별자치도', '43': '충청북도', '44': '충청남도',
    '52': '전북특별자치도', '46': '전라남도', '47': '경상북도', '48': '경상남도',
    '50': '제주특별자치도'
}

# ===== Column names shared by every network table =====
SELLER_COL = "8_시군구코드_seller"
BUYER_COL = "14_시군구코드_buyer"
WEIGHT_COL = "총거래관계"
YEAR_COL = "1_기준연도"


def _share_percent(weights: pd.Series) -> pd.Series:
    """Return each weight as a percentage of the total, clipped at 0.

    Always returns a Series (all zeros when the total is not positive), so the
    result can be assigned to a column and post-processed uniformly.
    """
    total = weights.sum()
    if total > 0:
        share = weights / total * 100
    else:
        share = pd.Series(0.0, index=weights.index)
    return share.fillna(0).clip(lower=0.0)


def _save_log_histogram(values, limits_key, n_edges, color, title, xlabel,
                        quartile_decimals, outpath):
    """Save a log-x percentage histogram of the positive entries of ``values``.

    Args:
        values: Array-like of numbers; NaN and non-positive entries are dropped.
        limits_key: Key into XLIMS / XMIN / YLIMS (e.g. "SGG_DII").
        n_edges: Number of log-spaced bin edges passed to ``make_log_bins``.
        color: Bar fill color.
        title: Figure title.
        xlabel: X-axis label.
        quartile_decimals: Decimals used for the Q1/Q2/Q3 annotation.
        outpath: Destination PNG; its parent folder is created when needed.

    Nothing is written when no positive value remains.
    """
    vals = np.asarray(values, dtype=float)
    vals = vals[vals > 0]  # NaN compares False, so it is dropped as well
    if vals.size == 0:
        return

    q1, q2, q3 = np.percentile(vals, [25, 50, 75])
    y_max = YLIMS[limits_key]
    bin_edges = make_log_bins(XLIMS[limits_key], n_edges, XMIN[limits_key])

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        sns.histplot(vals, bins=bin_edges, color=color, edgecolor="black",
                     stat="percent", ax=ax)
        ax.set_xscale("log")
        ax.set_xlim(bin_edges[0], bin_edges[-1])
        ax.set_ylim(0, y_max)
        ax.xaxis.set_major_locator(LogLocator(base=10.0, subs=[1.0], numticks=20))
        # Plain decimals without trailing zeros (0.001 rather than 1e-3)
        ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x:.6f}".rstrip('0').rstrip('.')))
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel("Percentage")

        # --- annotate bars exceeding ylim ---
        counts, edges = np.histogram(vals, bins=bin_edges)
        in_range = counts.sum()
        # All values may fall outside the fixed bin range; avoid 0/0 then.
        perc = counts / in_range * 100 if in_range > 0 else np.zeros(len(counts))
        for i, p in enumerate(perc):
            if p > y_max:
                xpos = (edges[i] + edges[i+1]) / 2
                ax.text(xpos, y_max*0.95, f"{p:.1f}%",
                        ha="center", va="top", fontsize=8, color="black",
                        bbox=dict(boxstyle="round,pad=0.2",
                                  facecolor="white", alpha=0.7))

        d = quartile_decimals
        ax.text(0.02, 0.95, f"Q1={q1:.{d}f}\nQ2={q2:.{d}f}\nQ3={q3:.{d}f}",
                transform=ax.transAxes, ha="left", va="top", fontsize=9,
                bbox=dict(boxstyle="round", facecolor="white", alpha=0.6))
        ax.yaxis.grid(visible=True, linestyle='--', linewidth=0.5, alpha=0.7)

        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        fig.tight_layout()
        fig.savefig(outpath, dpi=100)
    finally:
        # Close this figure explicitly so many iterations do not pile up memory.
        plt.close(fig)


# SGG code -> name lookup; it does not depend on the network, so build it once.
sgg_names = SGG_map[['SIG_CD', 'SIG_KOR_NM']].copy()
sgg_names['SIG_CD'] = sgg_names['SIG_CD'].astype(str)

# ===== Main loop =====
for key, df in nw_dict.items():
    years = sorted(set(df[YEAR_COL].unique()) - {9999})
    dii_rows, rsi_rows = [], []
    for year in years:
        df_year = df[df[YEAR_COL] == year].copy()

        # -------- DII (SGG) --------
        dii_sgg = _compute_dii(df_year, WEIGHT_COL, [SELLER_COL, BUYER_COL])
        dii_rows.extend(
            {"level": "SGG", "SIG_CD": region, "year": year, "DII": val}
            for region, val in dii_sgg.items()
        )
        _save_log_histogram(
            dii_sgg.to_numpy(), "SGG_DII", 30, "green",
            f"DII Histogram (SGG, {year}, log-x)", "DII", 2,
            os.path.join(HIST_DIR, key, "SGG", "DII", f"dii_sgg_{key}_{year}.png"),
        )

        # -------- RSI (SGG) --------
        # Shares are computed over non-self-loop links only; self-loop rows
        # keep NaN in "__RSI__" and are still written to the table.
        non_self = df_year[SELLER_COL] != df_year[BUYER_COL]
        df_year["__RSI__"] = _share_percent(df_year.loc[non_self, WEIGHT_COL])
        rsi_rows.extend(
            {"level": "SGG", "source": s, "target": t, "year": year, "RSI": r}
            for s, t, r in zip(df_year[SELLER_COL], df_year[BUYER_COL], df_year["__RSI__"])
        )
        _save_log_histogram(
            df_year["__RSI__"].to_numpy(), "SGG_RSI", 30, "#f28e2b",
            f"RSI Histogram (SGG links, {year}, log-x)", "RSI", 6,
            os.path.join(HIST_DIR, key, "SGG", "RSI", f"rsi_sgg_{key}_{year}.png"),
        )

        # -------- DII / RSI (SIDO) --------
        # Aggregate SGG links to province level; rows whose code cannot be
        # mapped to a province are dropped.
        d_sido = df_year.assign(
            seller_sido=[to_mois5_from_df(code) for code in df_year[SELLER_COL]],
            buyer_sido=[to_mois5_from_df(code) for code in df_year[BUYER_COL]],
        ).dropna(subset=["seller_sido", "buyer_sido"])
        agg = d_sido.groupby(["seller_sido", "buyer_sido"], as_index=False)[WEIGHT_COL].sum()

        # DII (SIDO)
        dii_sido = _compute_dii(agg, WEIGHT_COL, ["seller_sido", "buyer_sido"])
        dii_rows.extend(
            {"level": "SIDO", "SIG_CD": region, "year": year, "DII": val}
            for region, val in dii_sido.items()
        )
        _save_log_histogram(
            dii_sido.to_numpy(), "SIDO_DII", 15, "green",
            f"DII Histogram (SIDO, {year}, log-x)", "DII", 2,
            os.path.join(HIST_DIR, key, "SIDO", "DII", f"dii_sido_{key}_{year}.png"),
        )

        # RSI (SIDO)
        # NOTE(review): unlike the SGG level, within-province flows
        # (seller_sido == buyer_sido) are included in both the total and the
        # links here — confirm this asymmetry is intended.
        agg["__RSI__"] = _share_percent(agg[WEIGHT_COL])
        rsi_rows.extend(
            {"level": "SIDO", "source": s, "target": t, "year": year, "RSI": r}
            for s, t, r in zip(agg["seller_sido"], agg["buyer_sido"], agg["__RSI__"])
        )
        _save_log_histogram(
            agg["__RSI__"].to_numpy(), "SIDO_RSI", 15, "#f28e2b",
            f"RSI Histogram (SIDO links, {year}, log-x)", "RSI", 3,
            os.path.join(HIST_DIR, key, "SIDO", "RSI", f"rsi_sido_{key}_{year}.png"),
        )

    # Convert to DataFrame. Explicit columns keep the post-processing below
    # valid even when a network produced no records at all.
    dii_records = pd.DataFrame(dii_rows, columns=["level", "SIG_CD", "year", "DII"])
    rsi_records = pd.DataFrame(rsi_rows, columns=["level", "source", "target", "year", "RSI"])

    # ===== Post-process records =====
    # Merge with SGG names (SIDO-level rows have no SGG match and stay empty)
    dii_records = dii_records.merge(
        sgg_names.rename(columns={'SIG_KOR_NM': 'SIGUNGU_NM'}),
        on='SIG_CD', how='left')
    rsi_records = rsi_records.merge(
        sgg_names.rename(columns={'SIG_CD': 'source', 'SIG_KOR_NM': 'SIGUNGU_NM_source'}),
        on='source', how='left')
    rsi_records = rsi_records.merge(
        sgg_names.rename(columns={'SIG_CD': 'target', 'SIG_KOR_NM': 'SIGUNGU_NM_target'}),
        on='target', how='left')

    # Merge with CTPRVN names (first two characters of the code = province)
    dii_records['SIDO_NM'] = dii_records['SIG_CD'].str[:2].str.zfill(2).map(SIDO_NM_map)
    rsi_records['SIDO_NM_source'] = rsi_records['source'].str[:2].str.zfill(2).map(SIDO_NM_map)
    rsi_records['SIDO_NM_target'] = rsi_records['target'].str[:2].str.zfill(2).map(SIDO_NM_map)

    # ===== Save results =====
    dii_records.to_csv(f"{TABLE_DIR}/dii_{key}.csv",index=False, encoding='cp949')
    rsi_records.to_csv(f"{TABLE_DIR}/rsi_{key}.csv",index=False, encoding='cp949')

    print("[OK] Processed key:", key)

[OK] Processed key: all
[OK] Processed key: man
[OK] Processed key: innovation
[OK] Processed key: urban_size_소상공인
[OK] Processed key: urban_size_중소기업
[OK] Processed key: urban_size_중견기업
[OK] Processed key: urban_size_대기업
[OK] Processed key: urban_age_1년 미만
[OK] Processed key: urban_age_1~5년 미만
[OK] Processed key: urban_age_5~10년 미만
[OK] Processed key: urban_age_10년 이상


## 5. Network Geovisualization

### 5.1. SGG

In [None]:
# --- Data loading ---
# SGG polygons, with a centroid per unit used as the node position of links.
SGG_map = gpd.read_file('data/processed/map/SGG_map.gpkg').to_crs(epsg=5179)
SGG_map['centroid'] = SGG_map.geometry.centroid
centroids = SGG_map.set_index("SIG_CD")["centroid"]
# Province boundaries drawn on top of the SGG layer.
CTPRVN = gpd.read_file("data/raw/bnd_sido_00_2024_2Q/bnd_sido_00_2024_2Q.shp").to_crs(epsg=5179)

# --- Analysis parameters ---
# Each entry: (metric column, file-name suffix, lower quantile cut, title)
METRICS = [("총거래관계", "총거래관계", 0, "총거래관계")]
# Significance level of the disparity backbone filter
ALPHA_LEVEL = 0.05

# --- Link style ---
BASE_LW = 0.1        # minimum line width
LW_RANGE = 3.2       # extra width added for the strongest links
RSI_W_BLEND = 0.30   # weight of RSI (vs. the metric) in the line width
MIN_ALPHA = 0.05
MAX_ALPHA = 0.99
N_SEG = 15

# --- Backbone link style ---
BASE_LW_bb = 1
LW_RANGE_bb = 3.2
RSI_W_BLEND_bb = 0.30
MIN_ALPHA_bb = 0.1
MAX_ALPHA_bb = 0.99

# --- DII (node size) style ---
DII_BASE = 10
DII_RANGE = 30
DII_CLIP_HI = 4.0

# --- Choropleth node size ---
NODE_SIZE_FIXED = 5


# --- Output directories ---
OUTPUT_DIR_ORIGINAL_NODE = "outputs/figures/network_maps/network_maps(sigungu)/network_maps_original"
OUTPUT_DIR_BACKBONE_NODE = "outputs/figures/network_maps/network_maps(sigungu)/network_maps_backbone"
OUTPUT_DIR_ORIGINAL_CHORO = "outputs/figures/network_maps/network_maps(sigungu)/network_maps_original_choropleth"
OUTPUT_DIR_BACKBONE_CHORO = "outputs/figures/network_maps/network_maps(sigungu)/network_maps_backbone_choropleth"

for d in (
    OUTPUT_DIR_ORIGINAL_NODE,
    OUTPUT_DIR_BACKBONE_NODE,
    OUTPUT_DIR_ORIGINAL_CHORO,
    OUTPUT_DIR_BACKBONE_CHORO,
):
    os.makedirs(d, exist_ok=True)

# --- Main loop ---
for key, df in nw_dict.items():
    years = sorted(set(df["1_기준연도"].unique()) - {9999})
    for year in years:
        df_year = df[df["1_기준연도"] == year].copy()
        for metric, suffix, q, title in METRICS:
            if metric not in df_year.columns:
                print(f"[WARN] Metric '{metric}' not found. Skipping {key} {year}.")
                continue
            
            sel = df_year[df_year[metric] >= df_year[metric].quantile(q)] if q > 0 else df_year
            sel = sel.dropna(subset=["8_시군구코드_seller", "14_시군구코드_buyer"]).copy()
            if sel.empty:
                print(f"[SKIP] {key}-{year}-{suffix}: No valid data.")
                continue

            # Compute metrics
            dii = _compute_dii(sel, metric, ["8_시군구코드_seller", "14_시군구코드_buyer"])
            dii_sizes = DII_BASE + DII_RANGE * dii
            sel["__RSI__"] = _ensure_rsi(sel, metric, ["8_시군구코드_seller", "14_시군구코드_buyer"])

            # RSI stats
            rsi_all = sel["__RSI__"].values[np.isfinite(sel["__RSI__"].values)]
            pct_targets = [0.97, 0.98, 0.99]
            rsi_percentiles = np.quantile(rsi_all, pct_targets) if rsi_all.size > 0 else np.zeros_like(pct_targets)
            clip_lo, clip_hi = max(0, rsi_percentiles[0]), rsi_percentiles[-1]
            if clip_hi <= clip_lo: clip_hi = clip_lo + 1e-6

            sel_backbone = extract_disparity_backbone(sel, weight_col=metric,  group_cols=["8_시군구코드_seller", "14_시군구코드_buyer"], alpha=ALPHA_LEVEL)

            # Visualization modes: Node-size map vs Choropleth map
            vis_modes = [
                {"mode": "Node", "tasks": [
                    {'name': 'Original', 'df': sel, 'dir': OUTPUT_DIR_ORIGINAL_NODE, 'prefix': ''},
                    {'name': 'Backbone', 'df': sel_backbone, 'dir': OUTPUT_DIR_BACKBONE_NODE, 'prefix': f'[Backbone ({len(sel_backbone)/len(sel)*100:.1f}%) ]'}
                ]},
                {"mode": "Choropleth", "tasks": [
                    {'name': 'Original', 'df': sel, 'dir': OUTPUT_DIR_ORIGINAL_CHORO, 'prefix': ''},
                    {'name': 'Backbone', 'df': sel_backbone, 'dir': OUTPUT_DIR_BACKBONE_CHORO, 'prefix': f'[Backbone ({len(sel_backbone)/len(sel)*100:.1f}%) ]'}
                ]}
            ]

            # Run both visualization modes
            for vis in vis_modes:
                for task in vis["tasks"]:
                    task_name, df_plot, out_dir_base, title_prefix = task['name'], task['df'], task['dir'], task['prefix']
                    outdir = os.path.join(out_dir_base, key)
                    outpath = os.path.join(outdir, f"{key}_{year}_{suffix}.png")

                    if os.path.exists(outpath):
                        print(f"   [SKIP] {vis['mode']} {task_name}: {outpath}")
                        continue
                    if df_plot.empty:
                        print(f"   [SKIP] {vis['mode']} {task_name}: No edges.")
                        continue

                    # Prepare edge list
                    rows = [(str(r["8_시군구코드_seller"]), str(r["14_시군구코드_buyer"]),
                             float(r[metric]), float(r["__RSI__"]))
                            for _, r in df_plot.iterrows() if r["8_시군구코드_seller"] != r["14_시군구코드_buyer"]]
                    if not rows: continue
                    rows_arr = np.array(rows, dtype=object)
                    metric_vals_plot = rows_arr[:, 2].astype(float)
                    rsi_vals_plot = rows_arr[:, 3].astype(float)
                    all_valid_nodes = pd.unique(rows_arr[:, [0, 1]].ravel())

                    # --- Plot ---
                    fig, ax = plt.subplots(figsize=(10, 11), dpi=100)

                    if vis["mode"] == "Choropleth":
                        sgg_dii = SGG_map.merge(dii.rename('dii'), left_on='SIG_CD', right_index=True, how='left').fillna({'dii': 0})
                        norm = TwoSlopeNorm(vcenter=1.0, vmin=dii.min(), vmax=dii.max())
                        sgg_dii.plot(column='dii', cmap='coolwarm', norm=norm, ax=ax,
                                     edgecolor='white', linewidth=0.1, legend=True,
                                     legend_kwds={'label': "Dominance Index (DII)",
                                                  'orientation': "vertical", 'shrink': 0.6})
                    else:  # Node-size mode
                        SGG_map.plot(ax=ax, linewidth=0.6, edgecolor='black', color='darkgrey', zorder=1)

                    CTPRVN.plot(ax=ax, color="none", edgecolor="black", linewidth=1, zorder=2)

                    # Link styles
                    rsi_clipped = np.clip(rsi_vals_plot, clip_lo, clip_hi)
                    rsi_scaled_01 = (rsi_clipped - clip_lo) / (clip_hi - clip_lo)
                    alphas = MIN_ALPHA + (MAX_ALPHA - MIN_ALPHA) * rsi_scaled_01
                    vmax = np.max(metric_vals_plot)
                    logv = np.log10((metric_vals_plot / vmax) * 9.0 + 1.0) if vmax > 0 else np.zeros_like(metric_vals_plot)
                    w_norm = _norm01(logv)
                    widths = BASE_LW + LW_RANGE * ((1.0 - RSI_W_BLEND) * w_norm + RSI_W_BLEND * rsi_scaled_01)
                    if task_name == 'Backbone':
                        alphas = MIN_ALPHA_bb + (MAX_ALPHA_bb - MIN_ALPHA_bb) * rsi_scaled_01
                        widths = BASE_LW_bb + LW_RANGE_bb * ((1.0 - RSI_W_BLEND_bb) * w_norm + RSI_W_BLEND_bb * rsi_scaled_01)



                    # Draw edges
                    all_segments, all_colors, all_widths = [], [], []
                    for i in range(len(rows_arr)):
                        s, t = rows_arr[i, 0], rows_arr[i, 1]
                        if s not in centroids.index or t not in centroids.index: continue
                        w, a = widths[i], alphas[i]
                        segs, cols = _curve_segments(centroids.loc[s], centroids.loc[t],
                                                     curvature=0.25 if s < t else -0.25)
                        if segs is not None:
                            cols[:, 3] = a
                            all_segments.extend(segs); all_colors.extend(cols); all_widths.extend([w] * len(segs))
                    if all_segments:
                        lc = LineCollection(all_segments, colors=all_colors, linewidths=all_widths, capstyle="round", zorder=3)
                        ax.add_collection(lc)

                    # Draw nodes
                    xs = [centroids.loc[c].x for c in all_valid_nodes]
                    ys = [centroids.loc[c].y for c in all_valid_nodes]
                    if vis["mode"] == "Choropleth":
                        ax.scatter(xs, ys, s=NODE_SIZE_FIXED, c="black", alpha=0.6,
                                   edgecolors="white", linewidth=0.5, zorder=4)
                    else:
                        ss = [dii_sizes.get(c, DII_BASE) for c in all_valid_nodes]
                        ax.scatter(xs, ys, s=ss, c="green", alpha=0.5,
                                   edgecolors="white", linewidth=0.9, zorder=3)

                    # Map elements
                    add_north_arrow(ax, 0.86, 0.90, ARROW_FILE)
                    add_scale_bar(ax, SCALE_LEN_M)
                    ax.set_aspect("equal", adjustable="box")
                    ax.set_title(f"{title_prefix}{key} — {year} · {title}", fontsize=14)
                    ax.axis("off")

                    # Legends
                    handles = []
                    if vis["mode"] == "Node":
                        handles.append(Line2D([0],[0], color="none", label="Dominance Index (DII)"))
                        for lab in [0.5, 1.0, 2.0, DII_CLIP_HI]:
                            size_demo = DII_BASE + DII_RANGE * min(DII_CLIP_HI, lab)
                            handles.append(Line2D([], [], marker="o", linestyle="None",
                                                   markersize=np.sqrt(size_demo),
                                                   markerfacecolor="green", alpha=0.5,
                                                   markeredgecolor="white", mew=0.9,
                                                   label=f"   ×{lab:g}"))
                    handles.extend([Line2D([0],[0], color="none", label=" "), Line2D([0],[0], color="none", label="RSI (percentile)")])
                    base_rgb = mcolors.to_rgb("#f28e2b")
                    rsi_demo_clipped = np.clip(rsi_percentiles, clip_lo, clip_hi)
                    rsi_demo_scaled_01 = (rsi_demo_clipped - clip_lo) / (clip_hi - clip_lo)

                    for i, p in enumerate(pct_targets):
                        rv = rsi_percentiles[i]
                        idx = np.argmin(np.abs(rsi_vals_plot - rv))
                        
                        alpha_val = alphas[idx]
                        width_demo = widths[idx]
                        handles.append(Line2D([0],[0], color=(*base_rgb, alpha_val), lw=width_demo, solid_capstyle="round",
                                               label=f"   {rv:.3f}% (P{p*100:g})"))
                    handles.extend([Line2D([0],[0], color="none", label=" "), Line2D([0],[0], color="none", label="Source/Target"),
                                    Line2D([0],[0], color="red", lw=3, label="   Source"),
                                    Line2D([0],[0], color="yellow", lw=3, label="   Target")])
                    leg = ax.legend(handles=handles, loc="lower right", frameon=True, fontsize=12,
                                    title_fontsize=14, title="Legend")
                    leg.get_frame().set_alpha(0.96)

                    os.makedirs(outdir, exist_ok=True)
                    plt.tight_layout()
                    fig.savefig(outpath, dpi=100, bbox_inches='tight')
                    #plt.show()
                    plt.close(fig); gc.collect()
                    print(f"   [OK] Saved {vis['mode']} {task_name} plot to {outpath}")

gc.collect()

   [SKIP] Node Original: outputs/figures/network_maps/network_maps_original/all/all_2016_총거래관계.png
   [SKIP] Node Backbone: outputs/figures/network_maps/network_maps_backbone/all/all_2016_총거래관계.png
   [SKIP] Choropleth Original: outputs/figures/network_maps/network_maps_original_choropleth/all/all_2016_총거래관계.png
   [SKIP] Choropleth Backbone: outputs/figures/network_maps/network_maps_backbone_choropleth/all/all_2016_총거래관계.png
   [SKIP] Node Original: outputs/figures/network_maps/network_maps_original/all/all_2017_총거래관계.png
   [SKIP] Node Backbone: outputs/figures/network_maps/network_maps_backbone/all/all_2017_총거래관계.png
   [SKIP] Choropleth Original: outputs/figures/network_maps/network_maps_original_choropleth/all/all_2017_총거래관계.png
   [SKIP] Choropleth Backbone: outputs/figures/network_maps/network_maps_backbone_choropleth/all/all_2017_총거래관계.png
   [SKIP] Node Original: outputs/figures/network_maps/network_maps_original/all/all_2018_총거래관계.png
   [SKIP] Node Backbone: outputs/figures/

3272688

### 5.2. Sido

In [None]:
# =============== basic setup and SIDO data processing ===============
# Configures the styling constants and output folders for the SIDO-level
# network maps, loads the SIDO boundary shapefile and derives one anchor
# point per SIDO (indexed by 5-digit MOIS code) for drawing nodes and edges.

# --- Metrics & paths ---
CANDIDATE_METRICS = [("총거래관계", "총거래관계", "총거래관계")]
SIDO_PATH = "data/raw/bnd_sido_00_2024_2Q/bnd_sido_00_2024_2Q.shp"
ARROW_FILE = "north_arrow.png"
SCALE_LEN_M = 100_000  # scale bar length (m)

# --- Link style ---
# Line width = BASE_LW + LW_RANGE * blend(metric weight, RSI); RSI_W_BLEND is
# the share of the RSI term in that blend. Alpha is interpolated between
# MIN_ALPHA and MAX_ALPHA by the scaled RSI.
# N_SEG is not read in this cell — presumably consumed by _curve_segments (confirm).
BASE_LW, LW_RANGE, RSI_W_BLEND = 1, 3.2, 0.30
MIN_ALPHA, MAX_ALPHA, N_SEG = 0.1, 0.99, 15

# Backbone link style (same formulas, applied to the 'Backbone' tasks)
BASE_LW_bb, LW_RANGE_bb, RSI_W_BLEND_bb = 1, 3.2, 0.30
MIN_ALPHA_bb, MAX_ALPHA_bb = 0.5, 0.99

# --- Node style ---
# Node marker area = DII_BASE + DII_RANGE * DII; DII_CLIP_HI caps the legend samples.
DII_BASE, DII_RANGE, DII_CLIP_HI = 10, 50, 4.0
NODE_SIZE_FIXED = 15  # fixed size for choropleth mode


# Each entry is unpacked as (metric column, file suffix, q, title) by the main loop.
METRICS = [("총거래관계", "총거래관계", 0, "총거래관계")]
# Passed as `alpha` to extract_disparity_backbone.
ALPHA_LEVEL = 0.1

# Load SIDO shapefile and set CRS to EPSG:5179
SIDO = gpd.read_file(SIDO_PATH).to_crs(epsg=5179)

# Compute representative points (for labels)
SIDO["rep_pt"] = SIDO.geometry.apply(_label_point)

# Detect code column (CTPRVN_CD or SIDO_CD)
code_col = next((c for c in ["CTPRVN_CD", "SIDO_CD"] if c in SIDO.columns), None)
if code_col is None:
    # Without this guard the lookup below would fail with an opaque `KeyError: None`.
    raise KeyError(
        f"No SIDO code column (CTPRVN_CD / SIDO_CD) found in {SIDO_PATH}; "
        f"available columns: {list(SIDO.columns)}"
    )

# Convert to 5-digit MOIS code
SIDO["MOIS5"] = SIDO[code_col].apply(to_mois5_from_sido)

# Drop invalid codes and enforce string type
SIDO = SIDO.dropna(subset=["MOIS5"]).copy()
SIDO["MOIS5"] = SIDO["MOIS5"].astype(str)

# Apply manual label offsets
SIDO["rep_pt"] = SIDO.apply(_apply_offset, axis=1)

# Build lookup Series: index = MOIS5 code, value = representative point
SIDO_PT = SIDO.set_index("MOIS5")["rep_pt"]


# --- Visualization parameter setup ---
# One base folder per (map mode x edge set) combination; per-network
# sub-folders are created later, right before each figure is saved.
OUTPUT_DIR_ORIGINAL_NODE_SIDO = "outputs/figures/network_maps/network_maps(sido)/network_maps_original"
OUTPUT_DIR_BACKBONE_NODE_SIDO = "outputs/figures/network_maps/network_maps(sido)/network_maps_backbone"
OUTPUT_DIR_ORIGINAL_CHORO_SIDO = "outputs/figures/network_maps/network_maps(sido)/network_maps_original_choropleth"
OUTPUT_DIR_BACKBONE_CHORO_SIDO = "outputs/figures/network_maps/network_maps(sido)/network_maps_backbone_choropleth"

for d in [OUTPUT_DIR_ORIGINAL_NODE_SIDO, OUTPUT_DIR_BACKBONE_NODE_SIDO,
          OUTPUT_DIR_ORIGINAL_CHORO_SIDO, OUTPUT_DIR_BACKBONE_CHORO_SIDO]:
    os.makedirs(d, exist_ok=True)

# --- Main loop ---
# For every network in nw_dict, every year and every metric: aggregate the
# SGG-level edges to SIDO level, compute DII / RSI and the disparity backbone,
# then save four maps (Node / Choropleth x Original / Backbone). Figures whose
# output file already exists are skipped, so the cell can be re-run cheaply.
for key, df in nw_dict.items():
    # Year 9999 is excluded (presumably an aggregate/placeholder code — confirm).
    years = sorted(set(df["1_기준연도"].unique()) - {9999})
    for year in years:
        df_year = df[df["1_기준연도"] == year].copy()
        for metric, suffix, q, title in METRICS:
            if metric not in df_year.columns:
                print(f"[WARN] Metric '{metric}' not found. Skipping {key} {year}.")
                continue

            # --- Aggregate SGG → SIDO ---
            d_sido = df_year.copy()
            d_sido["seller_sido"] = d_sido["8_시군구코드_seller"].apply(to_mois5_from_df).astype("string")
            d_sido["buyer_sido"]  = d_sido["14_시군구코드_buyer"].apply(to_mois5_from_df).astype("string")

            # Rows whose seller or buyer code could not be mapped are dropped.
            d_sido = d_sido.dropna(subset=["seller_sido", "buyer_sido"]).copy()
            if d_sido.empty:
                continue

            # Sum the metric per (seller, buyer) SIDO pair and remove self-loops.
            agg = d_sido.groupby(["seller_sido","buyer_sido"], as_index=False)[metric].sum()
            agg = agg[agg["seller_sido"] != agg["buyer_sido"]].copy()
            if agg.empty:
                # All trade is intra-SIDO: there is nothing to draw, and the
                # backbone share below would divide by zero.
                print(f"   [SKIP] {key} {year} {suffix}: No inter-SIDO edges.")
                continue

            # Compute metrics
            dii = _compute_dii(agg, metric, ["seller_sido", "buyer_sido"])
            dii_sizes = DII_BASE + DII_RANGE * dii
            agg["__RSI__"] = _ensure_rsi(agg, metric, ["seller_sido", "buyer_sido"])

            # RSI stats: the P97..P99 range of the finite RSI values defines
            # the clipping window used for edge alpha/width scaling.
            rsi_all = agg["__RSI__"].values[np.isfinite(agg["__RSI__"].values)]
            pct_targets = [0.97, 0.98, 0.99]
            rsi_percentiles = np.quantile(rsi_all, pct_targets) if rsi_all.size > 0 else np.zeros_like(pct_targets)
            clip_lo, clip_hi = max(0, rsi_percentiles[0]), rsi_percentiles[-1]
            if clip_hi <= clip_lo: clip_hi = clip_lo + 1e-6  # keep the scaling denominator positive

            agg_backbone = extract_disparity_backbone(agg, weight_col=metric, group_cols=["seller_sido", "buyer_sido"], alpha=ALPHA_LEVEL)

            # Share of edges kept by the backbone filter, shown in the title.
            backbone_prefix = f'[Backbone ({len(agg_backbone)/len(agg)*100:.1f}%) ]'

            # Visualization modes: Node-size map vs Choropleth map
            vis_modes = [
                {"mode": "Node", "tasks": [
                    {'name': 'Original', 'df': agg, 'dir': OUTPUT_DIR_ORIGINAL_NODE_SIDO, 'prefix': ''},
                    {'name': 'Backbone', 'df': agg_backbone, 'dir': OUTPUT_DIR_BACKBONE_NODE_SIDO, 'prefix': backbone_prefix}
                ]},
                {"mode": "Choropleth", "tasks": [
                    {'name': 'Original', 'df': agg, 'dir': OUTPUT_DIR_ORIGINAL_CHORO_SIDO, 'prefix': ''},
                    {'name': 'Backbone', 'df': agg_backbone, 'dir': OUTPUT_DIR_BACKBONE_CHORO_SIDO, 'prefix': backbone_prefix}
                ]}
            ]

            # Run both visualization modes
            for vis in vis_modes:
                for task in vis["tasks"]:
                    task_name, df_plot, out_dir_base, title_prefix = task['name'], task['df'], task['dir'], task['prefix']
                    outdir = os.path.join(out_dir_base, key)
                    outpath = os.path.join(outdir, f"{key}_{year}_{suffix}.png")

                    if os.path.exists(outpath):
                        print(f"   [SKIP] {vis['mode']} {task_name}: {outpath}")
                        continue
                    if df_plot.empty:
                        print(f"   [SKIP] {vis['mode']} {task_name}: No edges.")
                        continue

                    # Prepare edge list: (seller code, buyer code, metric value, RSI)
                    rows = [(str(r["seller_sido"]), str(r["buyer_sido"]),
                             float(r[metric]), float(r["__RSI__"]))
                            for _, r in df_plot.iterrows() if r["seller_sido"] != r["buyer_sido"]]
                    if not rows: continue
                    rows_arr = np.array(rows, dtype=object)
                    metric_vals_plot = rows_arr[:, 2].astype(float)
                    rsi_vals_plot = rows_arr[:, 3].astype(float)
                    all_valid_nodes = pd.unique(rows_arr[:, [0, 1]].ravel())

                    # --- Plot ---
                    fig, ax = plt.subplots(figsize=(10, 11), dpi=100)

                    if vis["mode"] == "Choropleth":
                        # SIDO polygons without a DII value are filled with 0.
                        sido_dii = SIDO.merge(dii.rename('dii'), left_on='MOIS5', right_index=True, how='left').fillna({'dii': 0})
                        # TwoSlopeNorm requires vmin < vcenter < vmax; widen the
                        # range minimally when all values fall on one side of 1.0.
                        dii_lo = min(float(sido_dii['dii'].min()), 1.0 - 1e-9)
                        dii_hi = max(float(sido_dii['dii'].max()), 1.0 + 1e-9)
                        norm = TwoSlopeNorm(vcenter=1.0, vmin=dii_lo, vmax=dii_hi)
                        sido_dii.plot(column='dii', cmap='coolwarm', norm=norm, ax=ax,
                                      edgecolor='white', linewidth=0.5, legend=True,
                                      legend_kwds={'label': "Dominance Index (DII)",
                                                   'orientation': "vertical", 'shrink': 0.6})
                    else:  # Node-size mode
                        SIDO.plot(ax=ax, linewidth=0.6, edgecolor='black', color='darkgrey', zorder=1)

                    CTPRVN.plot(ax=ax, color="none", edgecolor="black", linewidth=1, zorder=2)

                    # Link styles
                    # RSI is clipped to [clip_lo, clip_hi] and rescaled to 0..1.
                    rsi_clipped = np.clip(rsi_vals_plot, clip_lo, clip_hi)
                    rsi_scaled_01 = (rsi_clipped - clip_lo) / (clip_hi - clip_lo)
                    alphas = MIN_ALPHA + (MAX_ALPHA - MIN_ALPHA) * rsi_scaled_01
                    # Metric weight relative to the largest edge, log-compressed
                    # (log10 of a value in 1..10) and normalised by _norm01.
                    vmax = np.max(metric_vals_plot)
                    logv = np.log10((metric_vals_plot / vmax) * 9.0 + 1.0) if vmax > 0 else np.zeros_like(metric_vals_plot)
                    w_norm = _norm01(logv)
                    widths = BASE_LW + LW_RANGE * ((1.0 - RSI_W_BLEND) * w_norm + RSI_W_BLEND * rsi_scaled_01)
                    if task_name == 'Backbone':
                        alphas = MIN_ALPHA_bb + (MAX_ALPHA_bb - MIN_ALPHA_bb) * rsi_scaled_01
                        widths = BASE_LW_bb + LW_RANGE_bb * ((1.0 - RSI_W_BLEND_bb) * w_norm + RSI_W_BLEND_bb * rsi_scaled_01)

                    # Draw edges
                    all_segments, all_colors, all_widths = [], [], []
                    for i in range(len(rows_arr)):
                        s, t = rows_arr[i, 0], rows_arr[i, 1]
                        # Edges whose endpoint has no anchor point cannot be drawn.
                        if s not in SIDO_PT.index or t not in SIDO_PT.index: continue
                        w, a = widths[i], alphas[i]
                        # Opposite curvature signs keep the two directions of a pair apart.
                        segs, cols = _curve_segments(SIDO_PT[s], SIDO_PT[t],
                                                     curvature=0.25 if s < t else -0.25)
                        if segs is not None:
                            cols[:, 3] = a  # column 3 of the per-segment colours carries the edge alpha
                            all_segments.extend(segs); all_colors.extend(cols); all_widths.extend([w] * len(segs))
                    if all_segments:
                        lc = LineCollection(all_segments, colors=all_colors, linewidths=all_widths, capstyle="round", zorder=3)
                        ax.add_collection(lc)

                    # Draw nodes
                    # Same membership rule as the edge loop: codes absent from
                    # SIDO_PT are skipped instead of raising KeyError.
                    plot_nodes = [c for c in all_valid_nodes if c in SIDO_PT.index]
                    xs = [SIDO_PT.loc[c].x for c in plot_nodes]
                    ys = [SIDO_PT.loc[c].y for c in plot_nodes]
                    if vis["mode"] == "Choropleth":
                        ax.scatter(xs, ys, s=NODE_SIZE_FIXED, c="black", alpha=0.6,
                                   edgecolors="white", linewidth=0.5, zorder=4)
                    else:
                        # Marker area follows DII; nodes without a DII fall back to DII_BASE.
                        ss = [dii_sizes.get(c, DII_BASE) for c in plot_nodes]
                        ax.scatter(xs, ys, s=ss, c="green", alpha=0.5,
                                   edgecolors="white", linewidth=0.9, zorder=3)

                    # Map elements
                    add_north_arrow(ax, 0.86, 0.90, ARROW_FILE)
                    add_scale_bar(ax, SCALE_LEN_M)
                    ax.set_aspect("equal", adjustable="box")
                    ax.set_title(f"{title_prefix}{key} — {year} · {title}", fontsize=14)
                    ax.axis("off")

                    # Legends
                    handles = []
                    if vis["mode"] == "Node":
                        handles.append(Line2D([0],[0], color="none", label="Dominance Index (DII)"))
                        for lab in [0.5, 1.0, 2.0, DII_CLIP_HI]:
                            size_demo = DII_BASE + DII_RANGE * min(DII_CLIP_HI, lab)
                            # scatter sizes are areas, Line2D markersize is a length -> sqrt
                            handles.append(Line2D([], [], marker="o", linestyle="None",
                                                   markersize=np.sqrt(size_demo),
                                                   markerfacecolor="green", alpha=0.5,
                                                   markeredgecolor="white", mew=0.9,
                                                   label=f"   ×{lab:g}"))
                    handles.extend([Line2D([0],[0], color="none", label=" "), Line2D([0],[0], color="none", label="RSI (percentile)")])
                    base_rgb = mcolors.to_rgb("#f28e2b")

                    for i, p in enumerate(pct_targets):
                        rv = rsi_percentiles[i]
                        # The legend sample borrows alpha/width from the plotted
                        # edge whose RSI is closest to this percentile value.
                        idx = np.argmin(np.abs(rsi_vals_plot - rv))

                        alpha_val = alphas[idx]
                        width_demo = widths[idx]
                        handles.append(Line2D([0],[0], color=(*base_rgb, alpha_val), lw=width_demo, solid_capstyle="round",
                                               label=f"   {rv:.2f}% (P{p*100:g})"))
                    # Colour key for edge direction (presumably matches the
                    # colouring produced by _curve_segments — confirm).
                    handles.extend([Line2D([0],[0], color="none", label=" "), Line2D([0],[0], color="none", label="Source/Target"),
                                    Line2D([0],[0], color="red", lw=3, label="   Source"),
                                    Line2D([0],[0], color="yellow", lw=3, label="   Target")])
                    leg = ax.legend(handles=handles, loc="lower right", frameon=True, fontsize=12,
                                    title_fontsize=14, title="Legend")
                    leg.get_frame().set_alpha(0.96)

                    os.makedirs(outdir, exist_ok=True)
                    fig.tight_layout()
                    fig.savefig(outpath, dpi=100, bbox_inches='tight')
                    # Close the figure explicitly: many figures are created in this loop.
                    plt.close(fig); gc.collect()
                    print(f"   [OK] Saved {vis['mode']} {task_name} plot to {outpath}")

gc.collect()

   [SKIP] Node Original: outputs/figures/network_maps(sido)/network_maps_original/all/all_2016_총거래관계.png
   [SKIP] Node Backbone: outputs/figures/network_maps(sido)/network_maps_backbone/all/all_2016_총거래관계.png
   [SKIP] Choropleth Original: outputs/figures/network_maps(sido)/network_maps_original_choropleth/all/all_2016_총거래관계.png
   [SKIP] Choropleth Backbone: outputs/figures/network_maps(sido)/network_maps_backbone_choropleth/all/all_2016_총거래관계.png
   [SKIP] Node Original: outputs/figures/network_maps(sido)/network_maps_original/all/all_2017_총거래관계.png
   [SKIP] Node Backbone: outputs/figures/network_maps(sido)/network_maps_backbone/all/all_2017_총거래관계.png
   [SKIP] Choropleth Original: outputs/figures/network_maps(sido)/network_maps_original_choropleth/all/all_2017_총거래관계.png
   [SKIP] Choropleth Backbone: outputs/figures/network_maps(sido)/network_maps_backbone_choropleth/all/all_2017_총거래관계.png
   [SKIP] Node Original: outputs/figures/network_maps(sido)/network_maps_original/all/all_20

0