### Cell 1 03 Baseline Visualization — Setup

목표: Phase 1 베이스라인 시각화(V1~V3) 생성 및 figures/ 저장

입력: panel_year_category.csv, totals_year.csv, (옵션) shares_year_category.csv

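출력(고정 파일명):

figures/phase1_V1_total_trend.png

figures/phase1_V2_category_trends.png

figures/phase1_V3_share_stacked_area.png

각 그림의 요약 표 이미지는 같은 파일명에 _table 을 붙여 저장됨 (예: figures/phase1_V1_total_trend_table.png)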

In [1]:
# [Code Cell 1] Setup & Load
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Flexible path resolver: repo-style first, then current dir, then /mnt/data
def resolve_path(filename: str) -> str:
    """Return the first existing candidate location of *filename*.

    Candidates are checked in this order: ``data_processed/<filename>``,
    ``<filename>`` exactly as given, then ``/mnt/data/<filename>``.

    Raises:
        FileNotFoundError: if no candidate exists; the message lists every
            path that was tried.
    """
    candidates = [
        os.path.join(base, filename) if base else filename
        for base in ("data_processed", "", "/mnt/data")
    ]
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        raise FileNotFoundError(f"Cannot find {filename}. Tried: {candidates}")
    return found

# Output directory for every figure written by this notebook.
FIG_DIR = "figures"
os.makedirs(FIG_DIR, exist_ok=True)

# Required inputs: resolve_path() raises FileNotFoundError if either is missing.
panel_path = resolve_path("panel_year_category.csv")
totals_path = resolve_path("totals_year.csv")

# Optional input: resolve_path() raises for a missing file, so the lookup is
# guarded here. Without the guard a missing shares file aborts the notebook
# and the "shares is None" fallback used later can never be reached.
try:
    shares_path = resolve_path("shares_year_category.csv")
except FileNotFoundError:
    shares_path = None

panel = pd.read_csv(panel_path)
totals = pd.read_csv(totals_path)
shares = pd.read_csv(shares_path) if shares_path is not None else None

# Minimal schema checks
assert {"year","category_std","count"}.issubset(panel.columns), panel.columns
assert {"year","total_count"}.issubset(totals.columns), totals.columns

print("Loaded:")
print("- panel:", panel.shape, "from", panel_path)
print("- totals:", totals.shape, "from", totals_path)
print("- shares:", None if shares is None else shares.shape, "from", shares_path)

# Normalize the year column to int in every loaded frame.
panel["year"] = panel["year"].astype(int)
totals["year"] = totals["year"].astype(int)
if shares is not None:
    shares["year"] = shares["year"].astype(int)


Loaded:
- panel: (140, 6) from data_processed\panel_year_category.csv
- totals: (20, 3) from data_processed\totals_year.csv
- shares: (140, 8) from data_processed\shares_year_category.csv


### Cell 2 V1 — 국제범죄 총량 추이 (Total Trend)
X축(Year): 연도(YYYY). 분석 범위의 시간 흐름을 나타냄.

Y축(Total count): 해당 연도의 국제범죄 총 발생 건수.

In [7]:
# [Code Cell 2] V1: Total International Crime Trend

# Order rows chronologically so the line is drawn left to right.
totals_sorted = totals.sort_values("year")

# Draw the yearly totals as a single line on a fresh figure.
fig_v1, ax_v1 = plt.subplots()
ax_v1.plot(totals_sorted["year"], totals_sorted["total_count"])
ax_v1.set_xlabel("Year")
ax_v1.set_ylabel("Total count")
ax_v1.set_title("V1. Total International Crime Count by Year")

# Write the PNG under FIG_DIR, then close the figure.
out_path = os.path.join(FIG_DIR, "phase1_V1_total_trend.png")
fig_v1.tight_layout()
fig_v1.savefig(out_path, dpi=200)
plt.close(fig_v1)

# Last expression of the cell: shows the saved path.
out_path


'figures\\phase1_V1_total_trend.png'

In [9]:
# [Code Cell 3] V1 summary table (total count per year)

# Keep only the two plotted columns, renumber the rows from 0, and give the
# columns presentation-friendly headers.
v1_table = totals_sorted.loc[:, ["year", "total_count"]].reset_index(drop=True)
v1_table = v1_table.rename(columns={"year": "Year", "total_count": "Total_Count"})

# Display (use head()/tail() to show only part of the table).
v1_table

# [Code Cell 4] Save the V1 summary table as an image

# Work on a copy so the displayed table object is left untouched.
table_df = v1_table.copy()

# An axes with its frame and ticks hidden serves as the canvas for the table.
fig, ax = plt.subplots()
ax.set_axis_off()

# Render the frame's values with its column names as the header row.
tbl = ax.table(
    cellText=table_df.to_numpy(),
    colLabels=table_df.columns,
    loc="center",
)

# Minimal styling: fixed 8pt font, rows stretched slightly for legibility.
tbl.auto_set_font_size(False)
tbl.set_fontsize(8)
tbl.scale(1, 1.2)

ax.set_title("Table V1. Total International Crime Count by Year", pad=10)

# Write the PNG under FIG_DIR, then close the figure.
table_out_path = os.path.join(FIG_DIR, "phase1_V1_total_trend_table.png")
fig.tight_layout()
fig.savefig(table_out_path, dpi=200)
plt.close(fig)

table_out_path


'figures\\phase1_V1_total_trend_table.png'

### Cell 5 V2 — 범주별 발생 건수 추이 (Category Count Trends)

X축(Year): 연도(YYYY).

Y축(Count): 해당 연도·범주(category_std)의 발생 건수.

선(Line): 표준 범주(category_std)별 추이(동일 축에서 비교).

In [11]:
# [Code Cell 5] V2: Category-level Count Trends (Line)

# Year x category matrix of summed counts; combinations with no rows become 0.
pivot_cnt = panel.pivot_table(
    index="year",
    columns="category_std",
    values="count",
    aggfunc="sum",
)
pivot_cnt = pivot_cnt.sort_index().fillna(0)

# One line per category, all sharing the same axes for direct comparison.
fig_v2, ax_v2 = plt.subplots()
for category, counts in pivot_cnt.items():
    ax_v2.plot(pivot_cnt.index, counts, label=category)

ax_v2.set_xlabel("Year")
ax_v2.set_ylabel("Count")
ax_v2.set_title("V2. Category-level Count Trends")
ax_v2.legend(ncol=2, fontsize=8)

# Write the PNG under FIG_DIR, then close the figure.
out_path_v2 = os.path.join(FIG_DIR, "phase1_V2_category_trends.png")
fig_v2.tight_layout()
fig_v2.savefig(out_path_v2, dpi=200)
plt.close(fig_v2)

out_path_v2


'figures\\phase1_V2_category_trends.png'

In [12]:
# [Code Cell 6] V2 summary table (year x category)

# Name the index "Year" first so that reset_index() emits it as the leading
# column with that header.
v2_table = pivot_cnt.rename_axis(index="Year").reset_index()

v2_table

# [Code Cell 7] Save the V2 summary table as an image

# A wider canvas than the default, since there is one column per category.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_axis_off()

# Render the frame's values with its column names as the header row.
tbl = ax.table(
    cellText=v2_table.to_numpy(),
    colLabels=v2_table.columns,
    loc="center",
)

# Fixed 7pt font, rows stretched slightly for legibility.
tbl.auto_set_font_size(False)
tbl.set_fontsize(7)
tbl.scale(1, 1.1)

ax.set_title("Table V2. Category-level Count by Year", pad=10)

# Write the PNG under FIG_DIR, then close the figure.
table_out_path_v2 = os.path.join(FIG_DIR, "phase1_V2_category_trends_table.png")
fig.tight_layout()
fig.savefig(table_out_path_v2, dpi=200)
plt.close(fig)

table_out_path_v2


'figures\\phase1_V2_category_trends_table.png'

###  Cell 8 V3 — 범주별 구성비 변화 (Composition / Share Stacked Area)

X축(Year): 연도(YYYY).

Y축(Share): 해당 연도 국제범죄 총량 대비 범주별 비중(0~1).

면적(Area): 각 범주(category_std)의 구성비.

In [14]:
# [Code Cell 8] V3: Composition (Share) Stacked Area

if shares is not None:
    # Use the precomputed shares; copy so the loaded frame is not modified.
    shares_use = shares.copy()
else:
    # No shares file: derive each row's share from its count and the yearly total.
    merged = panel.merge(totals[["year", "total_count"]], on="year", how="left")
    # Rows whose total is not positive (including unmatched years) get 0.0.
    merged["share"] = np.where(
        merged["total_count"] > 0,
        merged["count"] / merged["total_count"],
        0.0,
    )
    shares_use = merged

# Year x category matrix of shares; missing combinations become 0.
# NOTE(review): duplicates of (year, category_std) are averaged here, while the
# count pivot sums them - confirm the input has one row per pair.
pivot_share = shares_use.pivot_table(
    index="year",
    columns="category_std",
    values="share",
    aggfunc="mean",
)
pivot_share = pivot_share.sort_index().fillna(0)

# Stack one area per category, in column order.
fig_v3, ax_v3 = plt.subplots()
ax_v3.stackplot(
    pivot_share.index,
    [pivot_share[category].to_numpy() for category in pivot_share.columns],
    labels=pivot_share.columns,
)

ax_v3.set_xlabel("Year")
ax_v3.set_ylabel("Share")
ax_v3.set_title("V3. Composition (Share) of International Crime Over Time")
ax_v3.legend(ncol=2, fontsize=8, loc="upper left")

# Write the PNG under FIG_DIR, then close the figure.
out_path_v3 = os.path.join(FIG_DIR, "phase1_V3_share_stacked_area.png")
fig_v3.tight_layout()
fig_v3.savefig(out_path_v3, dpi=200)
plt.close(fig_v3)

out_path_v3


'figures\\phase1_V3_share_stacked_area.png'

In [15]:
# [Code Cell 9] V3 summary table (year x category share)

# Name the index "Year" first so that reset_index() emits it as the leading
# column with that header.
v3_table = pivot_share.rename_axis(index="Year").reset_index()

v3_table

# [Code Cell 10] Save the V3 summary table as an image

# Build the cell text explicitly instead of passing v3_table.values.
# The frame mixes an integer year column with float share columns, so .values
# is upcast to a single float dtype: years would render as "2005.0" and shares
# at full float precision. The first column (from reset_index) is the year;
# the remaining columns are the per-category shares.
year_col, *share_cols = v3_table.columns
cell_text = [
    [f"{int(year)}", *(f"{value:.3f}" for value in share_row)]
    for year, share_row in zip(v3_table[year_col], v3_table[share_cols].to_numpy())
]

# A wider canvas than the default, since there is one column per category.
fig, ax = plt.subplots(figsize=(10, 6))
ax.axis("off")

tbl = ax.table(
    cellText=cell_text,
    colLabels=v3_table.columns,
    loc="center"
)

# Fixed 7pt font, rows stretched slightly for legibility.
tbl.auto_set_font_size(False)
tbl.set_fontsize(7)
tbl.scale(1, 1.1)

plt.title("Table V3. Category Share by Year", pad=10)

# Write the PNG under FIG_DIR, then close the figure.
table_out_path_v3 = os.path.join(FIG_DIR, "phase1_V3_share_stacked_area_table.png")
plt.tight_layout()
plt.savefig(table_out_path_v3, dpi=200)
plt.close()

table_out_path_v3



'figures\\phase1_V3_share_stacked_area_table.png'