In [5]:
from pathlib import Path
import pandas as pd

# ---- find project root (works in Jupyter) ----
def find_root(start=None):
    """Locate the project root by walking upward from *start*.

    A directory counts as the root when it contains a ``.here`` marker or a
    ``data`` sub-directory. The starting directory is checked first, then
    each of its ancestors in turn.

    Args:
        start: Directory to begin the search from, as a ``Path`` or a
            string. Defaults to the working directory at call time.

    Returns:
        The first matching directory as a resolved, absolute ``Path``, or
        the resolved *start* itself when nothing on the way up qualifies.
    """
    # Read the working directory here rather than in the signature: a
    # signature default is evaluated once, when the ``def`` runs, so it
    # would keep pointing at the old directory after a later ``os.chdir``.
    origin = Path.cwd() if start is None else Path(start)
    # Resolve so that ``parents`` covers the full ancestry even when a
    # relative path is passed in.
    origin = origin.resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / ".here").exists() or (candidate / "data").is_dir():
            return candidate
    return origin  # fallback: no marker found anywhere above

# Anchor every data path on the detected project root so the notebook does
# not depend on the directory it happens to be launched from.
ROOT = find_root()
DATA_RAW = ROOT.joinpath("data", "raw")
DATA_PROCESSED = ROOT.joinpath("data", "processed")

# Quick sanity check before any file is read.
print("Project root:", ROOT)
print("Raw data dir exists:", DATA_RAW.exists())

# ---- load raw files ----
data_2019 = pd.read_csv(DATA_RAW / "data2019.csv")
data_2022 = pd.read_csv(DATA_RAW / "data2022.csv")

# Force EMP to a numeric dtype; values that cannot be parsed become NaN
# (errors="coerce") and are dropped before the growth rate is computed.
for df in (data_2019, data_2022):
    df["EMP"] = pd.to_numeric(df["EMP"], errors="coerce")

# ---- pair the two years on STATEA, COUNTYA, naics ----
# The inner join keeps only key combinations present in both years.
merge_keys = ["STATEA", "COUNTYA", "naics"]
emp_2019 = data_2019[[*merge_keys, "EMP"]].rename(columns={"EMP": "EMP_2019"})
emp_2022 = data_2022[[*merge_keys, "EMP"]].rename(columns={"EMP": "EMP_2022"})
merged = emp_2019.merge(emp_2022, on=merge_keys, how="inner")

# ---- compute growth rate (%) ----
merged = merged.dropna(subset=["EMP_2019", "EMP_2022"])
# A zero 2019 baseline makes the percentage change undefined. Mask those
# denominators to NaN so the result is NaN (an empty cell in the CSV)
# instead of inf/-inf from a division by zero.
baseline = merged["EMP_2019"].where(merged["EMP_2019"] != 0)
merged["Growth_Rate"] = (merged["EMP_2022"] - merged["EMP_2019"]) / baseline * 100

# ---- save to processed/ ----
out_path = DATA_PROCESSED / "employment_growth_rate.csv"
out_path.parent.mkdir(parents=True, exist_ok=True)  # create the directory if it does not exist
merged.to_csv(out_path, index=False)
print("Saved:", out_path)

Project root: /Users/lirunzhi/Documents/2024一些美国的材料/ECON129/MAP project/Mapping project_副本
Raw data dir exists: True
Saved: /Users/lirunzhi/Documents/2024一些美国的材料/ECON129/MAP project/Mapping project_副本/data/processed/employment_growth_rate.csv
