In [1]:
import pandas as pd
import os

def read_csv_smart(path):
    """Read a CSV file, trying several text encodings in order.

    The encodings are attempted in the order ``utf-8-sig``, ``utf-8``,
    ``cp949``, ``latin1``; the first one that decodes the file wins.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The ``DataFrame`` produced by the first encoding that succeeds.

    Raises:
        UnicodeError: If every candidate encoding fails to decode the file
            (the error from the last attempt is raised).
        Exception: Any non-decoding failure from ``pd.read_csv`` (missing
            file, parse error, ...) propagates immediately, because trying
            another encoding cannot fix it.
    """
    # NOTE: latin1 maps every possible byte to a character, so it never
    # raises a decode error. It is a last-resort fallback and may yield
    # mis-decoded text rather than an exception.
    encodings = ["utf-8-sig", "utf-8", "cp949", "latin1"]
    last_err = None
    for enc in encodings:
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError as e:
            # Only a decoding failure justifies moving on to the next codec.
            last_err = e
    raise last_err

# Folder that holds the North America inputs and the Global file; the
# combined outputs are written back into this same folder.
base_folder = r"C:\Users\starw\OneDrive\바탕 화면\dta_to_csv"

# North America files; each one is combined with Global independently.
north_files = [
    "North America (balance sheet).csv",
    "North America (cash flow statement).csv",
    "North America (income statement).csv",
]

# Global file name
global_filename = "Global.csv"

# The Global path does not depend on the North file, so build it once.
global_path = os.path.join(base_folder, global_filename)

# Number of North files for which an output file was actually written.
processed_count = 0

for north_filename in north_files:
    try:
        north_path = os.path.join(base_folder, north_filename)

        if not os.path.exists(north_path):
            print(f"[SKIP] North file not found: {north_path}")
            continue
        if not os.path.exists(global_path):
            print(f"[ERROR] Global file not found: {global_path}")
            continue

        # Load North America file
        df_north = read_csv_smart(north_path)

        # Load Global fresh for each North file
        df_global = read_csv_smart(global_path)

        # Step 1: Get columns from North America
        north_columns = df_north.columns.tolist()

        # Step 2: Align Global to the North schema. Columns Global lacks are
        # created as all-NaN; columns only Global has are dropped.
        df_global_matched = df_global.reindex(columns=north_columns)

        # Warn if some columns missing in Global
        missing_cols = [c for c in north_columns if c not in df_global.columns]
        if missing_cols:
            print(f"[WARN] Missing in Global for '{north_filename}': {missing_cols}")

        # Step 3: Append Global below North
        df_combined = pd.concat([df_north, df_global_matched], ignore_index=True)

        # Save output. Split off the extension instead of str.replace so the
        # suffix is inserted exactly once, and so a differently-cased
        # extension (e.g. ".CSV") can never leave the name unchanged and
        # overwrite the North input file.
        stem, ext = os.path.splitext(north_filename)
        output_path = os.path.join(base_folder, f"{stem}_with_Global{ext}")
        df_combined.to_csv(output_path, index=False, encoding="utf-8-sig")
        processed_count += 1

        print(
            f"[OK] Created: {output_path} | North rows: {len(df_north)} "
            f"+ Global rows: {len(df_global_matched)} -> Total: {len(df_combined)}"
        )

    except Exception as e:
        # Top-level boundary: report the failure and keep going so one bad
        # file does not stop the remaining ones.
        print(f"[ERROR] Processing '{north_filename}': {e}")

# Report the real outcome rather than assuming every file succeeded.
print(f"[DONE] {processed_count} of {len(north_files)} files processed.")


[WARN] Missing in Global for 'North America (balance sheet).csv': ['acdo', 'acodo', 'acominc', 'acoxar', 'aedi', 'aldo', 'aocidergl', 'aociother', 'aocipen', 'aocisecgl', 'aodo', 'apb', 'apc', 'arb', 'arc', 'bast', 'bkvlps', 'cb', 'ceql', 'ceqt', 'cld2', 'cld3', 'cld4', 'cld5', 'clfc', 'clfx', 'clg', 'clis', 'cll', 'cllc', 'clo', 'clrll', 'clt', 'crv', 'cstkcv', 'dclo', 'dcom', 'dcpstk', 'dcs', 'dcvsr', 'dcvsub', 'dcvt', 'dd', 'dd2', 'dd3', 'dd4', 'dd5', 'dfs', 'dlto', 'dltp', 'dltsub', 'dm', 'dn', 'dpacb', 'dpacc', 'dpacli', 'dpacls', 'dpacme', 'dpacnr', 'dpaco', 'dpacre', 'dpvieb', 'dpvio', 'dpvir', 'drc', 'drci', 'drlt', 'ds', 'dudd', 'dvpa', 'dvpibb', 'dxd2', 'dxd3', 'dxd4', 'dxd5', 'esopct', 'esopdlt', 'esopnr', 'esopr', 'esopt', 'excadj', 'fatc', 'fatn', 'fato', 'geqrv', 'govgr', 'iaeqci', 'iaeqmi', 'iafici', 'iafxmi', 'iali', 'iasci', 'iasmi', 'iatci', 'iatmi', 'iaui', 'intano', 'invofs', 'invreh', 'invrei', 'invres', 'ipc', 'ipv', 'iseq', 'iseqc', 'iseqm', 'isfi', 'isfxc', 'isf