In [2]:
import pandas as pd
from pathlib import Path

# ============================================================
# Configuration
#
# Input directory (daily simulation output, one CSV per simulation)
INPUT_DIR = Path(
    r"C:\Workspace\Sobol\6_reprocess_sobol_output\2_Extract_113"
)

# Output directory (the observations "Gumbel" folder)
OUT_DIR = Path(
    r"Z:\WERL\Team\shlee\Sobol\8_analysis_results\Observations\Gumbel"
)
# parents=True: the target is several levels deep; without it mkdir raises
# FileNotFoundError whenever an intermediate folder does not exist yet.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# CSV that receives the water-year annual maximum series of every simulation
OUT_FILE = OUT_DIR / "Simulation_AMS_WaterYear.csv"

# Column names read from each input CSV
DATE_COL = "Date"
FLOW_COL = "FLOW_OUTcms"

# Water years to keep (complete years only)
VALID_WATER_YEARS = list(range(2012, 2021))  # 2012–2020
# ============================================================


# === Water-year helper ===
def compute_water_year(date_series):
    """
    Map each timestamp to its water year (Oct 1 – Sep 30).

    A water year is labelled by the calendar year in which it ends, so
    dates in October–December belong to the following year's water year:
    e.g., 2010-10-01 -> WY 2011, 2010-09-30 -> WY 2010.

    Parameters
    ----------
    date_series : datetime-like pandas Series (must support the ``.dt`` accessor)

    Returns
    -------
    pandas Series of water-year numbers aligned with ``date_series``.
    """
    parts = date_series.dt
    # True for Oct/Nov/Dec; adding the boolean bumps the year by one.
    rolls_into_next_wy = parts.month >= 10
    return parts.year + rolls_into_next_wy


# === Accumulator for the results ===
# {WaterYear: {sim1: val, sim2: val, ...}}
ams_dict = {}

# NOTE(review): sorted() orders the paths lexicographically, so the "sim{idx}"
# labels follow the string order of the file names (e.g. "FLOW_10_..." sorts
# before "FLOW_2_...") unless the numeric part is zero-padded. Confirm this
# matches the Sobol sample order expected downstream.
files = sorted(INPUT_DIR.glob("FLOW_*_selected.csv"))
print(f"Total simulation files found: {len(files)}")

# Fail fast: with no inputs the script would otherwise write a header-only
# CSV and still report success.
if not files:
    raise FileNotFoundError(
        f"No 'FLOW_*_selected.csv' files found in: {INPUT_DIR}"
    )

for idx, file in enumerate(files, start=1):

    # Column label for this simulation in the output table
    sim_col = f"sim{idx}"

    # Read only the two needed columns to keep per-file I/O small
    df = pd.read_csv(
        file,
        usecols=[DATE_COL, FLOW_COL],
        parse_dates=[DATE_COL]
    )

    # Assign a water year to every daily record
    df["WaterYear"] = compute_water_year(df[DATE_COL])

    # Keep only the complete water years
    df = df[df["WaterYear"].isin(VALID_WATER_YEARS)]

    # Annual maximum series (AMS): max flow per water year
    wy_max = df.groupby("WaterYear")[FLOW_COL].max()

    # Merge into the accumulator, creating the per-year dict on first use
    for wy, val in wy_max.items():
        ams_dict.setdefault(wy, {})[sim_col] = val

    # Progress report
    if idx % 500 == 0:
        print(f"Processed {idx}/{len(files)} simulations")


# === Convert to a DataFrame (rows: water years, columns: simulations) ===
out_df = (
    pd.DataFrame.from_dict(ams_dict, orient="index")
    .sort_index()
)

out_df.index.name = "WaterYear"

# === Save ===
out_df.to_csv(OUT_FILE)

print("✔ Simulation AMS (water-year annual max) extraction completed")
print(f"✔ Output saved to: {OUT_FILE}")


Total simulation files found: 28672
Processed 500/28672 simulations
Processed 1000/28672 simulations
Processed 1500/28672 simulations
Processed 2000/28672 simulations
Processed 2500/28672 simulations
Processed 3000/28672 simulations
Processed 3500/28672 simulations
Processed 4000/28672 simulations
Processed 4500/28672 simulations
Processed 5000/28672 simulations
Processed 5500/28672 simulations
Processed 6000/28672 simulations
Processed 6500/28672 simulations
Processed 7000/28672 simulations
Processed 7500/28672 simulations
Processed 8000/28672 simulations
Processed 8500/28672 simulations
Processed 9000/28672 simulations
Processed 9500/28672 simulations
Processed 10000/28672 simulations
Processed 10500/28672 simulations
Processed 11000/28672 simulations
Processed 11500/28672 simulations
Processed 12000/28672 simulations
Processed 12500/28672 simulations
Processed 13000/28672 simulations
Processed 13500/28672 simulations
Processed 14000/28672 simulations
Processed 14500/28672 simulation