In [1]:
import pandas as pd
import glob
import os
import re

# Folder with the source CSV files, relative to the notebook's working directory.
# Built with os.path.join so the path also resolves on non-Windows systems.
DATA_DIR = os.path.join("Data", "EA_Selected_Farm_Data_csv")
OUT_DIR = os.path.join(DATA_DIR, "filtered")

# Columns to keep (also the column order of the output files)
KEEP_COLS = ["grid_id", "lat", "lon", "H_wheat_dot_hat"]


def filter_wheat_files(data_dir, out_dir, keep_cols):
    """Write a column-filtered copy of every ``*.csv`` file in ``data_dir``.

    The year is taken from the second dot-separated part of the file name
    (e.g. ``f2022.c1991.p2022.t2022_cropped_with_gridid.csv`` -> ``1991``) and
    the result is written to ``<out_dir>/<year>_wheat_data.csv``.

    Files are skipped with a printed warning, instead of aborting the run, when:
      * the name has no dot-separated part before the extension,
      * that part contains no 4-digit year,
      * an earlier file in this run already produced the same year
        (prevents one input from silently overwriting another),
      * any of ``keep_cols`` is missing from the file.

    Parameters
    ----------
    data_dir : str
        Folder scanned (non-recursively) for ``*.csv`` files.
    out_dir : str
        Folder receiving the filtered files; created if it does not exist.
    keep_cols : list of str
        Columns to keep, in the order they should appear in the output.

    Returns
    -------
    dict
        Maps each year string to the path of the file written for it.
    """
    os.makedirs(out_dir, exist_ok=True)

    wanted = set(keep_cols)
    written = {}         # year -> output path
    source_of_year = {}  # year -> input file name that produced it
    skipped = 0

    # glob returns files in arbitrary order; sort so that runs are deterministic
    # (this also decides which file wins when two share a year).
    for file in sorted(glob.glob(os.path.join(data_dir, "*.csv"))):
        filename = os.path.basename(file)
        print(f"Processing: {filename}")

        # Split by dots → second part carries the year (c1991).
        # At least three parts are needed: "<prefix>.<cYYYY>...csv"; with only
        # two, the "second part" would be the file extension itself.
        parts = filename.split(".")
        if len(parts) < 3:
            print(f"⚠️ Skipping (unexpected name): {filename}")
            skipped += 1
            continue

        c_part = parts[1]   # e.g. "c1991"

        # Extract year from c1991
        match = re.search(r"\d{4}", c_part)
        if not match:
            print(f"⚠️ No year found in {filename}")
            skipped += 1
            continue

        year = match.group()

        if year in source_of_year:
            print(
                f"⚠️ Skipping {filename}: year {year} was already written "
                f"from {source_of_year[year]}"
            )
            skipped += 1
            continue

        # Parse only the columns we need; a callable is used so that a missing
        # column can be reported below rather than raised by the parser.
        df = pd.read_csv(file, usecols=lambda col: col in wanted)

        missing = [col for col in keep_cols if col not in df.columns]
        if missing:
            print(f"⚠️ Skipping {filename}: missing columns {missing}")
            skipped += 1
            continue

        # usecols keeps the file's column order; re-index to the requested order.
        df_out = df[keep_cols]

        # Output file
        out_file = os.path.join(out_dir, f"{year}_wheat_data.csv")
        df_out.to_csv(out_file, index=False)

        written[year] = out_file
        source_of_year[year] = filename

    # Only claim success when every file found was actually written.
    if skipped:
        print(f"⚠️ Done: {len(written)} file(s) written, {skipped} skipped.")
    elif not written:
        print(f"⚠️ No CSV files found in {data_dir}")
    else:
        print("✅ All files processed successfully.")

    return written


filtered_files = filter_wheat_files(DATA_DIR, OUT_DIR, KEEP_COLS)


Processing: f2022.c1991.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1992.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1993.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1994.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1995.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1996.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1997.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1998.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c1999.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2000.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2001.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2002.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2003.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2004.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2005.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2006.p2022.t2022_cropped_with_gridid.csv
Processing: f2022.c2007.p2022.t2022_crop