# In [3]:
import pandas as pd
import os

# Read the master dataset catalog (one CSV file) into a DataFrame.
df_path = "Datasets.csv"
df = pd.read_csv(filepath_or_buffer=df_path)

# Keyword lookup for the detailed category checks: each category name maps
# to the list of band-name keywords searched for that category.

# Core categories that were requested first.
_CORE_KEYWORDS = {
    'precipitation': [
        'precip', 'rain', 'rainf', 'prcp', 'ppt', 'tp',
        'total_precipitation', 'precipitation_flux', 'rain_rate',
    ],
    'temperature': [
        'temperature', 'temp', 't2m', 'tas', 'tasmax', 'tasmin', 'tmmx',
        'tmmn', 'tmax', 'tmin', 'tavg', 'tmean', 'lst',
        'surface_temperature',
    ],
    'soil_moisture': [
        'soil', 'moisture', 'ssm', 'mrsos', 'soilw', 'swvl', 'swvl1',
        'swvl2', 'smap', 'rootmoist', 'SoilMoi', 'SoilMBulk', 'SoilTmp',
        'sm',
    ],
    'dem': [
        'dem', 'elevation', 'altitude', 'dsm', 'dtm', 'srtm', 'aw3d',
        'terrain', 'height',
    ],
    'ndvi': ['ndvi'],
}

# Additional meteorological and environmental categories.
_EXTRA_KEYWORDS = {
    'evapotranspiration': [
        'evap', 'et', 'pet', 'aet', 'evapotranspiration', 'evaporation',
        'latent_heat_flux', 'ET_inst', 'LE', 'etpot',
    ],
    'radiation': [
        'radiation', 'srad', 'solar', 'shortwave', 'sw', 'longwave', 'lw',
        'surface_radiation', 'incoming_shortwave', 'surface_solar',
    ],
    'wind': [
        'wind', 'u_component', 'v_component', 'u10', 'v10', 'uwnd', 'vwnd',
        'wind_speed', 'windgust', 'u_wind', 'v_wind',
    ],
    'humidity': [
        'humidity', 'relative_humidity', 'rh', 'specific_humidity', 'q',
        'shum', 'qv2m',
    ],
    'snow': [
        'snow', 'swe', 'snw', 'snow_depth', 'snowfall', 'snwd',
        'snow_water_equivalent',
    ],
    'land_cover': [
        'landcover', 'land cover', 'lc_type', 'lc', 'classification',
        'cover', 'igbp',
    ],
}

# Merge in declaration order: core categories first, then the extras.
keyword_map = {**_CORE_KEYWORDS, **_EXTRA_KEYWORDS}

def filter_bands(row, keywords):
    """Return the bands of a catalog row whose names match any keyword.

    Matching is a case-insensitive substring test: a keyword matches every
    band whose name contains it anywhere, so short keywords match broadly.

    Args:
        row: Mapping-like record (for example a pandas Series) exposing
            'Band Names' and 'Band Units' as comma-separated strings.
        keywords: Iterable of keyword strings. It is consumed exactly once,
            so one-shot iterators and generators are accepted.

    Returns:
        A ``(bands, units)`` tuple of two equal-length lists: the matching
        band names (whitespace-stripped, original casing) and the unit at
        the same position in 'Band Units'. A band without a unit gets ''.
        Both lists are empty when 'Band Names' is missing.
    """
    if pd.isna(row['Band Names']):
        return [], []

    bands = [b.strip() for b in str(row['Band Names']).split(',')]
    if pd.isna(row['Band Units']):
        units = []
    else:
        units = [u.strip() for u in str(row['Band Units']).split(',')]
    # Pad with empty units so every band has a partner in zip() below
    # (multiplying by a non-positive count adds nothing).
    units += [''] * (len(bands) - len(units))

    # Normalise the keywords once, up front. Besides hoisting loop-invariant
    # work, this keeps a one-shot iterable from being exhausted by the first
    # band and silently matching nothing for the remaining bands.
    lowered_keywords = tuple(kw.lower() for kw in keywords)

    sel_bands, sel_units = [], []
    for band, unit in zip(bands, units):
        band_low = band.lower()
        if any(kw in band_low for kw in lowered_keywords):
            sel_bands.append(band)
            sel_units.append(unit)
    return sel_bands, sel_units

def build_category_df(df, keywords):
    """Build the sub-catalog of rows that have at least one matching band.

    Every row of ``df`` is passed to ``filter_bands``. Rows with no matching
    band are dropped; each remaining row is copied and its 'Band Names' and
    'Band Units' fields are replaced by the matching entries joined
    with ', '.

    Args:
        df: Catalog DataFrame providing the 'Band Names' and 'Band Units'
            columns read by ``filter_bands``.
        keywords: Iterable of keyword strings forwarded to ``filter_bands``
            for every row.

    Returns:
        A new DataFrame holding the narrowed rows; ``df`` itself is not
        modified. When no row matches, the result is empty but keeps the
        columns of ``df``, so a CSV written from it still has a header.
    """
    # Materialise once so a one-shot iterable is usable for every row.
    keywords = list(keywords)

    rows = []
    for _, row in df.iterrows():
        sel_bands, sel_units = filter_bands(row, keywords)
        if not sel_bands:
            continue
        new_row = row.copy()
        new_row['Band Names'] = ', '.join(sel_bands)
        new_row['Band Units'] = ', '.join(sel_units)
        rows.append(new_row)

    if not rows:
        # pd.DataFrame([]) would have no columns at all; keep the schema.
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(rows)

# Write one filtered CSV per category and remember where each one went.
output_paths = {}
for category in keyword_map:
    kw_list = keyword_map[category]
    out_path = f"{category}.csv"
    cat_df = build_category_df(df, kw_list)
    cat_df.to_csv(out_path, index=False)
    output_paths[category] = out_path

