In [None]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

In [None]:
# Where trained models are written; created up front so the save step
# never fails on a missing directory (exist_ok makes reruns harmless).
model_dir = "models/"
os.makedirs(model_dir, exist_ok=True)

# Folder scanned by the training cell for feature-engineered .xlsx workbooks.
input_dir = "data/processed/feature_engineered/"

In [None]:
# Input columns selected as X when fitting each model.
features = [
    'day',
    'month',
    'day_of_year',
    'sin_day',
    'cos_day',
    'season_id',
]

# Output columns predicted jointly by the multi-output regressor.
targets = [
    'Rainfall_mm',
    'MinTemp_C',
    'MaxTemp_C',
    'Humidity_pct',
    'Wind_kmph',
    'Pressure_hPa',
]

In [None]:
# Train one multi-output random forest per workbook found in `input_dir`
# and persist it to `model_dir` as "<tehsil>_weather_model.pkl".
#
# Rows from years before `holdout_year` are used for fitting; rows from
# `holdout_year` itself are kept out of training and only used to report a
# hold-out R^2 score.
holdout_year = 2024
required_columns = features + targets + ['year']

# os.listdir returns entries in arbitrary order; sort so reruns process and
# log the workbooks in a stable sequence.
for filename in sorted(os.listdir(input_dir)):
    # Skip anything that is not a workbook, and the "~$name.xlsx" lock files
    # Excel leaves beside an open workbook (read_excel cannot parse them).
    if not filename.endswith(".xlsx") or filename.startswith("~$"):
        continue

    # The tehsil name is the part of the filename before the first underscore.
    tehsil = filename.split('_')[0]

    # sheet_name=None loads every sheet as {sheet_name: DataFrame}; stack them
    # into a single frame.
    excel = pd.read_excel(os.path.join(input_dir, filename), sheet_name=None)
    df_all_years = pd.concat(list(excel.values()), ignore_index=True)

    # A workbook lacking a required column would otherwise abort the whole
    # batch with a KeyError; report it and move on to the next file.
    missing_columns = [col for col in required_columns if col not in df_all_years.columns]
    if missing_columns:
        print(f"Skipping {filename}: missing columns {missing_columns}")
        continue

    # Drop rows with a missing feature or target value (assignment rather than
    # inplace=True so the frame's lineage stays explicit).
    df_all_years = df_all_years.dropna(subset=features + targets)

    # Time-based split
    train = df_all_years[df_all_years['year'] < holdout_year]
    test = df_all_years[df_all_years['year'] == holdout_year]

    # Fitting on zero rows raises an opaque error inside scikit-learn; skip
    # with a clear message instead of aborting the remaining workbooks.
    if train.empty:
        print(f"Skipping {filename}: no complete rows before {holdout_year}")
        continue

    X_train = train[features]
    y_train = train[targets]

    # Initialize multi-target RF: one forest is fitted per target column,
    # with a fixed random_state for reproducible models.
    rf = MultiOutputRegressor(RandomForestRegressor(n_estimators=200, random_state=42))
    rf.fit(X_train, y_train)

    # Save model before evaluating so a scoring problem cannot lose the fit.
    joblib.dump(rf, os.path.join(model_dir, f"{tehsil}_weather_model.pkl"))
    print(f"Model trained & saved: {tehsil}_weather_model.pkl")

    # Report the hold-out fit. R^2 is undefined for fewer than two samples,
    # so only score when the hold-out year has at least two rows.
    if len(test) >= 2:
        holdout_r2 = rf.score(test[features], test[targets])
        print(f"  {tehsil} hold-out R^2 ({holdout_year}): {holdout_r2:.3f}")
    else:
        print(f"  {tehsil}: not enough {holdout_year} rows to evaluate")