In [3]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Input CSV read by main(); the path is relative to the working directory.
DATA_PATH = "cairo_merged_with_season.csv"   # place it next to this file
# Destination of the joblib bundle (fitted pipeline + feature/target names) written by main().
MODEL_PATH = "temp_model.pkl"

# Columns predicted jointly by the model; they are excluded from the feature set.
TARGETS = ["tempmax", "tempmin", "temp"]

def add_datetime_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* extended with ``month`` and ``dayofyear`` columns.

    Both columns are derived from the ``datetime`` column. Values that cannot
    be parsed as dates are coerced to ``NaT``, which yields missing values in
    the two derived columns instead of raising. The input frame is left
    untouched.

    Raises:
        KeyError: if *df* has no ``datetime`` column.
    """
    parsed = pd.to_datetime(df["datetime"], errors="coerce")
    # ``assign`` works on a copy, so the caller's frame is never mutated.
    return df.assign(
        month=parsed.dt.month,
        dayofyear=parsed.dt.dayofyear,
    )

def _build_regressor(feature_names):
    """Return an unfitted preprocessing + random-forest pipeline for *feature_names*."""
    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    preprocessor = ColumnTransformer(
        transformers=[("num", numeric_transformer, feature_names)],
        remainder="drop",
    )

    model = RandomForestRegressor(
        n_estimators=400,
        random_state=42,
        # Trees are independent, so build them on all cores; the fixed seed
        # still determines which trees are grown.
        n_jobs=-1,
    )

    return Pipeline(steps=[
        ("preprocess", preprocessor),
        ("model", model),
    ])


def main() -> None:
    """Train, evaluate and persist the multi-output temperature regressor.

    Steps:
      1. Read ``DATA_PATH`` and add the ``month`` / ``dayofyear`` features.
      2. Drop rows in which any column of ``TARGETS`` is missing.
      3. Use every remaining numeric column (except the targets) as a feature.
      4. Fit a median-impute -> standard-scale -> random-forest pipeline on a
         random 80/20 train/test split (``random_state=42``).
      5. Print MAE and R2 per target on the held-out 20%.
      6. Dump ``{"model", "features", "targets"}`` to ``MODEL_PATH`` with joblib.

    Raises:
        KeyError: if a target column (or ``datetime``) is absent from the CSV.
        ValueError: if no numeric feature column is left after filtering.
    """
    df = pd.read_csv(DATA_PATH)
    df = add_datetime_features(df)

    # A row without a target value can neither be trained on nor scored, and
    # NaN targets make fitting fail; filter them here so X and y stay aligned.
    df = df.dropna(subset=TARGETS)

    # Separate the targets.
    y = df[TARGETS]

    # Features = every useful numeric column (targets, season and datetime removed).
    drop_cols = set(TARGETS + ["season", "datetime"])
    feature_cols = [c for c in df.columns if c not in drop_cols]

    # Keep numeric columns only (in case any text columns remain).
    X = df[feature_cols].select_dtypes(include="number")
    if X.shape[1] == 0:
        raise ValueError("No numeric feature columns found in the dataset.")
    feature_names = X.columns.tolist()

    # NOTE(review): every remaining numeric column is used as a feature. If the
    # CSV holds columns derived from the temperature itself, or if the rows are
    # consecutive days, this random split will give optimistic scores - confirm
    # against the actual column list.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    reg = _build_regressor(feature_names)
    reg.fit(X_train, y_train)
    preds = reg.predict(X_test)

    # Evaluate each target separately; prediction columns follow TARGETS order.
    print("=== Evaluation (MAE / R2) ===")
    for i, t in enumerate(TARGETS):
        mae = mean_absolute_error(y_test[t], preds[:, i])
        r2 = r2_score(y_test[t], preds[:, i])
        print(f"{t}: MAE={mae:.3f} | R2={r2:.3f}")

    # Persist the feature and target names with the pipeline so that a consumer
    # can rebuild the input frame with the same columns.
    joblib.dump({"model": reg, "features": feature_names, "targets": TARGETS}, MODEL_PATH)
    print(f"\nSaved model to: {MODEL_PATH}")

# Run the training pipeline only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()


=== Evaluation (MAE / R2) ===
tempmax: MAE=0.859 | R2=0.970
tempmin: MAE=0.733 | R2=0.968
temp: MAE=0.187 | R2=0.997

Saved model to: temp_model.pkl
