In [None]:
from pathlib import Path
import pandas as pd, numpy as np
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_log_error
import features   # your helper module

# Data is expected one directory above the notebook's working directory.
ROOT = Path.cwd().parent

# Read the raw training table and run it through the project's feature helper.
# NOTE(review): presumably this adds the hour/weekday/month columns selected
# below -- confirm against features.add_time_features.
train = features.add_time_features(pd.read_csv(ROOT / "train.csv"))

# Predictor matrix and regression target.
X = train[
    ["hour", "weekday", "month", "temp", "humidity", "windspeed"]
]
y = train["count"]

# Random 80/20 hold-out split; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
# Candidate hyper-parameter values the randomized search samples from.
param_grid = dict(
    n_estimators=[200, 400, 600, 800],
    max_depth=[None, 10, 30, 50],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Try 20 sampled combinations with 3-fold cross-validation on the training
# split only. Scoring is the negated mean squared log error, so larger is better.
# fit() returns the search object itself, so it can be chained onto construction.
rs = RandomizedSearchCV(
    estimator=RandomForestRegressor(n_jobs=-1, random_state=42),
    param_distributions=param_grid,
    scoring="neg_mean_squared_log_error",
    n_iter=20,
    cv=3,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)

# With scikit-learn's default refit behaviour, best_estimator_ is the winning
# configuration retrained on the full training split.
best_rf = rs.best_estimator_
print("Best params:", rs.best_params_)


In [None]:
# Score the tuned random forest on the hold-out split.
# Predictions are floored at zero because the log-based metric rejects
# negative values.
preds = np.clip(best_rf.predict(X_val), 0, None)

# RMSLE is the square root of MSLE. Take the root explicitly instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, so relying on it raises TypeError on current releases.
rmsle = np.sqrt(mean_squared_log_error(y_val, preds))
print("Tuned RF RMSLE:", rmsle)


In [None]:
# Baseline for comparison: gradient boosting with library-default
# hyper-parameters (only the seed is fixed), trained on the same split.
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X_train, y_train)

# Floor predictions at zero before scoring, since the log-based metric rejects
# negative values. RMSLE is computed as sqrt(MSLE) rather than via
# `squared=False`, which was deprecated in scikit-learn 1.4 and removed in 1.6.
rmsle_gbr = np.sqrt(
    mean_squared_log_error(y_val, np.clip(gbr.predict(X_val), 0, None))
)
print("GradientBoosting RMSLE:", rmsle_gbr)


In [None]:
# Side-by-side recap of both validation scores (RMSLE, lower is better),
# emitted as one newline-joined block instead of three separate prints.
print(
    "\n".join(
        (
            "\nSummary",
            f"- Tuned RandomForest: {rmsle:.4f}",
            f"- GradientBoosting : {rmsle_gbr:.4f}",
        )
    )
)
