In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pickle

In [6]:
# Load dataset (use tripled or original)
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a hard-coded "\\" separator only resolves on Windows.
df = pd.read_excel("Synthetic_datasets/26 july - synthetic dataset.xlsx")

# Categorical columns (one-hot encoded below); every other input column is
# passed through to the model unchanged.
categoricals = ['season', 'day']

# Fail early, with a readable message, if the sheet does not have the layout
# the rest of this cell relies on.
missing_cols = [c for c in ['output', *categoricals] if c not in df.columns]
if missing_cols:
    raise KeyError(f"Dataset is missing required column(s): {missing_cols}")

# Inputs / outputs
X = df.drop(columns=['output'])
y = df['output']

numeric_cols = [c for c in X.columns if c not in categoricals]

# One-hot encode
# NOTE: the encoder is fit on all rows, before the train/test split. This
# same fitted encoder object is what gets pickled later in the notebook.
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(X[categoricals])
X_numeric = X[numeric_cols].to_numpy()

# Final input array: one-hot columns first, then the remaining columns in
# their original DataFrame order. Anything that feeds the saved model must
# rebuild its inputs in this same column order.
X_final = np.concatenate([encoded, X_numeric], axis=1)

# Train/test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, random_state=36
)

# -------------------------
# Random Forest Regressor
# -------------------------
# Hyper-parameters are collected in one mapping so they can be read (and
# tweaked) in a single place before the estimator is built.
rf_params = {
    "n_estimators": 300,
    "max_depth": None,         # no depth cap: let the trees grow deep
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "n_jobs": -1,              # use all CPU cores
    "random_state": 36,
}

# Build and train in one step; fit() returns the fitted estimator itself.
model = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Score on the held-out split only.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)

# The value labelled "Accuracy%" is simply R² multiplied by 100, not a
# classification accuracy.
print("Training complete!")
print("R² Score:", r2)
print("Accuracy%:", r2 * 100)

# Persist the fitted model and the fitted encoder as two separate pickle
# files in the current working directory.
for pkl_name, artifact in (
    ("july-model-rf.pkl", model),
    ("july-encoder.pkl", encoder),
):
    with open(pkl_name, "wb") as sink:
        pickle.dump(artifact, sink)


Training complete!
R² Score: 0.9320115369644345
Accuracy%: 93.20115369644346
