In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Load the dataset; any CSV from the data folder can be used here.
csv_path = "file1.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:
# Restrict the frame to the columns the rest of the notebook reads.
keep_cols = [
    "Date",
    "Longitude",
    "Latitude",
    "Elevation",
    "Max Temperature",
    "Min Temperature",
    "Precipitation",
    "Wind",
    "Relative Humidity",
    "Solar",
]
# .copy() detaches the selection from the frame that was loaded from disk.
df = df.loc[:, keep_cols].copy()


In [None]:
# Parse the dates; the preprocessing applied for training has to be repeated here.
df["Date"] = pd.to_datetime(df["Date"])

# Row order drives every shift()/rolling() feature built further down, so sort
# with a stable algorithm: rows that share the same Date keep their original
# file order instead of whatever order the default quicksort leaves them in.
df = df.sort_values("Date", kind="mergesort").reset_index(drop=True)


In [None]:
# Rebuild the calendar features from the parsed Date column.
date_parts = df["Date"].dt
df["Year"] = date_parts.year
df["Month"] = date_parts.month
df["Day Of Year"] = date_parts.dayofyear


In [None]:
# Recreating the preprocessing (IQR clipping + min-max scaling): the model was trained on data that was preprocessed this way.
def iqr_clip(s, k=1.5):
    """Clip a Series to the band [Q1 - k*IQR, Q3 + k*IQR].

    Parameters
    ----------
    s : pandas.Series
        Values to clip; the quartiles are computed from this same Series.
    k : float, default 1.5
        Multiple of the inter-quartile range allowed beyond each quartile.

    Returns
    -------
    pandas.Series
        A Series with the same index as ``s`` whose values are limited to
        the computed band.
    """
    quartiles = s.quantile([0.25, 0.75])
    first_quartile = quartiles.iloc[0]
    third_quartile = quartiles.iloc[1]
    spread = third_quartile - first_quartile
    lower_bound = first_quartile - k*spread
    upper_bound = third_quartile + k*spread
    return s.clip(lower=lower_bound, upper=upper_bound)

def minmax_01(s):
    """Rescale a Series linearly so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    s : pandas.Series
        Values to rescale; the minimum and maximum are taken from ``s`` itself.

    Returns
    -------
    pandas.Series
        ``(s - min) / (max - min)`` on the index of ``s``. When the minimum or
        maximum is missing, or the two are equal, a Series filled with 0.5 on
        the same index is returned instead.
    """
    lowest = s.min()
    highest = s.max()
    no_usable_range = pd.isna(lowest) or pd.isna(highest) or highest == lowest
    if not no_usable_range:
        return (s - lowest) / (highest - lowest)
    # Nothing to scale against: fall back to the midpoint for every position.
    return pd.Series(np.full(len(s), 0.5), index=s.index)



In [None]:

# Preprocess the weather inputs: IQR clipping first, then min-max scaling.
# Both steps take their statistics from the values currently held in df and
# the result overwrites the column, so re-running this cell transforms the
# already-transformed values again.
for col in ("Precipitation", "Wind", "Relative Humidity", "Solar"):
    df[col] = minmax_01(iqr_clip(df[col]))


In [None]:
# Feature engineering

# Cyclic encodings: sine/cosine of the month and day-of-year angles.
month_angle = 2*np.pi*df["Month"]/12.0
day_of_year_angle = 2*np.pi*df["Day Of Year"]/365.0
df["Month Sin"] = np.sin(month_angle)
df["Month Cos"] = np.cos(month_angle)
df["Day Of Year Sin"] = np.sin(day_of_year_angle)
df["Day Of Year Cos"] = np.cos(day_of_year_angle)

# Lag features: shift(n) takes the value found n rows earlier, so these are
# lags by row position in the date-sorted frame.
temperature_cols = ("Min Temperature", "Max Temperature")
for lag in (1, 2, 3, 365):
    for temp_col in temperature_cols:
        df[f"{temp_col}_lag_{lag}"] = df[temp_col].shift(lag)

# Rolling means over the previous w rows; the shift(1) keeps the current
# row's own temperature out of its window.
for w in (7, 14, 30, 90):
    for temp_col in temperature_cols:
        previous_rows = df[temp_col].shift(1)
        df[f"{temp_col}_rolling_lag_{w}"] = previous_rows.rolling(w, min_periods=1).mean()



In [None]:
# Select the targets and the model's input features.
target_columns = ["Min Temperature", "Max Temperature"]

# The feature list is assembled group by group; the resulting order is the
# order in which the columns are handed to the model.
weather_features = ["Precipitation", "Wind", "Relative Humidity", "Solar"]
calendar_features = ["Year", "Month", "Day Of Year"]
cyclic_features = ["Month Sin", "Month Cos", "Day Of Year Sin", "Day Of Year Cos"]
lag_features = [
    f"{temp_col}_lag_{lag}"
    for lag in (1, 2, 3, 365)
    for temp_col in target_columns
]
rolling_features = [
    f"{temp_col}_rolling_lag_{w}"
    for w in (7, 14, 30, 90)
    for temp_col in target_columns
]
feature_columns = (
    weather_features
    + calendar_features
    + cyclic_features
    + lag_features
    + rolling_features
)



In [None]:
# Build the evaluation frame: keep only the rows where every target and
# every feature is present, then split it into inputs (X) and targets (y).
required_columns = target_columns + feature_columns
df_model = df.dropna(subset=required_columns).reset_index(drop=True)

# The 365-row lag features are NaN for the first 365 rows, so a short input
# file can leave no complete rows at all. Stop here with an explanation
# instead of letting the prediction step fail on an empty input.
if df_model.empty:
    raise ValueError(
        "No complete rows left after dropping missing values; the input needs "
        "more than 365 rows so the 365-row lag features can be filled."
    )

X = df_model[feature_columns].copy()
y = df_model[target_columns].copy()


In [None]:
# Load the trained model from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files that come from a trusted source.
model_path = "best_random_forest_model.pkl"
model = joblib.load(model_path)


In [None]:
# Disabled fallback, kept inside a bare string literal so none of it runs:
# it rebuilds a RandomForestRegressor with fixed hyperparameters and fits it.
# NOTE(review): X_train and y_train are not defined in any cell visible in
# this notebook; they would have to be created before this could be enabled.
'''If the type of loading above dont work , you can rebuild the model 
and retain it based on the instructions in the main model file 
below is code to rebuilt the model using the parameters already found optimal 

from sklearn.ensemble import RandomForestRegressor
import joblib

# Building model directly with the best parameters
model = RandomForestRegressor(
    n_estimators=300,
    min_samples_split=5,
    min_samples_leaf=2,
    max_depth=20,
    random_state=42,
    n_jobs=-1
)

#Training the model
model.fit(X_train, y_train)'''


In [None]:
# Run the model on every prepared row.
y_pred = model.predict(X)

# Output column 0 is stored as the Min prediction and column 1 as the Max
# prediction, next to the actual values in df_model.
for position, pred_column in enumerate(["Pred_Min_Temperature", "Pred_Max_Temperature"]):
    df_model[pred_column] = y_pred[:, position]


In [None]:
# Calculate error metrics for the predictions, separately for each target.
actual_min = y["Min Temperature"]
actual_max = y["Max Temperature"]
predicted_min = df_model["Pred_Min_Temperature"]
predicted_max = df_model["Pred_Max_Temperature"]

# Mean absolute error
mae_min = mean_absolute_error(actual_min, predicted_min)
mae_max = mean_absolute_error(actual_max, predicted_max)

# Root mean squared error (square root of the MSE)
mse_min = mean_squared_error(actual_min, predicted_min)
mse_max = mean_squared_error(actual_max, predicted_max)
rmse_min = np.sqrt(mse_min)
rmse_max = np.sqrt(mse_max)

# Coefficient of determination
r2_min = r2_score(actual_min, predicted_min)
r2_max = r2_score(actual_max, predicted_max)



In [None]:
# Print the metrics, three decimals each, Min before Max for every metric.
metric_report = [
    ("MAE", mae_min, mae_max),
    ("RMSE", rmse_min, rmse_max),
    ("R²", r2_min, r2_max),
]
for metric_name, value_min, value_max in metric_report:
    print(f"{metric_name} (Min Temp): {value_min:.3f}")
    print(f"{metric_name} (Max Temp): {value_max:.3f}")
