In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1️⃣ Load dataset
file_path = "consolidated_features.csv"
raw_df = pd.read_csv(file_path)  # kept untouched so this cell can be re-run safely

# 2️⃣ Handle missing values (if any)
# The first column of the CSV is the regression target. Rows with a missing
# target are dropped instead of being median-filled, so the model is never
# trained or scored on fabricated labels.
target_col = raw_df.columns[0]
df = raw_df.dropna(subset=[target_col])

# Fill the remaining gaps with per-column medians.
# numeric_only=True prevents a TypeError on pandas >= 2.0 when a non-numeric
# column is present; plain assignment replaces inplace=True so the loaded
# frame is not mutated.
# NOTE(review): the medians are computed before the train/test split, so test
# rows contribute to the fill values — confirm this is acceptable for the
# reported metrics.
df = df.fillna(df.median(numeric_only=True))

# 3️⃣ Define Features (X) and Target (y)
X = df.drop(columns=[target_col])  # every column except the target
y = df[target_col]  # the first column is the value being predicted

# 4️⃣ Split dataset
# 80/20 split; the fixed random_state makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5️⃣ Train a Regression Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 6️⃣ Predictions
y_pred = model.predict(X_test)

# 7️⃣ Evaluation
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike MSE
r2 = r2_score(y_test, y_pred)

# 8️⃣ Display results
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R² Score: {r2}")


Mean Absolute Error: 0.09458854875283458
Mean Squared Error: 0.0566160442909632
Root Mean Squared Error: 0.23794126227067722
R² Score: 0.995490215446985


In [5]:
import joblib

# Persist the fitted random-forest model to disk. joblib.dump returns the list
# of file names it wrote, which becomes this cell's displayed output.
joblib.dump(value=model, filename="URBAN_HEAT_rf_model.pkl")

['URBAN_HEAT_rf_model.pkl']