In [3]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from datetime import timedelta

In [4]:
# Read the source observations into the working frame `df`.
raw_csv_path = r'data/unpred/santacruz_combined_normal.csv'
df = pd.read_csv(raw_csv_path)


In [5]:
# Swap the terse source headers for descriptive lowercase names; the
# year/month/day names are what pd.to_datetime needs to assemble a date.
column_aliases = {'MN': 'month', 'DT': 'day', 'YEAR': 'year', 'HR': 'hr_code'}
df = df.rename(columns=column_aliases)


In [6]:
# Lookup from hr_code to hour of day. Codes advance in steps of 12 while the
# hours advance in steps of 3, so each hour is simply code // 4
# (0 -> 0, 12 -> 3, ..., 84 -> 21).
hr_code_to_hour = {code: code // 4 for code in range(0, 96, 12)}
# Codes that have a mapping; anything else is filtered out of the data.
valid_codes = list(hr_code_to_hour)

In [7]:
# Keep only the rows whose hr_code has an entry in the lookup table.
# .copy() turns the filtered result into an independent frame: the following
# cells add columns to `df`, and doing that on a boolean-mask slice triggers
# pandas' SettingWithCopyWarning (and is ambiguous about what gets modified).
df = df[df['hr_code'].isin(valid_codes)].copy()


In [8]:
# Decode each hr_code into its hour of day via the lookup table.
decoded_hours = df['hr_code'].map(hr_code_to_hour)
df['hour'] = decoded_hours


In [9]:
# Build one timestamp per row: the calendar date assembled from
# year/month/day plus the decoded hour as a time offset.
df['DateTime'] = pd.to_datetime(df[['year', 'month', 'day']]) + pd.to_timedelta(df['hour'], unit='h')

# Later cells depend on row order being chronological (the unshuffled
# train/test split holds out the *last* rows, and the forecast inputs are
# averaged over the *last* 56 rows), but nothing guarantees the CSV is sorted.
# mergesort is stable, so an already-ordered file is left exactly as it was.
df = df.sort_values('DateTime', kind='mergesort')


In [10]:
# Calendar features read off the timestamp. Note that 'month' already exists
# (renamed from the source column) and is overwritten here with the value
# derived from DateTime.
timestamp_accessor = df['DateTime'].dt
for calendar_field in ('dayofyear', 'weekday', 'month'):
    df[calendar_field] = getattr(timestamp_accessor, calendar_field)

In [11]:
# Model inputs, in the column order the regressor is trained on:
# first the columns taken from the data file, then the derived calendar fields.
# NOTE(review): DPT/WBT/DBT are presumably dew-point, wet-bulb and dry-bulb
# temperatures -- confirm against the data dictionary.
measured_inputs = ['DPT', 'WBT', 'Normal_Temp']
calendar_inputs = ['hour', 'dayofyear', 'weekday', 'month']
features = measured_inputs + calendar_inputs

# Column the model learns to predict.
target = 'DBT'


In [12]:
# Discard any row that is missing a model input or the target value.
required_columns = [*features, target]
df = df.dropna(subset=required_columns)


In [13]:
# Separate inputs from the target, then hold out the final 20% of rows.
# shuffle=False keeps the existing row order, so the test set is the tail
# of the frame rather than a random sample.
X, y = df[features], df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [14]:
# Random forest with 100 trees; the fixed random_state makes the fit repeatable.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)


In [23]:
# Score the held-out rows: root of the mean squared error between the
# observed target and the model's predictions.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Test RMSE: {rmse:.2f}")

Test RMSE: 3.53


In [16]:
# Forecast grid: 56 timestamps spaced 3 hours apart (56 * 3 h = 7 days),
# starting one step after the most recent observation in the data.
# The step is given as a timedelta instead of the string alias '3H': the
# uppercase 'H' alias is deprecated in recent pandas and emits a FutureWarning.
forecast_step = timedelta(hours=3)
last_dt = df['DateTime'].max()
future_times = pd.date_range(start=last_dt + forecast_step, periods=56, freq=forecast_step)

  future_times = pd.date_range(start=last_dt + timedelta(hours=3), periods=56, freq='3H')


In [17]:
# Start the forecast frame from the future timestamps, then fill every
# measured input with a single value: its mean over the last 56 rows of `df`.
# These inputs are therefore constant across the whole horizon; only the
# calendar features added afterwards vary from row to row.
future_df = pd.DataFrame({'DateTime': future_times})
for held_column in ('DPT', 'WBT', 'Normal_Temp'):
    future_df[held_column] = df[held_column].iloc[-56:].mean()

In [18]:
# Derive the same calendar inputs the model was trained on, this time from
# the future timestamps.
future_accessor = future_df['DateTime'].dt
for calendar_field in ('hour', 'dayofyear', 'weekday', 'month'):
    future_df[calendar_field] = getattr(future_accessor, calendar_field)


In [19]:
# Select the inputs in training column order and attach the model's
# prediction for each future timestamp.
future_features = future_df.loc[:, features]
predicted_dbt = model.predict(future_features)
future_df['Predicted_DBT'] = predicted_dbt


In [20]:
# Show the assembled forecast frame (timestamps, model inputs and
# Predicted_DBT) as this cell's output.
future_df

Unnamed: 0,DateTime,DPT,WBT,Normal_Temp,hour,dayofyear,weekday,month,Predicted_DBT
0,2025-04-17 06:00:00,20.433929,23.77722,32.842857,6,107,3,4,30.64
1,2025-04-17 09:00:00,20.433929,23.77722,32.842857,9,107,3,4,30.698
2,2025-04-17 12:00:00,20.433929,23.77722,32.842857,12,107,3,4,30.716
3,2025-04-17 15:00:00,20.433929,23.77722,32.842857,15,107,3,4,29.925158
4,2025-04-17 18:00:00,20.433929,23.77722,32.842857,18,107,3,4,29.261862
5,2025-04-17 21:00:00,20.433929,23.77722,32.842857,21,107,3,4,28.940467
6,2025-04-18 00:00:00,20.433929,23.77722,32.842857,0,108,4,4,26.612993
7,2025-04-18 03:00:00,20.433929,23.77722,32.842857,3,108,4,4,28.717497
8,2025-04-18 06:00:00,20.433929,23.77722,32.842857,6,108,4,4,30.642
9,2025-04-18 09:00:00,20.433929,23.77722,32.842857,9,108,4,4,30.708


In [21]:
# Plain-text listing of just the timestamp and its predicted value.
forecast_view = future_df[['DateTime', 'Predicted_DBT']]
print("7-Day Forecast (3-hour intervals):")
print(forecast_view)

7-Day Forecast (3-hour intervals):
              DateTime  Predicted_DBT
0  2025-04-17 06:00:00      30.640000
1  2025-04-17 09:00:00      30.698000
2  2025-04-17 12:00:00      30.716000
3  2025-04-17 15:00:00      29.925158
4  2025-04-17 18:00:00      29.261862
5  2025-04-17 21:00:00      28.940467
6  2025-04-18 00:00:00      26.612993
7  2025-04-18 03:00:00      28.717497
8  2025-04-18 06:00:00      30.642000
9  2025-04-18 09:00:00      30.708000
10 2025-04-18 12:00:00      30.712000
11 2025-04-18 15:00:00      29.924798
12 2025-04-18 18:00:00      29.388523
13 2025-04-18 21:00:00      29.109025
14 2025-04-19 00:00:00      27.082870
15 2025-04-19 03:00:00      28.814516
16 2025-04-19 06:00:00      30.650000
17 2025-04-19 09:00:00      30.796000
18 2025-04-19 12:00:00      30.916000
19 2025-04-19 15:00:00      29.981852
20 2025-04-19 18:00:00      29.576673
21 2025-04-19 21:00:00      29.235539
22 2025-04-20 00:00:00      27.449974
23 2025-04-20 03:00:00      29.368444
24 2025-04-20 0

In [22]:
# Write the timestamp/prediction pairs to CSV without the row index.
# The confirmation message is built from the same path variable so it always
# names the file that was actually written (the old message named a
# different file than the one passed to to_csv).
forecast_csv_path = r'data/pred/dbt_forecast_rfr.csv'
future_df[['DateTime', 'Predicted_DBT']].to_csv(forecast_csv_path, index=False)
print(f"Forecast saved to {forecast_csv_path}")

Forecast saved to dbt_7day_3hr_forecast_rfr.csv
