In [15]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from datetime import timedelta

In [16]:
df = pd.read_csv(r'data/unpred/combined_data.csv')


In [17]:
df = df.rename(columns={'MN': 'month', 'DT': 'day', 'YEAR': 'year', 'HR': 'hr_code'})


In [18]:
hr_code_to_hour = {0: 0, 12: 3, 24: 6, 36: 9, 48: 12, 60: 15, 72: 18, 84: 21}
valid_codes = list(hr_code_to_hour.keys())

In [19]:
df = df[df['hr_code'].isin(valid_codes)]


In [20]:
df['hour'] = df['hr_code'].map(hr_code_to_hour)


In [21]:
df['DateTime'] = pd.to_datetime(df[['year', 'month', 'day']]) + pd.to_timedelta(df['hour'], unit='h')


In [22]:
# Time-based features
df['dayofyear'] = df['DateTime'].dt.dayofyear
df['weekday'] = df['DateTime'].dt.weekday
df['month'] = df['DateTime'].dt.month

In [23]:
features = ['DPT', 'WBT', 'hour', 'dayofyear', 'weekday', 'month']
target = 'DBT'


In [24]:
df = df.dropna(subset=features + [target])


In [25]:
X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [26]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [27]:
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Test RMSE: {rmse:.2f}")

Test RMSE: 3.40


In [28]:
last_dt = df['DateTime'].max()
future_times = pd.date_range(start=last_dt + timedelta(hours=3), periods=56, freq='3H')

  future_times = pd.date_range(start=last_dt + timedelta(hours=3), periods=56, freq='3H')


In [30]:
future_df = pd.DataFrame({
    'DateTime': future_times,
    'DPT': df['DPT'].iloc[-56:].mean(),
    'WBT': df['WBT'].iloc[-56:].mean(),
})

In [31]:
future_df['hour'] = future_df['DateTime'].dt.hour
future_df['dayofyear'] = future_df['DateTime'].dt.dayofyear
future_df['weekday'] = future_df['DateTime'].dt.weekday
future_df['month'] = future_df['DateTime'].dt.month


In [32]:
future_features = future_df[features]
future_df['Predicted_DBT'] = model.predict(future_features)


In [33]:
future_df

Unnamed: 0,DateTime,DPT,WBT,hour,dayofyear,weekday,month,Predicted_DBT
0,2025-04-19 03:00:00,20.669643,24.01592,3,109,5,4,28.866974
1,2025-04-19 06:00:00,20.669643,24.01592,6,109,5,4,30.591592
2,2025-04-19 09:00:00,20.669643,24.01592,9,109,5,4,30.577592
3,2025-04-19 12:00:00,20.669643,24.01592,12,109,5,4,30.631592
4,2025-04-19 15:00:00,20.669643,24.01592,15,109,5,4,30.169636
5,2025-04-19 18:00:00,20.669643,24.01592,18,109,5,4,29.232617
6,2025-04-19 21:00:00,20.669643,24.01592,21,109,5,4,29.016651
7,2025-04-20 00:00:00,20.669643,24.01592,0,110,6,4,27.759772
8,2025-04-20 03:00:00,20.669643,24.01592,3,110,6,4,29.385488
9,2025-04-20 06:00:00,20.669643,24.01592,6,110,6,4,30.589592


In [34]:
print("7-Day Forecast (3-hour intervals):")
print(future_df[['DateTime', 'Predicted_DBT']])

7-Day Forecast (3-hour intervals):
              DateTime  Predicted_DBT
0  2025-04-19 03:00:00      28.866974
1  2025-04-19 06:00:00      30.591592
2  2025-04-19 09:00:00      30.577592
3  2025-04-19 12:00:00      30.631592
4  2025-04-19 15:00:00      30.169636
5  2025-04-19 18:00:00      29.232617
6  2025-04-19 21:00:00      29.016651
7  2025-04-20 00:00:00      27.759772
8  2025-04-20 03:00:00      29.385488
9  2025-04-20 06:00:00      30.589592
10 2025-04-20 09:00:00      30.575592
11 2025-04-20 12:00:00      30.631592
12 2025-04-20 15:00:00      30.205636
13 2025-04-20 18:00:00      29.113743
14 2025-04-20 21:00:00      28.958912
15 2025-04-21 00:00:00      27.922104
16 2025-04-21 03:00:00      29.643990
17 2025-04-21 06:00:00      30.635592
18 2025-04-21 09:00:00      30.621592
19 2025-04-21 12:00:00      30.663592
20 2025-04-21 15:00:00      29.792698
21 2025-04-21 18:00:00      29.296334
22 2025-04-21 21:00:00      29.044053
23 2025-04-22 00:00:00      27.805225
24 2025-04-22 0

In [35]:
future_df[['DateTime', 'Predicted_DBT']].to_csv(r'data/pred/dbt_forecast_rfr.csv', index=False)
print("Forecast saved to dbt_7day_3hr_forecast_rfr.csv")

Forecast saved to dbt_7day_3hr_forecast_rfr.csv
