In [3]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from datetime import timedelta


In [4]:
df = pd.read_csv(r'data\unpred\combined_data.csv')


In [5]:
df = df.rename(columns={'MN': 'month', 'DT': 'day', 'YEAR': 'year', 'HR': 'hr_code'})


In [6]:
hr_code_to_hour = {0: 0, 12: 3, 24: 6, 36: 9, 48: 12, 60: 15, 72: 18, 84: 21}
valid_codes = list(hr_code_to_hour.keys())

# Filter to valid HR values only
df = df[df['hr_code'].isin(valid_codes)]

# Map HR code to actual hour
df['hour'] = df['hr_code'].map(hr_code_to_hour)

In [7]:
df['DateTime'] = pd.to_datetime(df[['year', 'month', 'day']]) + pd.to_timedelta(df['hour'], unit='h')

# Time-based features
df['dayofyear'] = df['DateTime'].dt.dayofyear
df['weekday'] = df['DateTime'].dt.weekday
df['month'] = df['DateTime'].dt.month

In [8]:
features = ['DPT', 'WBT', 'hour', 'dayofyear', 'weekday', 'month']
target = 'DBT'

# Drop any rows with missing data
df = df.dropna(subset=features + [target])

In [9]:
X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = SVR(kernel='rbf', C=100, epsilon=0.2)
model.fit(X_train_scaled, y_train)



In [10]:
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Test RMSE: {rmse:.2f}")


Test RMSE: 4.77


In [11]:
last_dt = df['DateTime'].max()
future_times = pd.date_range(start=last_dt + timedelta(hours=3), periods=56, freq='3H')  # 3h steps × 7 days


  future_times = pd.date_range(start=last_dt + timedelta(hours=3), periods=56, freq='3H')  # 3h steps × 7 days


In [12]:
future_df = pd.DataFrame({
    'DateTime': future_times,
    'DPT': df['DPT'].iloc[-56:].mean(),
    'WBT': df['WBT'].iloc[-56:].mean(),
})

# Add time-based features
future_df['hour'] = future_df['DateTime'].dt.hour
future_df['dayofyear'] = future_df['DateTime'].dt.dayofyear
future_df['weekday'] = future_df['DateTime'].dt.weekday
future_df['month'] = future_df['DateTime'].dt.month

# Scale future features using same scaler
future_features = future_df[features]
future_scaled = scaler.transform(future_features)

In [13]:
future_df['Predicted_DBT'] = model.predict(future_scaled)

# Show the forecast
print("7-Day Forecast (3-hour intervals):")
print(future_df[['DateTime', 'Predicted_DBT']])


7-Day Forecast (3-hour intervals):
              DateTime  Predicted_DBT
0  2025-04-19 03:00:00      30.631819
1  2025-04-19 06:00:00      31.040555
2  2025-04-19 09:00:00      31.207746
3  2025-04-19 12:00:00      31.039004
4  2025-04-19 15:00:00      30.675456
5  2025-04-19 18:00:00      30.286260
6  2025-04-19 21:00:00      29.878482
7  2025-04-20 00:00:00      30.213659
8  2025-04-20 03:00:00      30.606818
9  2025-04-20 06:00:00      31.071014
10 2025-04-20 09:00:00      31.261572
11 2025-04-20 12:00:00      31.084225
12 2025-04-20 15:00:00      30.677606
13 2025-04-20 18:00:00      30.226066
14 2025-04-20 21:00:00      29.776682
15 2025-04-21 00:00:00      30.361849
16 2025-04-21 03:00:00      30.626599
17 2025-04-21 06:00:00      31.030173
18 2025-04-21 09:00:00      31.175873
19 2025-04-21 12:00:00      30.984319
20 2025-04-21 15:00:00      30.667972
21 2025-04-21 18:00:00      30.485446
22 2025-04-21 21:00:00      30.493703
23 2025-04-22 00:00:00      30.418251
24 2025-04-22 0

In [14]:
future_df[['DateTime', 'Predicted_DBT']].to_csv(r'data/pred/dbt_forecast_svr.csv', index=False)
print("Forecast saved to dbt_7day_3hr_forecast.csv")

Forecast saved to dbt_7day_3hr_forecast.csv
