In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

In [7]:
data = pd.read_parquet('../data/cleanedData/allData.parquet')

data['hour'] = data['measurementDate'].dt.hour
data['minute'] = data['measurementDate'].dt.minute
data['day_of_week'] = data['measurementDate'].dt.dayofweek
data['month'] = data['measurementDate'].dt.month

data['hour_sin'] = np.sin(2 * np.pi * data['hour'] / 24)
data['hour_cos'] = np.cos(2 * np.pi * data['hour'] / 24)
data['month_sin'] = np.sin(2 * np.pi * data['month'] / 12)
data['month_cos'] = np.cos(2 * np.pi * data['month'] / 12)

data = data.drop(columns=['measurementDate', 'hour', 'month'])

In [8]:
data['lag_1'] = data['oxygenValue'].shift(1)
data['lag_2'] = data['oxygenValue'].shift(2)
data['lag_3'] = data['oxygenValue'].shift(3)

data['rolling_mean_3'] = data['oxygenValue'].rolling(window=3).mean()
data['rolling_std_3'] = data['oxygenValue'].rolling(window=3).std()

data = data.dropna()

In [9]:
features = ['nitrateValue', 'phosphateValue', 'ammoniumValue', 'waterFlowPerMinute', 'precipitation',
            'hour_sin', 'hour_cos', 'minute', 'day_of_week', 'month_sin', 'month_cos',
            'lag_1', 'lag_2', 'lag_3', 'rolling_mean_3', 'rolling_std_3']
target = 'oxygenValue'

X = data[features]
y = data[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.001, random_state=42)

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

y_pred = svr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.0013436613679321833


In [11]:
# Plot actual vs predicted for the optimized SVR model
plt.figure(figsize=(12, 6))
plt.scatter(y_test, y_pred_optimized, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], '--r', linewidth=2)
plt.xlabel('Actual Oxygen Value')
plt.ylabel('Predicted Oxygen Value')
plt.title('Actual vs Predicted Oxygen Value (Optimized SVR Model)')
plt.show()


NameError: name 'plt' is not defined