In [53]:
import os
import sys
# Make project-local modules importable from this notebook: the directory above the
# working directory, and <two levels up>/src/utils (where the helper modules imported
# below are expected to live).
parent_dir = os.path.dirname(os.getcwd())
grandparent_dir = os.path.dirname(parent_dir)
# os.getcwd() is absolute, so grandparent_dir is absolute as well. os.path.join discards
# every component that precedes an absolute one, so the former leading `parent_dir`
# argument never contributed to the result and is dropped here.
utils_dir = os.path.join(grandparent_dir, "src", "utils")
# Append each directory only once so that re-running this cell does not keep
# growing sys.path with duplicate entries.
for extra_path in (parent_dir, utils_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

import tensorflow as tf
from lag_data import create_lagged_features
from weighted_accuracy_and_tools import decompose_y, reconstruct_y, weighted_accuracy_score
from process_data import process_data

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [54]:
# The three input CSVs all sit in the same folder, two levels above the working directory.
input_dir = os.path.join("..", "..", "data", "original_input")

X_train_path = os.path.join(input_dir, "X_train_Wwou3IE.csv")
X_test_path = os.path.join(input_dir, "X_test_GgyECq8.csv")
y_train_path = os.path.join(input_dir, "y_train_jJtXgMX.csv")

# Raw, unprocessed frames; later cells work on deep copies of these.
X_train_preprocessed = pd.read_csv(X_train_path, delimiter=",")
X_test_preprocessed = pd.read_csv(X_test_path, delimiter=",")
y_train_preprocessed = pd.read_csv(y_train_path, delimiter=",")

# Keep the test timestamps: they are written next to the predictions in the output CSVs.
delivery_start = X_test_preprocessed["DELIVERY_START"]

In [55]:
# Columns kept as model inputs, stated once and applied to both feature frames.
# NOTE(review): 'nucelear_power_available' looks like a misspelling of "nuclear"; it
# presumably matches the column name produced by process_data — confirm before renaming.
feature_columns = [
    'load_forecast',
    'nucelear_power_available',
    'wind_power_forecasts_average',
    'wind_power_forecasts_std',
]

# process_data receives deep copies, so the raw frames loaded earlier stay untouched.
# The calls run in the same order as before (train features, test features, train target).
X_train = process_data(
    X_train_preprocessed.copy(deep=True), "predicted_spot_price", None, "standard"
)
X_test = process_data(
    X_test_preprocessed.copy(deep=True), "predicted_spot_price", None, "standard"
)
y_train = process_data(y_train_preprocessed.copy(deep=True), None, None, None)

X_train = X_train[feature_columns]
X_test = X_test[feature_columns]

In [56]:
# Build lagged versions of the train and test inputs with the project helper
# create_lagged_features, using the same lag count for both.
# NOTE(review): this cell rebinds X_train / y_train / X_test from their own previous
# values, so running it twice lags already-lagged data — re-run the preceding cell first.
n_lags = 5  # Number of lagged observations
X_train, y_train = create_lagged_features(X_train, y_train, n_lags)
# No target frame is loaded for the test set in this notebook, so an empty DataFrame is
# passed in the target position and the second return value is discarded.
X_test, _ = create_lagged_features(X_test, pd.DataFrame([]), n_lags)

In [57]:
# Split the 'spot_id_delta' target into two parts that are modelled separately below:
# y_train_direction is fitted with classifiers and y_train_magnitude with regressors.
# NOTE(review): presumably direction is the sign and magnitude the absolute value of the
# delta — confirm in weighted_accuracy_and_tools.decompose_y.
y_train_direction, y_train_magnitude = decompose_y(y_train['spot_id_delta'])

In [58]:
# Direction model: a random forest classifier trained on the lagged features.
# fit() returns the estimator itself, so construction and training are chained.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train_direction)
y_pred_direction = clf.predict(X_test)

# Magnitude model: a random forest regressor trained on the same features.
reg = RandomForestRegressor(random_state=42).fit(X_train, y_train_magnitude)
y_pred_magnitude = reg.predict(X_test)

In [59]:
# Combine the random forest direction and magnitude predictions into one prediction
# vector with the project helper reconstruct_y.
# NOTE(review): later cells call y_pred.reshape(-1), so the result is presumably a NumPy
# array — confirm in weighted_accuracy_and_tools.reconstruct_y.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

In [60]:
# Sanity check: compare the row counts of the test timestamps, the predictions and the
# lagged test features. In the recorded output the timestamps have 5 more rows than the
# other two, which equals n_lags.
delivery_start.shape, y_pred.shape, X_test.shape

((4942,), (4937,), (4937, 24))

In [61]:
# Display the DELIVERY_START column of the test file (not yet trimmed to the prediction
# length at this point in the notebook).
delivery_start

0       2023-04-02 00:00:00+02:00
1       2023-04-02 01:00:00+02:00
2       2023-04-02 02:00:00+02:00
3       2023-04-02 03:00:00+02:00
4       2023-04-02 04:00:00+02:00
                  ...            
4937    2023-10-24 19:00:00+02:00
4938    2023-10-24 20:00:00+02:00
4939    2023-10-24 21:00:00+02:00
4940    2023-10-24 22:00:00+02:00
4941    2023-10-24 23:00:00+02:00
Name: DELIVERY_START, Length: 4942, dtype: object

In [62]:
# Trim the test timestamps so there is exactly one per prediction. The lagged test set
# has n_lags fewer rows than the raw test file, so the first n_lags timestamps are dropped.
# NOTE(review): this assumes create_lagged_features drops the *leading* rows — confirm in lag_data.
#
# The slice starts from the raw test frame rather than from delivery_start itself, and
# uses n_lags rather than a literal 5. Re-running this cell therefore always yields the
# same result instead of dropping 5 more rows each time, and it stays correct if n_lags
# is changed.
delivery_start = X_test_preprocessed["DELIVERY_START"].iloc[n_lags:]

# Fail loudly if the timestamps and predictions no longer line up, rather than writing
# a misaligned submission file.
assert delivery_start.shape[0] == y_pred.reshape(-1).shape[0], "Shape mismatch between adjusted delivery_start and y_pred"

In [63]:
# Assemble the output table: one row per prediction, keyed by its delivery timestamp.
# The frame gets its own name instead of rebinding y_pred, so y_pred stays the raw
# prediction array and this cell (and the shape check before it) can be re-run without
# failing on y_pred.reshape.
submission = pd.DataFrame(
    data={"DELIVERY_START": delivery_start, "spot_id_delta": y_pred.reshape(-1)}
)

output_path = "../../data/output_y_test/y_result_double_rd_forest6.csv"
# to_csv does not create missing folders; create the output directory on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
submission.to_csv(output_path, index=False)

Let's do the same with voting ensembles: a voting classifier for the direction and a voting regressor for the magnitude.

In [64]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeRegressor

In [65]:
# --- Direction: hard-voting ensemble of three classifiers -------------------------
rf, dt, knn = (
    RandomForestClassifier(random_state=42),
    DecisionTreeClassifier(random_state=42),
    KNeighborsClassifier(n_neighbors=5),
)
# fit() returns the fitted ensemble, so construction and training are chained.
voting_clf = VotingClassifier(
    estimators=[('rf', rf), ('dt', dt), ('knn', knn)],
    voting='hard',
).fit(X_train, y_train_direction)
y_pred_direction = voting_clf.predict(X_test)

# --- Magnitude: voting ensemble of the three matching regressors -------------------
# rf / dt / knn are rebound to the regressor counterparts from here on.
rf, dt, knn = (
    RandomForestRegressor(random_state=42),
    DecisionTreeRegressor(random_state=42),
    KNeighborsRegressor(n_neighbors=5),
)
voting_reg = VotingRegressor(
    estimators=[('rf', rf), ('dt', dt), ('knn', knn)],
).fit(X_train, y_train_magnitude)
y_pred_magnitude = voting_reg.predict(X_test)

In [None]:
# Combine the voting classifier's direction predictions and the voting regressor's
# magnitude predictions into one prediction vector, replacing the random forest y_pred.
# NOTE(review): the next cell calls y_pred.reshape(-1), so the result is presumably a
# NumPy array — confirm in weighted_accuracy_and_tools.reconstruct_y.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

In [None]:
# Assemble the voting-ensemble output table: one row per prediction, keyed by its
# delivery timestamp. The frame gets its own name instead of rebinding y_pred, so y_pred
# stays the raw prediction array and this cell can be re-run without failing on
# y_pred.reshape.
submission = pd.DataFrame(
    data={"DELIVERY_START": delivery_start, "spot_id_delta": y_pred.reshape(-1)}
)

output_path = "../../data/output_y_test/y_result_voting7.csv"
# to_csv does not create missing folders; create the output directory on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
submission.to_csv(output_path, index=False)