In [24]:
import os
import sys
# Make the project's helper modules importable from this notebook.
# Both directories are derived from the current working directory, so the
# notebook must be launched from its own folder for the paths to resolve.
parent_dir = os.path.dirname(os.getcwd())
grandparent_dir = os.path.dirname(parent_dir)

# grandparent_dir is absolute, so it is the only anchor os.path.join needs;
# putting parent_dir in front of it would just be discarded by os.path.join.
utils_dir = os.path.join(grandparent_dir, "src", "utils")

# Register each directory once, so re-running the cell does not keep
# growing sys.path with duplicate entries.
for extra_path in (parent_dir, utils_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from weighted_accuracy_and_tools import decompose_y, reconstruct_y, weighted_accuracy_score
from process_data import process_data
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import pandas as pd

In [25]:
# Load the enriched features and targets. All three CSVs sit in the same
# folder, resolved relative to the notebook's working directory.
enriched_dir = os.path.join("..", "..", "data", "enriched_input")

X_train_path = os.path.join(enriched_dir, "X_train.csv")
X_test_path = os.path.join(enriched_dir, "X_test.csv")
y_train_path = os.path.join(enriched_dir, "y_train.csv")

X_train = pd.read_csv(X_train_path, sep=',')
X_test = pd.read_csv(X_test_path, sep=',')
y_train = pd.read_csv(y_train_path, sep=',')

# Keep the test timestamps as read from the file; they are used later as the
# DELIVERY_START column of the submission files.
delivery_start = X_test["DELIVERY_START"]

In [26]:
# Index every frame by its delivery timestamp, parsed as timezone-aware UTC.
# The frames are modified in place, so this cell can only run once per load:
# after it, DELIVERY_START is no longer a regular column.
for frame in (X_train, X_test, y_train):
    frame.set_index("DELIVERY_START", inplace=True)
    frame.index = pd.to_datetime(frame.index, utc=True)

Regressor (predicts the magnitude component of spot_id_delta):

In [27]:
# Feature subset used by the magnitude regressor.
# NOTE: 'nucelear_power_available' is spelled exactly as in the dataset
# header; do not "correct" it or the column lookup will fail.
columns_to_keep = [
    'load_forecast',
    'coal_power_available', 'gas_power_available', 'nucelear_power_available',
    'wind_power_forecasts_average', 'wind_power_forecasts_std',
    'dayofweek', 'quarter', 'month', 'year',
]

# Same column selection for train and test so both frames share one layout.
X_train_regression = X_train.loc[:, columns_to_keep]
X_test_regression = X_test.loc[:, columns_to_keep]

In [28]:
# Split the target into the two parts that are modelled separately below:
# a direction (fitted by the classifier) and a magnitude (fitted by the regressor).
spot_delta_train = y_train['spot_id_delta']
y_train_direction, y_train_magnitude = decompose_y(spot_delta_train)

In [29]:
# Base learners of the magnitude ensemble. The tree-based models are seeded
# so that repeated runs give the same fit.
knn_reg = KNeighborsRegressor()
linear_reg = LinearRegression()
decision_tree_reg = DecisionTreeRegressor(random_state=42)
random_forest_reg = RandomForestRegressor(random_state=42)
gradient_boosting_reg = GradientBoostingRegressor(random_state=42)

# (name, estimator) pairs, in the order they are handed to the ensemble.
regression_members = [
    ('linear_reg', linear_reg),
    ('decision_tree_reg', decision_tree_reg),
    ('random_forest_reg', random_forest_reg),
    ('gradient_boosting_reg', gradient_boosting_reg),
    ('knn_reg', knn_reg),
]

# Fit the ensemble on the magnitude target; fit() returns the fitted ensemble.
voting_reg = VotingRegressor(estimators=regression_members).fit(
    X_train_regression, y_train_magnitude
)

# Predict magnitudes for the test rows. Nothing is scored here because no
# test labels are available in this notebook.
y_pred_magnitude = voting_reg.predict(X_test_regression)

Classifier (predicts the direction component of spot_id_delta):

In [30]:
# Feature subset used by the direction classifier.
# NOTE: 'nucelear_power_available' is spelled exactly as in the dataset
# header; do not "correct" it or the column lookup will fail.
selected_features = [
    'load_forecast',
    'nucelear_power_available',
    'wind_power_forecasts_average', 'wind_power_forecasts_std',
    'hour', 'dayofweek',
]

# Same column selection for train and test so both frames share one layout.
X_train_classifier = X_train.loc[:, selected_features]
X_test_classifier = X_test.loc[:, selected_features]

In [31]:
# Base learners of the direction ensemble. Every model that accepts a seed
# gets one so that repeated runs give the same fit.
gradient_boosting = GradientBoostingClassifier(random_state=42)
random_forest = RandomForestClassifier(random_state=42)
logreg = LogisticRegression(random_state=42)
dtree = DecisionTreeClassifier(random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

# (name, estimator) pairs, in the order they are handed to the ensemble.
classification_members = [
    ('knn', knn),
    ('dtree', dtree),
    ('logreg', logreg),
    ('random_forest', random_forest),
    ('gradient_boosting', gradient_boosting),
]

# 'hard' voting: the ensemble outputs the majority class label of its members.
# fit() returns the fitted ensemble.
extended_voting_clf = VotingClassifier(
    estimators=classification_members,
    voting='hard',
).fit(X_train_classifier, y_train_direction)

# Predict the direction for the test rows. Nothing is scored here because no
# test labels are available in this notebook.
y_pred_direction = extended_voting_clf.predict(X_test_classifier)

In [32]:
# Recombine the predicted direction and magnitude, flattened to one value per test row.
spot_delta_pred = reconstruct_y(y_pred_direction, y_pred_magnitude).reshape(-1)

# Submission layout: the test timestamps next to the predicted delta.
y_pred = pd.DataFrame(
    data={"DELIVERY_START": delivery_start, "spot_id_delta": spot_delta_pred}
)

# index=False keeps the positional row index out of the CSV.
y_pred.to_csv("../../data/output_y_test/y_result_voting2.csv", index=False)

Score for y_result_voting2.csv: 0.4888

Try again with the original (non-enriched) input data

In [33]:
# Load the original (non-enriched) challenge files. All three CSVs sit in the
# same folder, resolved relative to the notebook's working directory.
original_dir = os.path.join("..", "..", "data", "original_input")

X_train_path = os.path.join(original_dir, "X_train_Wwou3IE.csv")
X_test_path = os.path.join(original_dir, "X_test_GgyECq8.csv")
y_train_path = os.path.join(original_dir, "y_train_jJtXgMX.csv")

X_train_preprocessed = pd.read_csv(X_train_path, sep=',')
X_test_preprocessed = pd.read_csv(X_test_path, sep=',')
y_train_preprocessed = pd.read_csv(y_train_path, sep=',')

# Show the raw training frame as the cell output.
X_train_preprocessed

Unnamed: 0,DELIVERY_START,load_forecast,coal_power_available,gas_power_available,nucelear_power_available,wind_power_forecasts_average,solar_power_forecasts_average,wind_power_forecasts_std,solar_power_forecasts_std,predicted_spot_price
0,2022-01-01 02:00:00+01:00,49439.0,3386.0,11487.0,44118.0,3035.0,0.0,79.248348,0.000000,
1,2022-01-01 03:00:00+01:00,46511.0,3386.0,11487.0,44118.0,3143.0,0.0,61.776532,0.000000,
2,2022-01-01 04:00:00+01:00,45158.0,3386.0,11487.0,44118.0,3288.0,0.0,44.291112,0.000000,
3,2022-01-01 05:00:00+01:00,44779.0,3386.0,11487.0,44118.0,3447.0,0.0,36.127588,0.000000,
4,2022-01-01 06:00:00+01:00,45284.0,3386.0,11487.0,44118.0,3679.0,0.0,30.983023,0.000000,
...,...,...,...,...,...,...,...,...,...,...
10600,2023-03-29 19:00:00+02:00,50814.0,3386.0,11952.0,38320.0,7552.0,651.0,247.408490,7.821622,108.11
10601,2023-03-29 20:00:00+02:00,50628.0,3386.0,11952.0,38320.0,8338.0,109.0,155.795012,2.534054,125.66
10602,2023-03-29 21:00:00+02:00,48201.0,3386.0,11952.0,38320.0,9115.0,0.0,126.884684,0.000000,138.01
10603,2023-03-29 22:00:00+02:00,47967.0,3386.0,11952.0,38320.0,9636.0,0.0,156.669189,0.000000,136.74


In [34]:
# Run the project's preprocessing on copies, so that anything process_data
# changes in place cannot touch the raw *_preprocessed frames.
# NOTE(review): the positional arguments are defined by process_data;
# presumably "predicted_spot_price" names a column that gets special handling
# and "standard" selects a scaling mode. Confirm against process_data.
# NOTE: this rebinds X_train / X_test / y_train, replacing the enriched frames
# loaded at the top of the notebook.
X_train = process_data(X_train_preprocessed.copy(), "predicted_spot_price", None, "standard")
X_test = process_data(X_test_preprocessed.copy(), "predicted_spot_price", None, "standard")
y_train = process_data(y_train_preprocessed.copy(), None, None, None)

In [35]:
# Split the processed target into the two parts that are modelled separately
# below: a direction (fitted by the classifier) and a magnitude (fitted by the regressor).
spot_delta_train = y_train['spot_id_delta']
y_train_direction, y_train_magnitude = decompose_y(spot_delta_train)

In [36]:
# Base learners of the magnitude ensemble. The tree-based models are seeded
# so that repeated runs give the same fit.
knn_reg = KNeighborsRegressor()
linear_reg = LinearRegression()
decision_tree_reg = DecisionTreeRegressor(random_state=42)
random_forest_reg = RandomForestRegressor(random_state=42)
gradient_boosting_reg = GradientBoostingRegressor(random_state=42)

# (name, estimator) pairs, in the order they are handed to the ensemble.
regression_members = [
    ('linear_reg', linear_reg),
    ('decision_tree_reg', decision_tree_reg),
    ('random_forest_reg', random_forest_reg),
    ('gradient_boosting_reg', gradient_boosting_reg),
    ('knn_reg', knn_reg),
]

# This run fits on every column of the processed frame (no feature subset).
# fit() returns the fitted ensemble.
voting_reg = VotingRegressor(estimators=regression_members).fit(
    X_train, y_train_magnitude
)

# Predict magnitudes for the test rows. Nothing is scored here because no
# test labels are available in this notebook.
y_pred_magnitude = voting_reg.predict(X_test)

In [37]:
# Fresh, unfitted base learners for the direction ensemble. Every model that
# accepts a seed gets one so that repeated runs give the same fit.
gradient_boosting = GradientBoostingClassifier(random_state=42)
random_forest = RandomForestClassifier(random_state=42)
logreg = LogisticRegression(random_state=42)
dtree = DecisionTreeClassifier(random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

# (name, estimator) pairs, in the order they are handed to the ensemble.
classification_members = [
    ('knn', knn),
    ('dtree', dtree),
    ('logreg', logreg),
    ('random_forest', random_forest),
    ('gradient_boosting', gradient_boosting),
]

# 'hard' voting: the ensemble outputs the majority class label of its members.
# This run fits on every column of the processed frame (no feature subset).
# fit() returns the fitted ensemble.
extended_voting_clf = VotingClassifier(
    estimators=classification_members,
    voting='hard',
).fit(X_train, y_train_direction)

# Predict the direction for the test rows. Nothing is scored here because no
# test labels are available in this notebook.
y_pred_direction = extended_voting_clf.predict(X_test)

In [None]:
# Recombine the predicted direction and magnitude, flattened to one value per test row.
spot_delta_pred = reconstruct_y(y_pred_direction, y_pred_magnitude).reshape(-1)

# NOTE(review): delivery_start was captured from the enriched X_test file at
# the top of the notebook, while these predictions come from the
# original-input X_test. This assumes both files list the same rows in the
# same order; confirm before submitting.
y_pred = pd.DataFrame(
    data={"DELIVERY_START": delivery_start, "spot_id_delta": spot_delta_pred}
)

# index=False keeps the positional row index out of the CSV.
y_pred.to_csv("../../data/output_y_test/y_result_voting4.csv", index=False)

Regression but just voting