In [2]:
import os
import sys
# Make the project's local modules importable from this notebook.
# Both paths are derived from the current working directory, so the notebook
# has to be started from its own folder for the imports below to resolve.
parent_dir = os.path.dirname(os.getcwd())
grandparent_dir = os.path.dirname(parent_dir)

# src/utils sits under the grandparent directory. The earlier call was
# os.path.join(parent_dir, grandparent_dir, "src", "utils"); since
# grandparent_dir is absolute, os.path.join discarded parent_dir, so dropping
# it leaves the resulting path unchanged while removing the misleading argument.
utils_dir = os.path.join(grandparent_dir, "src", "utils")

# Append only missing entries so that re-running the cell does not keep
# growing sys.path with duplicates.
for extra_path in (parent_dir, utils_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from weighted_accuracy_and_tools import decompose_y, reconstruct_y, weighted_accuracy_score
from process_data import process_data
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import pandas as pd

In [25]:
# Load the enriched feature tables and the training target from
# ../../data/enriched_input (paths are relative to the notebook's folder).
enriched_dir = os.path.join("..", "..", "data", "enriched_input")

X_train_path = os.path.join(enriched_dir, "X_train.csv")
X_test_path = os.path.join(enriched_dir, "X_test.csv")
y_train_path = os.path.join(enriched_dir, "y_train.csv")

X_train = pd.read_csv(X_train_path, sep=',')
X_test = pd.read_csv(X_test_path, sep=',')
y_train = pd.read_csv(y_train_path, sep=',')

# Keep the test set's DELIVERY_START column as a standalone Series: it is
# used later as the key column of the submission file.
delivery_start = X_test["DELIVERY_START"]

In [26]:
# Index every table by its delivery timestamp, parsed as timezone-aware UTC
# datetimes, so that all three frames share the same kind of index.
# NOTE: set_index(..., inplace=True) removes the DELIVERY_START column, so this
# cell cannot be run twice without reloading the data first.
for frame in (X_train, X_test, y_train):
    frame.set_index("DELIVERY_START", inplace=True)
    frame.index = pd.to_datetime(frame.index, utc=True)

Regressor: predict the magnitude component of the target

In [27]:
# Features used by the magnitude regressor, grouped by theme.
# The concatenation order below reproduces the original column order.
# NOTE: 'nucelear_power_available' is spelled exactly as the column is named
# in the data; do not "correct" it here.
power_features = [
    'load_forecast',
    'coal_power_available',
    'gas_power_available',
    'nucelear_power_available',
]
wind_features = [
    'wind_power_forecasts_average',
    'wind_power_forecasts_std',
]
calendar_features = ['dayofweek', 'quarter', 'month', 'year']

columns_to_keep = power_features + wind_features + calendar_features

# Restrict both feature tables to the same columns, in the same order.
X_train_regression = X_train.loc[:, columns_to_keep]
X_test_regression = X_test.loc[:, columns_to_keep]

In [28]:
# Split the target column into two training targets: y_train_direction is fed
# to the classifier and y_train_magnitude to the regressor further down.
# NOTE(review): presumably "direction" is the sign of spot_id_delta and
# "magnitude" its absolute value — confirm against decompose_y in
# weighted_accuracy_and_tools.
y_train_direction, y_train_magnitude = decompose_y(y_train['spot_id_delta'])

In [29]:
# Base learners of the magnitude ensemble. Estimators that accept a
# random_state are seeded so that re-running the cell gives the same fit.
linear_reg = LinearRegression()
knn_reg = KNeighborsRegressor()
decision_tree_reg = DecisionTreeRegressor(random_state=42)
random_forest_reg = RandomForestRegressor(random_state=42)
gradient_boosting_reg = GradientBoostingRegressor(random_state=42)

# A dict keeps insertion order, so the estimator order handed to the ensemble
# is the same as before.
named_regressors = {
    'linear_reg': linear_reg,
    'decision_tree_reg': decision_tree_reg,
    'random_forest_reg': random_forest_reg,
    'gradient_boosting_reg': gradient_boosting_reg,
    'knn_reg': knn_reg,
}

# VotingRegressor averages the predictions of its base estimators;
# fit() returns the fitted ensemble itself, which allows the chained call.
voting_reg = VotingRegressor(
    estimators=list(named_regressors.items())
).fit(X_train_regression, y_train_magnitude)

# Predicted magnitudes for the test set (no evaluation happens in this cell).
y_pred_magnitude = voting_reg.predict(X_test_regression)

Classifier: predict the direction component of the target

In [30]:
# Features used by the direction classifier (a smaller set than the one used
# for the regressor, plus 'hour').
# NOTE: 'nucelear_power_available' is spelled exactly as the column is named
# in the data.
selected_features = [
    'load_forecast', 'nucelear_power_available',
    'wind_power_forecasts_average', 'wind_power_forecasts_std',
    'hour', 'dayofweek',
]

# Same columns, same order, for both the training and the test table.
X_train_classifier = X_train.loc[:, selected_features]
X_test_classifier = X_test.loc[:, selected_features]

In [31]:
# Base learners of the direction ensemble, seeded where a random_state exists.
knn = KNeighborsClassifier(n_neighbors=5)
dtree = DecisionTreeClassifier(random_state=42)
logreg = LogisticRegression(random_state=42)
random_forest = RandomForestClassifier(random_state=42)
gradient_boosting = GradientBoostingClassifier(random_state=42)

# A dict keeps insertion order, so the estimator order is unchanged.
named_classifiers = {
    'knn': knn,
    'dtree': dtree,
    'logreg': logreg,
    'random_forest': random_forest,
    'gradient_boosting': gradient_boosting,
}

# voting='hard' means the ensemble predicts the majority class label among
# its base estimators. fit() returns the fitted ensemble itself.
# NOTE(review): an earlier comment said the data is scaled, but no scaling is
# applied in this notebook before this cell — confirm whether the enriched
# CSVs are already scaled.
extended_voting_clf = VotingClassifier(
    estimators=list(named_classifiers.items()),
    voting='hard',
).fit(X_train_classifier, y_train_direction)

# Predicted directions for the test set (no evaluation happens in this cell).
y_pred_direction = extended_voting_clf.predict(X_test_classifier)

In [32]:
# Recombine the predicted direction and magnitude into a single prediction.
# NOTE(review): presumably the inverse of decompose_y — confirm in
# weighted_accuracy_and_tools.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

# Submission table: the DELIVERY_START values saved at load time next to the
# predictions, flattened to one dimension with reshape(-1).
d = {
    "DELIVERY_START": delivery_start,
    "spot_id_delta": y_pred.reshape(-1),
}
y_pred = pd.DataFrame(d)

# index=False: the row index is not written to the file.
output_path = "../../data/output_y_test/y_result_voting2.csv"
y_pred.to_csv(output_path, index=False)

Score: 0.4888

Try with the original (non-enriched) data

In [11]:
# Load the original (non-enriched) CSV files from ../../data/original_input.
original_dir = os.path.join("..", "..", "data", "original_input")

X_train_path = os.path.join(original_dir, "X_train_Wwou3IE.csv")
X_test_path = os.path.join(original_dir, "X_test_GgyECq8.csv")
y_train_path = os.path.join(original_dir, "y_train_jJtXgMX.csv")

X_train_preprocessed = pd.read_csv(X_train_path, sep=',')
X_test_preprocessed = pd.read_csv(X_test_path, sep=',')
y_train_preprocessed = pd.read_csv(y_train_path, sep=',')

# Overwrites the delivery_start taken from the enriched test set: from here on
# the submission key column comes from the original test file.
delivery_start = X_test_preprocessed["DELIVERY_START"]

In [12]:
# Run the project's preprocessing on copies, so the *_preprocessed frames keep
# the raw file contents (DataFrame.copy() is a deep copy by default).
# NOTE(review): the meaning of the three extra arguments is defined in
# process_data; "standard" presumably selects standard scaling — confirm there.
# NOTE(review): train and test go through separate calls; if scaling is fitted
# inside process_data, each set is scaled with its own statistics — confirm
# that this is intended.
feature_args = ("predicted_spot_price", None, "standard")

X_train = process_data(X_train_preprocessed.copy(), *feature_args)
X_test = process_data(X_test_preprocessed.copy(), *feature_args)
y_train = process_data(y_train_preprocessed.copy(), None, None, None)

In [35]:
# Split the processed target into the classifier target (direction) and the
# regressor target (magnitude) used by the cells below.
# NOTE(review): presumably sign and absolute value of spot_id_delta — confirm
# against decompose_y in weighted_accuracy_and_tools.
y_train_direction, y_train_magnitude = decompose_y(y_train['spot_id_delta'])

In [36]:
# Same five-model magnitude ensemble as before, this time trained on every
# column of the processed original data.
linear_reg = LinearRegression()
knn_reg = KNeighborsRegressor()
decision_tree_reg = DecisionTreeRegressor(random_state=42)
random_forest_reg = RandomForestRegressor(random_state=42)
gradient_boosting_reg = GradientBoostingRegressor(random_state=42)

# A dict keeps insertion order, so the estimator order is unchanged.
named_regressors = {
    'linear_reg': linear_reg,
    'decision_tree_reg': decision_tree_reg,
    'random_forest_reg': random_forest_reg,
    'gradient_boosting_reg': gradient_boosting_reg,
    'knn_reg': knn_reg,
}

# VotingRegressor averages the base predictions; fit() returns the fitted
# ensemble itself.
voting_reg = VotingRegressor(
    estimators=list(named_regressors.items())
).fit(X_train, y_train_magnitude)

# Predicted magnitudes for the test set (no evaluation happens in this cell).
y_pred_magnitude = voting_reg.predict(X_test)

In [37]:
# Same five-model direction ensemble as before, this time trained on every
# column of the processed original data.
knn = KNeighborsClassifier(n_neighbors=5)
dtree = DecisionTreeClassifier(random_state=42)
logreg = LogisticRegression(random_state=42)
random_forest = RandomForestClassifier(random_state=42)
gradient_boosting = GradientBoostingClassifier(random_state=42)

# A dict keeps insertion order, so the estimator order is unchanged.
named_classifiers = {
    'knn': knn,
    'dtree': dtree,
    'logreg': logreg,
    'random_forest': random_forest,
    'gradient_boosting': gradient_boosting,
}

# voting='hard' means the ensemble predicts the majority class label among
# its base estimators. fit() returns the fitted ensemble itself.
# X_train here is the output of process_data called with "standard"
# (presumably standard-scaled — confirm in process_data).
extended_voting_clf = VotingClassifier(
    estimators=list(named_classifiers.items()),
    voting='hard',
).fit(X_train, y_train_direction)

# Predicted directions for the test set (no evaluation happens in this cell).
y_pred_direction = extended_voting_clf.predict(X_test)

In [38]:
# Recombine the predicted direction and magnitude into a single prediction.
# NOTE(review): presumably the inverse of decompose_y — confirm in
# weighted_accuracy_and_tools.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

# Submission table keyed by the DELIVERY_START values of the original test
# file; reshape(-1) flattens the predictions to one dimension.
d = {
    "DELIVERY_START": delivery_start,
    "spot_id_delta": y_pred.reshape(-1),
}
y_pred = pd.DataFrame(d)

# index=False: the row index is not written to the file.
output_path = "../../data/output_y_test/y_result_voting4.csv"
y_pred.to_csv(output_path, index=False)

Select features for the random forest only

In [18]:
# Reduced feature set for the random-forest experiment. The same four columns
# are used for the regression and the classification inputs.
# NOTE: 'nucelear_power_available' is spelled exactly as the column is named
# in the data.
rf_features = [
    'load_forecast',
    'nucelear_power_available',
    'wind_power_forecasts_average',
    'wind_power_forecasts_std',
]

X_train_regression = X_train[rf_features]
X_test_regression = X_test[rf_features]
X_train_classification = X_train[rf_features]
# Bug fix: this frame used to be sliced from X_train, so the "test" matrix for
# the classifier actually contained the training rows.
X_test_classification = X_test[rf_features]


In [19]:
# Split the target into the classifier target (direction) and the regressor
# target (magnitude) for the random-forest experiment below.
# NOTE(review): presumably sign and absolute value of spot_id_delta — confirm
# against decompose_y in weighted_accuracy_and_tools.
y_train_direction, y_train_magnitude = decompose_y(y_train['spot_id_delta'])


In [20]:
# Random-forest pair for this section: a classifier for the direction and a
# regressor for the magnitude, both seeded for reproducible fits.
#
# Fix: the models used to be fitted on the full X_train / X_test, which left
# the feature-selected frames built for this section unused. They are now
# trained on the selected columns, as the section heading intends.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_classification, y_train_direction)
# Slice X_test by the training frame's columns so that the train and test
# features always line up (same columns, same order).
y_pred_direction = clf.predict(X_test[X_train_classification.columns])

reg = RandomForestRegressor(random_state=42)
reg.fit(X_train_regression, y_train_magnitude)
y_pred_magnitude = reg.predict(X_test[X_train_regression.columns])

In [21]:
# Recombine the predicted direction and magnitude into a single prediction.
# NOTE(review): presumably the inverse of decompose_y — confirm in
# weighted_accuracy_and_tools.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

In [22]:
# Submission table keyed by DELIVERY_START; reshape(-1) flattens the
# predictions to one dimension.
# NOTE: y_pred is rebound from the prediction array to a DataFrame here, so
# this cell needs the previous cell to be re-run before it can run again.
d = {
    "DELIVERY_START": delivery_start,
    "spot_id_delta": y_pred.reshape(-1),
}
y_pred = pd.DataFrame(d)

# index=False: the row index is not written to the file.
output_path = "../../data/output_y_test/y_result_double_rd_forest5.csv"
y_pred.to_csv(output_path, index=False)

# TODO: submit y_result_double_rd_forest5.csv for scoring and record the result here. Score: (not yet recorded)

Voting classifier with RF, DT and KNN

In [24]:
# Three-model direction ensemble: random forest, decision tree and k-NN.
rf = RandomForestClassifier(random_state=42)
dt = DecisionTreeClassifier(random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

# A dict keeps insertion order, so the estimator order is unchanged.
named_classifiers = {'rf': rf, 'dt': dt, 'knn': knn}

# voting='hard' means the ensemble predicts the majority class label among
# its base estimators. fit() returns the fitted ensemble itself.
voting_clf = VotingClassifier(
    estimators=list(named_classifiers.items()),
    voting='hard',
).fit(X_train, y_train_direction)

# Predicted directions for the test set.
y_pred_direction = voting_clf.predict(X_test)

Voting regressor using RF, DT and KNN

In [23]:
# Three-model magnitude ensemble: random forest, decision tree and k-NN.
# NOTE: rf, dt and knn are rebound to regressors here; the classifier cell
# uses the same three names for its own estimators.
rf = RandomForestRegressor(random_state=42)
dt = DecisionTreeRegressor(random_state=42)
knn = KNeighborsRegressor(n_neighbors=5)

# A dict keeps insertion order, so the estimator order is unchanged.
named_regressors = {'rf': rf, 'dt': dt, 'knn': knn}

# VotingRegressor averages the base predictions; fit() returns the fitted
# ensemble itself.
voting_reg = VotingRegressor(
    estimators=list(named_regressors.items())
).fit(X_train, y_train_magnitude)

# Predicted magnitudes for the test set.
y_pred_magnitude = voting_reg.predict(X_test)

In [25]:
# Recombine the predicted direction and magnitude into a single prediction.
# NOTE(review): presumably the inverse of decompose_y — confirm in
# weighted_accuracy_and_tools.
y_pred = reconstruct_y(y_pred_direction, y_pred_magnitude)

# Submission table keyed by DELIVERY_START; reshape(-1) flattens the
# predictions to one dimension.
d = {
    "DELIVERY_START": delivery_start,
    "spot_id_delta": y_pred.reshape(-1),
}
y_pred = pd.DataFrame(d)

# index=False: the row index is not written to the file.
output_path = "../../data/output_y_test/y_result_voting6.csv"
y_pred.to_csv(output_path, index=False)

# TODO: submit y_result_voting6.csv for scoring and record the result here. Score: (not yet recorded)