In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM
import tensorflow as tf
import matplotlib.pyplot as plt
import os

In [None]:
# Current working directory of the kernel process; the CSV paths used when
# loading the data are built relative to this directory.
working_dir = os.getcwd()

In [None]:
def _timestamp_suffix_feature(timestamps):
    """Build an (n, 1) integer feature matrix from a TIMESTAMP column.

    Each value is the integer parsed from the last two characters of the
    corresponding timestamp string.

    NOTE(review): if the timestamps end in a minutes field (e.g. "H:MM"),
    this feature is the minute value rather than the hour -- confirm
    against the CSV contents.
    """
    suffix = timestamps.map(lambda stamp: stamp[-2:])
    return suffix.astype(int).to_numpy().reshape(-1, 1)


# Training frame with the U10/V10/WS10/U100/V100/WS100 columns removed;
# only TIMESTAMP and POWER are used from it in this cell.
train_data = pd.read_csv(f"{working_dir}/../data/TrainData.csv").drop(
    columns=["U10", "V10", "WS10", "U100", "V100", "WS100"]
)
weather_forecast_input = pd.read_csv(f"{working_dir}/../data/WeatherForecastInput.csv")
solution = pd.read_csv(f"{working_dir}/../data/Solution.csv")

# Single-feature design matrices (training rows and solution rows) and the
# regression target taken from the POWER column.
X = _timestamp_suffix_feature(train_data["TIMESTAMP"])
X_true = _timestamp_suffix_feature(solution["TIMESTAMP"])
y = train_data["POWER"].to_numpy()

# 80/20 split with a fixed seed so that re-running the cell gives the same
# partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# def prepare_data(data, window_size):
#     X, y = [], []
#     for i in range(len(data) - window_size):
#         X.append(data[i: i + window_size])
#         y.append(data[i + window_size])

#     return np.array(X), np.array(y)

# window_size = 24
# X_train, y_train = prepare_data(train_data["POWER"], window_size)
# X_test = weather_forecast_input["TIMESTAMP"].apply(lambda x: x[-2:]).astype(int).values
# print(X_train.shape)
# X_train = X_train.reshape(-1, 1)
# print(X_train.shape)
# X_test = X_test.reshape(-1, 1)

In [None]:
# Linear Regression: fit on the training split, then predict for the
# solution-file inputs.
lr_model = LinearRegression().fit(X_train, y_train)
lr_predictions = lr_model.predict(X_true)

# Support Vector Regression: scikit-learn's default SVR settings, trained and
# applied on the same inputs as the linear model.
svr_model = SVR().fit(X_train, y_train)
svr_predictions = svr_model.predict(X_true)

# # Artificial Neural Network
# ann_model = Sequential()
# ann_model.add(Dense(units=50, activation='relu', input_dim=1))
# ann_model.add(Dense(units=1))
# ann_model.compile(optimizer='adam', loss='mean_squared_error')
# ann_model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
# ann_predictions = ann_model.predict(X_test)

# # Recurrent Neural Network
# X_train_rnn = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
# rnn_model = Sequential()
# rnn_model.add(SimpleRNN(units=50, activation='relu', input_shape=(10, 1)))
# rnn_model.add(Dense(units=1))
# rnn_model.compile(optimizer='adam', loss='mean_squared_error')
# rnn_model.fit(X_train_rnn, y_train, epochs=100, batch_size=32, verbose=0)
# X_test_rnn = np.reshape(X_test, (X_test.shape[0], 1, 1))
# rnn_predictions = rnn_model.predict(X_test_rnn)

In [None]:
# One output frame is reused for every model: TIMESTAMP is copied from the
# solution file and POWER is overwritten with each model's predictions just
# before the frame is written to disk.
forecast_template = solution[["TIMESTAMP"]].copy()

# Output file name -> predictions to store in the POWER column.
forecast_outputs = {
    "ForecastTemplate3-LR.csv": lr_predictions,
    "ForecastTemplate3-SVR.csv": svr_predictions,
}
for csv_name, predicted_power in forecast_outputs.items():
    forecast_template["POWER"] = predicted_power
    forecast_template.to_csv(csv_name, index=False)

# forecast_template["POWER"] = ann_predictions.flatten()
# forecast_template.to_csv("ForecastTemplate3-ANN.csv", index=False)

# forecast_template["POWER"] = rnn_predictions.flatten()
# forecast_template.to_csv("ForecastTemplate3-RNN.csv", index=False)

In [None]:
def rmse(true_values, predictions):
    """Return the root-mean-squared error between two sets of values.

    Args:
        true_values: Reference values.
        predictions: Predicted values, aligned with ``true_values``.

    Returns:
        The square root of ``mean_squared_error(true_values, predictions)``.
    """
    squared_error_mean = mean_squared_error(true_values, predictions)
    return np.sqrt(squared_error_mean)

# Reference POWER values from the solution file.
true_power = solution["POWER"].to_numpy()

# Score both fitted models against the same reference values.
lr_rmse, svr_rmse = (
    rmse(true_power, model_output)
    for model_output in (lr_predictions, svr_predictions)
)
# ann_rmse = rmse(true_power, ann_predictions)
# rnn_rmse = rmse(true_power, rnn_predictions)

In [None]:
# One row per model: display name and its RMSE against the solution file.
comparison_table = pd.DataFrame(
    [
        ("Linear Regression", lr_rmse),
        ("Support Vector Regression", svr_rmse),
    ],
    columns=["Model", "RMSE"],
)
print(comparison_table)

In [None]:
# Graph
# The x-axis uses the solution file's own timestamps, so every point is drawn
# at the time its row refers to. X_true only holds the two-character timestamp
# suffix used as the model feature; scaling it to day offsets from the earliest
# training timestamp does not reproduce the dates of the solution rows.
X_true_datetime = pd.to_datetime(solution["TIMESTAMP"].to_numpy())

# Plot the time-series figure for Linear Regression and SVR
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(X_true_datetime, solution["POWER"].to_numpy(), label="True wind energy")
ax.plot(X_true_datetime, lr_predictions, label="Linear Regression")
ax.plot(X_true_datetime, svr_predictions, label="SVR")
ax.set_xlabel("Time")
ax.set_ylabel("Wind power")
ax.set_title("Wind power forecasts (Linear Regression and SVR)")
ax.legend()
plt.show()