In [11]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error
import os

In [12]:
working_dir = os.getcwd()

In [13]:
def train_and_evaluate(model, X_train, y_train, X_test, solution):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(solution, y_pred))
    return rmse

In [16]:
train_data = pd.read_csv(f"{working_dir}/../data/TrainData.csv")
train_data = train_data.drop(columns=["U10", "V10", "WS10", "U100", "V100", "WS100"])

forecast_data = pd.read_csv(f"{working_dir}/../data/ForecastTemplate.csv")
solution = pd.read_csv(f"{working_dir}/../data/Solution.csv")

start_date = "2013-11-01 00:00:00"
end_date = "2013-11-30 23:00:00"
mask = (train_data["TIMESTAMP"] >= start_date) & (train_data["TIMESTAMP"] <= end_date)

train_data.set_index("TIMESTAMP", inplace=True)
forecast_data.set_index("TIMESTAMP", inplace=True)
solution.set_index("TIMESTAMP", inplace=True)

X_train = train_data[:-1].values
y_train = train_data[1:].values

X_test = forecast_data.drop(columns=["FORECAST"]).values  # Drop 'FORECAST' column to match the trained models' input

# Linear Regression
lr_model = LinearRegression()
lr_rmse = train_and_evaluate(lr_model, X_train, y_train, X_test, solution)

# Support Vector Regression
svr_model = SVR(kernel="linear", C=1)
svr_rmse = train_and_evaluate(svr_model, X_train, y_train, X_test, solution)

# Artificial Neural Network
ann_model = MLPRegressor(hidden_layer_sizes=(50, 50), activation="relu", solver="adam", max_iter=500)
ann_rmse = train_and_evaluate(ann_model, X_train, y_train, X_test, solution)

# RNN (LSTM)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

y_train_scaled = scaler.fit_transform(y_train.reshape(-1, 1))

X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

lstm_model = Sequential()
lstm_model.add(LSTM(50, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
lstm_model.add(Dense(1))
lstm_model.compile(loss="mean_squared_error", optimizer="adam")

lstm_model.fit(X_train_reshaped, y_train_scaled, epochs=50, batch_size=1, verbose=0)

y_pred_scaled = lstm_model.predict(X_test_reshaped)
y_pred = scaler.inverse_transform(y_pred_scaled)
lstm_rmse = np.sqrt(mean_squared_error(solution, y_pred))

print("RMSE (Linear Regression):", lr_rmse)
print("RMSE (Support Vector Regression):", svr_rmse)
print("RMSE (Artificial Neural Network):", ann_rmse)
print("RMSE (RNN - LSTM):", lstm_rmse)

ValueError: Found array with 0 feature(s) (shape=(720, 0)) while a minimum of 1 is required by LinearRegression.