# Task 2.1

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# Read the hourly spot prices and order them chronologically by UTC timestamp
df = pd.read_csv("Elspotprices2nd.csv")
df["HourUTC"] = pd.to_datetime(df["HourUTC"])
df = df.set_index("HourUTC").sort_index()

# Work on an independent copy of the price series only
lstm1_prices = df["SpotPriceDKK"].copy()

# Label-based split on the timestamp index: training covers 2019-01-01 through
# 2024-08-31, testing covers September 2024. Each slice is turned into a
# single-column 2-D array, the layout the scaler below is fitted on.
lstm1_train_data, lstm1_test_data = (
    lstm1_prices.loc[start:end].values.reshape(-1, 1)
    for start, end in (
        ("2019-01-01", "2024-08-31"),
        ("2024-09-01", "2024-09-30"),
    )
)

# Learn the min/max from the training period only, then apply that same
# scaling to both splits
lstm1_scaler = MinMaxScaler().fit(lstm1_train_data)
lstm1_train_scaled = lstm1_scaler.transform(lstm1_train_data)
lstm1_test_scaled = lstm1_scaler.transform(lstm1_test_data)

# Create input-output sequences for training
def create_sequences_lstm1(data, window_size=24, horizon=24):
    """Slice a series into (input window, following horizon) training pairs.

    Args:
        data: Sequence indexed along its first axis by time step. This file
            passes a scaled array of shape ``(n_steps, 1)``.
        window_size: Number of consecutive steps in each input sequence.
        horizon: Number of steps immediately after the window that form the
            target. Defaults to 24, the value that used to be hard-coded.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``data[i:i + window_size]`` and ``y[i]`` is the ``horizon`` steps
        that directly follow it. Both arrays are empty when ``data`` holds
        fewer than ``window_size + horizon`` steps.
    """
    # Last valid start index is the one whose target still fits inside data.
    n_pairs = len(data) - window_size - horizon + 1

    X = [data[i:i + window_size] for i in range(n_pairs)]
    y = [
        data[i + window_size:i + window_size + horizon]
        for i in range(n_pairs)
    ]
    return np.array(X), np.array(y)

lstm1_window_size = 24
lstm1_X_train, lstm1_y_train = create_sequences_lstm1(
    lstm1_train_scaled, lstm1_window_size
)

# The LSTM layer consumes (samples, time steps, features); make the single
# feature axis explicit
lstm1_X_train = lstm1_X_train.reshape(*lstm1_X_train.shape[:2], 1)

# One recurrent layer with 64 units followed by a dense head that emits all
# 24 forecast values at once
# NOTE(review): the targets keep a trailing singleton axis, (samples, 24, 1),
# while Dense(24) outputs (samples, 24); this presumably relies on Keras
# squeezing the label rank inside the loss - confirm.
lstm1_model = Sequential([
    LSTM(64, input_shape=(lstm1_window_size, 1)),
    Dense(24),
])
lstm1_model.compile(optimizer='adam', loss='mse')

# Fit for 10 epochs in mini-batches of 32 sequences
lstm1_model.fit(
    lstm1_X_train,
    lstm1_y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Rolling forecast
def rolling_forecast_lstm1(model, history_scaled, test_scaled, window_size=24,
                           n_days=30, horizon=24):
    """Forecast ``n_days`` consecutive blocks of ``horizon`` steps.

    Each forecast is one ``model.predict`` call whose input is the
    ``window_size`` most recent observed values before the block starts: the
    tail of ``history_scaled`` for the first block, then actual values from
    ``test_scaled`` for later blocks (the model's own predictions are never
    fed back in).

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that accepts an array
            of shape ``(1, window_size, 1)``; element 0 of its result is
            stored as the forecast for that block.
        history_scaled: Scaled univariate observations that precede the test
            period.
        test_scaled: Scaled univariate observations of the test period.
        window_size: Length of the model input window. It no longer has to
            equal ``horizon``.
        n_days: Number of forecast blocks to produce. Defaults to 30.
        horizon: Number of steps between successive forecast starts.
            Defaults to 24.

    Returns:
        NumPy array stacking the ``n_days`` forecasts in order.

    Raises:
        ValueError: If fewer than ``window_size`` observations exist before a
            forecast start, or a forecast would start beyond the supplied
            data.
    """
    # Force both series into single-column layout so they can be joined into
    # one continuous timeline regardless of being passed as 1-D or (n, 1).
    history_scaled = np.asarray(history_scaled).reshape(-1, 1)
    test_scaled = np.asarray(test_scaled).reshape(-1, 1)
    observed = np.concatenate([history_scaled, test_scaled], axis=0)
    first_start = len(history_scaled)

    predictions = []
    for day in range(n_days):
        start = first_start + day * horizon
        # Guard explicitly: a negative slice start would silently wrap around.
        if start < window_size or start > len(observed):
            raise ValueError(
                f"Cannot build a {window_size}-step input window for forecast "
                f"block {day}: not enough observed data."
            )
        input_seq = observed[start - window_size:start].reshape(1, window_size, 1)
        pred_scaled = model.predict(input_seq, verbose=0)[0]
        predictions.append(pred_scaled)

    return np.array(predictions)

# Roll the trained network across the test month
lstm1_pred_scaled = rolling_forecast_lstm1(
    model=lstm1_model,
    history_scaled=lstm1_train_scaled,
    test_scaled=lstm1_test_scaled,
    window_size=lstm1_window_size,
)

# Undo the min-max scaling: the scaler was fitted on a single column, so
# flatten to one column first and then restore the 30 x 24 layout
lstm1_pred_flat = lstm1_pred_scaled.reshape((-1, 1))
lstm1_pred_inv = lstm1_scaler.inverse_transform(lstm1_pred_flat).reshape((30, 24))

# Unscaled test prices arranged as 30 rows of 24 values for comparison
lstm1_true_prices = lstm1_test_data.reshape((30, 24))

# Compute RMSE for LSTM
def compute_rmse(y_true, y_pred):
    """Return the root mean squared error between two equally shaped arrays.

    The squared differences are averaged over every element, so a single
    scalar comes back regardless of the inputs' dimensionality.
    """
    residuals = y_true - y_pred
    mean_squared_error_value = np.mean(np.square(residuals))
    return np.sqrt(mean_squared_error_value)

rmse_lstm1 = compute_rmse(lstm1_true_prices, lstm1_pred_inv)
print(f"LSTM (No exogenous) RMSE: {rmse_lstm1:.2f}")

# Compute RMSE for Persistence model: the forecast for each day is the
# previous day's actual prices.
# The forecast for the first test day is taken from the last 24 training
# hours, so the baseline is scored on the same 30 days as the LSTM instead of
# only 29 and the two RMSE values are directly comparable.
# NOTE(review): assumes the last 24 training rows are the day immediately
# before the test period (no missing hours) - confirm against the data.
lstm1_last_train_day = lstm1_train_data[-24:].reshape(1, 24)
lstm1_persistence_pred = np.vstack([lstm1_last_train_day, lstm1_true_prices[:-1]])
lstm1_persistence_true = lstm1_true_prices

rmse_persistence1 = compute_rmse(lstm1_persistence_true, lstm1_persistence_pred)
print(f"Persistence RMSE: {rmse_persistence1:.2f}")


# Task 2.2

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Spot prices, indexed and ordered by UTC timestamp
df_prices = pd.read_csv("Elspotprices2nd.csv")
df_prices["HourUTC"] = pd.to_datetime(df_prices["HourUTC"])
df_prices = df_prices.set_index("HourUTC").sort_index()

# Production/consumption data, prepared the same way
df_exo = pd.read_csv("ProdConData.csv")
df_exo["HourUTC"] = pd.to_datetime(df_exo["HourUTC"])
df_exo = df_exo.set_index("HourUTC").sort_index()

# Keep only the timestamps present in both tables
df_combined = df_prices.join(df_exo, how='inner')

# Price goes first so that it sits in column 0 of the scaled arrays; the
# exogenous drivers follow. Rows with any missing value are dropped.
exogenous_vars = ["GrossConsumptionMWh", "OffshoreWindGe100MW_MWh", "SolarPowerGe40kW_MWh"]
lstm2_features = ["SpotPriceDKK", *exogenous_vars]
df_lstm2 = df_combined.loc[:, lstm2_features].dropna()

# Label-based split on the timestamp index: 2019-01-01 through 2024-08-31
# for training, September 2024 for testing
lstm2_train, lstm2_test = (
    df_lstm2.loc[start:end]
    for start, end in (
        ("2019-01-01", "2024-08-31"),
        ("2024-09-01", "2024-09-30"),
    )
)

# Per-column min-max scaling learned on the training period only
lstm2_scaler = MinMaxScaler().fit(lstm2_train)
lstm2_train_scaled = lstm2_scaler.transform(lstm2_train)
lstm2_test_scaled = lstm2_scaler.transform(lstm2_test)

# Create multivariate sequences
def create_sequences_multivariate(data, window_size=24, horizon=24, target_col=0):
    """Build (multivariate window, univariate target) training pairs.

    Args:
        data: 2-D array-like of shape ``(n_steps, n_features)``.
        window_size: Number of consecutive rows in each input sequence.
        horizon: Number of rows immediately after the window whose target
            column forms the label. Defaults to 24, the value that used to be
            hard-coded.
        target_col: Index of the column to predict. Defaults to 0, the value
            that used to be hard-coded.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X[i]`` holds all features of rows
        ``i .. i + window_size - 1`` and ``y[i]`` holds ``target_col`` of the
        ``horizon`` rows that follow. Both arrays are empty when ``data`` has
        fewer than ``window_size + horizon`` rows.
    """
    # Two-axis indexing below needs an ndarray (a DataFrame would not do).
    data = np.asarray(data)

    # Last valid start index is the one whose target still fits inside data.
    n_pairs = len(data) - window_size - horizon + 1

    X = [data[i:i + window_size] for i in range(n_pairs)]
    y = [
        data[i + window_size:i + window_size + horizon, target_col]
        for i in range(n_pairs)
    ]
    return np.array(X), np.array(y)

lstm2_window_size = 24
lstm2_X_train, lstm2_y_train = create_sequences_multivariate(
    data=lstm2_train_scaled, window_size=lstm2_window_size
)

# Same architecture as the univariate model, except that every time step now
# carries one value per selected feature
lstm2_model = Sequential([
    LSTM(64, input_shape=(lstm2_window_size, len(lstm2_features))),
    Dense(24),
])
lstm2_model.compile(optimizer='adam', loss='mse')
lstm2_model.fit(
    lstm2_X_train,
    lstm2_y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Forecast function
def rolling_forecast_multivariate(model, test_scaled, window_size=24, n_features=4,
                                  history_scaled=None, n_days=30, horizon=24):
    """Forecast ``n_days`` consecutive blocks of ``horizon`` steps.

    The training pairs in this file map a window to the rows that directly
    follow it, so the input for a forecast block must end right before that
    block starts. With ``history_scaled`` supplied, the window for each block
    is the ``window_size`` most recent observed rows before the block: the
    tail of the history for the first block, then actual test rows from
    earlier blocks.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that accepts an array
            of shape ``(1, window_size, n_features)``; element 0 of its
            result is stored as the forecast for that block.
        test_scaled: Scaled test-period array of shape
            ``(n_steps, n_features)``.
        window_size: Length of the model input window.
        n_features: Number of feature columns in the input.
        history_scaled: Scaled rows that precede the test period. When
            ``None`` the legacy behaviour is kept for backward compatibility:
            the window is taken from the start of the forecast block itself,
            which feeds the model the very values it is scored against.
        n_days: Number of forecast blocks to produce. Defaults to 30.
        horizon: Number of steps between successive forecast starts.
            Defaults to 24.

    Returns:
        NumPy array stacking the ``n_days`` forecasts in order.

    Raises:
        ValueError: If a full input window cannot be cut from the supplied
            data for some forecast block.
    """
    test_scaled = np.asarray(test_scaled)
    if history_scaled is None:
        # Legacy alignment: window i covers test rows [i*horizon, i*horizon + window_size).
        observed = test_scaled
        first_window_end = window_size
    else:
        history_scaled = np.asarray(history_scaled)
        observed = np.concatenate([history_scaled, test_scaled], axis=0)
        first_window_end = len(history_scaled)

    predictions = []
    for day in range(n_days):
        window_end = first_window_end + day * horizon
        window_start = window_end - window_size
        # Guard explicitly: a negative slice start would silently wrap around.
        if window_start < 0 or window_end > len(observed):
            raise ValueError(
                f"Cannot build a {window_size}-step input window for forecast "
                f"block {day}: not enough observed data."
            )
        input_seq = observed[window_start:window_end]
        input_seq = input_seq.reshape(1, window_size, n_features)
        pred_scaled = model.predict(input_seq, verbose=0)[0]
        predictions.append(pred_scaled)
    return np.array(predictions)

# Forecast
# The training tail is passed as history so that each day is predicted from
# the rows preceding it, not from that day's own observations.
# NOTE(review): assumes the last training rows are the hours immediately
# before the test period (no gaps left by the join/dropna) - confirm.
lstm2_pred_scaled = rolling_forecast_multivariate(
    lstm2_model,
    lstm2_test_scaled,
    window_size=lstm2_window_size,
    n_features=len(lstm2_features),
    history_scaled=lstm2_train_scaled,
)

# The scaler was fitted on every feature column, so the price forecasts are
# placed in column 0 of a zero-filled placeholder matrix, inverse-transformed,
# and only that column is kept afterwards
lstm2_pred_flat = lstm2_pred_scaled.reshape((-1, 1))
dummy = np.zeros((len(lstm2_pred_flat), len(lstm2_features)))
dummy[:, 0] = lstm2_pred_flat.ravel()
lstm2_pred_inv = lstm2_scaler.inverse_transform(dummy)[:, 0].reshape((30, 24))

# Unscaled test prices arranged as 30 rows of 24 values
lstm2_true = lstm2_test["SpotPriceDKK"].to_numpy().reshape((30, 24))

# Compute RMSE
def compute_rmse(y_true, y_pred):
    """Return the root mean squared error over all elements of the inputs."""
    errors = y_true - y_pred
    return np.sqrt(np.mean(errors * errors))

# Score the multivariate forecast against the actual test prices
rmse_lstm2 = compute_rmse(y_true=lstm2_true, y_pred=lstm2_pred_inv)
print(f"LSTM (with exogenous vars) RMSE: {rmse_lstm2:.2f}")
