<a href="https://colab.research.google.com/github/Ajayrajc1998/multivariate_LSTM/blob/main/Training_LSTM_with_multiple_time_frame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, file_name) for file_name in file_names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Code for training on hourly electricity and daily weather data and making predictions

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt

# === Load and Preprocess Data === #

# Electricity table: read the CSV, parse the timestamp column, then keep only
# the columns used downstream.
electricity_data = pd.read_csv(
    '/kaggle/input/hourly-electricity-consumption-and-production/electricityConsumptionAndProductioction.csv'
)
electricity_data['DateTime'] = pd.to_datetime(electricity_data['DateTime'])
electricity_clean = electricity_data.loc[:, [
    'DateTime', 'Consumption', 'Nuclear', 'Wind', 'Hydroelectric',
    'Oil and Gas', 'Coal', 'Solar', 'Biomass',
]]

# Weather table: same steps; the file is read with latin1 encoding.
weather_data = pd.read_csv(
    '/kaggle/input/romania-weather-visual-crossing-weather/weather_2011-2021_Romania.csv',
    encoding='latin1',
)
weather_data['Date time'] = pd.to_datetime(weather_data['Date time'])
weather_clean = weather_data.loc[:, [
    'Date time', 'Temperature', 'Dew Point', 'Relative Humidity',
    'Wind Speed', 'Cloud Cover', 'Visibility', 'Sea Level Pressure',
]]

# === Scaling Data and Train/Test Split === #
# Every scaler is fitted on the training rows only and then applied to the
# whole series, so statistics of the held-out rows never influence training.

# Raw feature matrices (timestamps and the 'Consumption' target are excluded)
electricity_scaled = electricity_clean.drop(columns=['DateTime', 'Consumption']).values
weather_scaled = weather_clean.drop(columns=['Date time']).values

# Chronological 80/20 split point, counted in electricity rows
train_size = int(len(electricity_scaled) * 0.8)

# Standardise the electricity features
electricity_scaler = StandardScaler()
electricity_scaler.fit(electricity_scaled[:train_size])
electricity_clean_scaled = electricity_scaler.transform(electricity_scaled)

# Scale 'Consumption' with a MinMaxScaler. Because the range is learned from
# the training rows, held-out values may fall slightly outside [0, 1];
# inverse_transform still maps them back to the original units.
consumption_scaler = MinMaxScaler(feature_range=(0, 1))
consumption_scaler.fit(electricity_clean[['Consumption']].iloc[:train_size])
consumption_scaled = consumption_scaler.transform(electricity_clean[['Consumption']])

# Standardise the weather features.
# NOTE(review): the weather table is cut at the same row index as the
# electricity table. If the two tables do not have one row per identical
# timestamp, this index does not mark the same point in time (and the test
# slice may even be empty) -- confirm against the actual data.
weather_scaler = StandardScaler()
weather_scaler.fit(weather_scaled[:train_size])
weather_clean_scaled = weather_scaler.transform(weather_scaled)

# === Train/Test Split === #

train_electricity = electricity_clean_scaled[:train_size]
test_electricity = electricity_clean_scaled[train_size:]

train_weather = weather_clean_scaled[:train_size]
test_weather = weather_clean_scaled[train_size:]

train_target = consumption_scaled[:train_size]
test_target = consumption_scaled[train_size:]

# === Creating Datasets === #

class ElectricityDataset(Dataset):
    """Sliding windows over a feature matrix, each paired with the next-step target.

    Item ``i`` is ``(window, target)`` where ``window`` holds rows
    ``i .. i + sequence_length - 1`` of the features and ``target`` is the
    target value at row ``i + sequence_length``, i.e. the step right after the
    window. This matches ``__len__`` (``len(data) - sequence_length`` items).

    Args:
        data: 2-D NumPy array or all-numeric DataFrame of shape (rows, features).
        target_column: Column name used to pick the target when ``data`` is a
            DataFrame and ``targets`` is not given.
        sequence_length: Number of consecutive rows in each window.
        targets: Optional array with one target value per row of ``data``
            (shape (rows,) or (rows, 1)). When omitted, the target is taken
            from ``data`` itself: the named column for a DataFrame, the last
            column for an array.

    Raises:
        ValueError: If ``data`` is not 2-D or ``targets`` has a different
            number of rows than ``data``.
    """

    def __init__(self, data, target_column='Consumption', sequence_length=24, targets=None):
        if isinstance(data, pd.DataFrame):
            self.target_column_index = data.columns.get_loc(target_column)
            features = data.to_numpy(dtype=np.float32)
        else:
            self.target_column_index = -1
            features = np.asarray(data, dtype=np.float32)
        if features.ndim != 2:
            raise ValueError(f"data must be 2-D (rows, features), got shape {features.shape}")

        if targets is None:
            target_values = features[:, self.target_column_index]
        else:
            target_values = np.asarray(targets, dtype=np.float32).reshape(-1)
            if len(target_values) != len(features):
                raise ValueError(
                    f"targets has {len(target_values)} rows but data has {len(features)}"
                )

        self.data = features
        self.target_column = target_values
        self.sequence_length = sequence_length

    def __len__(self):
        # Never negative, even when there are fewer rows than one window.
        return max(0, len(self.data) - self.sequence_length)

    def __getitem__(self, idx):
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for {len(self)} windows")
        window_end = idx + self.sequence_length
        seq_x = self.data[idx:window_end, :]
        target = self.target_column[window_end]  # first step after the window
        return torch.tensor(seq_x, dtype=torch.float32), torch.tensor(target, dtype=torch.float32)


class WeatherDataset(Dataset):
    """Sliding windows of ``sequence_length`` consecutive rows over a 2-D array.

    Args:
        data: 2-D array-like of shape (rows, features).
        sequence_length: Number of consecutive rows in each window.

    Raises:
        ValueError: If ``data`` is not 2-D.
    """

    def __init__(self, data, sequence_length=24):
        features = np.asarray(data, dtype=np.float32)
        if features.ndim != 2:
            raise ValueError(f"data must be 2-D (rows, features), got shape {features.shape}")
        self.data = features
        self.sequence_length = sequence_length

    def __len__(self):
        # Never negative, even when there are fewer rows than one window.
        return max(0, len(self.data) - self.sequence_length)

    def __getitem__(self, idx):
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for {len(self)} windows")
        seq_weather = self.data[idx:idx + self.sequence_length, :]
        return torch.tensor(seq_weather, dtype=torch.float32)


# Sequence length for electricity data (e.g., 24 hours)
sequence_length = 24

# Create datasets for training and testing. The scaled 'Consumption' series is
# passed explicitly because the electricity feature arrays do not contain it.
train_electricity_dataset = ElectricityDataset(
    train_electricity,
    target_column='Consumption',
    sequence_length=sequence_length,
    targets=train_target,
)
test_electricity_dataset = ElectricityDataset(
    test_electricity,
    target_column='Consumption',
    sequence_length=sequence_length,
    targets=test_target,
)
train_weather_dataset = WeatherDataset(train_weather, sequence_length=sequence_length)
test_weather_dataset = WeatherDataset(test_weather, sequence_length=sequence_length)

# DataLoaders
batch_size = 32
train_electricity_loader = DataLoader(train_electricity_dataset, batch_size=batch_size, shuffle=True)
test_electricity_loader = DataLoader(test_electricity_dataset, batch_size=batch_size, shuffle=False)

# Weather windows are served one at a time, in order.
train_weather_loader = DataLoader(train_weather_dataset, batch_size=1, shuffle=False)
test_weather_loader = DataLoader(test_weather_dataset, batch_size=1, shuffle=False)


# Extract the timestamps for the train and test datasets.
# The training slice is rows [0, train_size), so its last row is train_size - 1.
train_start_date = electricity_clean['DateTime'].iloc[0]
train_end_date = electricity_clean['DateTime'].iloc[train_size - 1]

# The test period is reported from the first row that has a full window of
# sequence_length test rows before it.
test_start_date = electricity_clean['DateTime'].iloc[train_size + sequence_length]
test_end_date = electricity_clean['DateTime'].iloc[-1]


# Display the first few rows of the error DataFrame





# === Defining the Model === #
class CombinedLSTMModel(nn.Module):
    """Two-branch LSTM: one branch per input sequence, merged by a small MLP.

    Each input sequence is encoded by its own single-layer LSTM. The final
    hidden states of both LSTMs are concatenated and passed through
    ``Linear(2 * hidden_size, 128) -> ReLU -> Linear(128, output_size)``.

    Args:
        electricity_input_size: Number of features per electricity time step.
        weather_input_size: Number of features per weather time step.
        hidden_size: Hidden size of both LSTMs.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, electricity_input_size, weather_input_size, hidden_size, output_size):
        super().__init__()

        # Separate LSTMs for electricity and weather data
        self.electricity_lstm = nn.LSTM(electricity_input_size, hidden_size, batch_first=True)
        self.weather_lstm = nn.LSTM(weather_input_size, hidden_size, batch_first=True)

        # Fully connected head applied to the concatenated hidden states
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, 128),
            nn.ReLU(),
            nn.Linear(128, output_size)
        )

    def forward(self, electricity_data, weather_data):
        """Return predictions for a pair of sequences.

        Args:
            electricity_data: Tensor of shape (batch, seq_len, features), or
                (seq_len, features) for a single unbatched sample.
            weather_data: Tensor laid out the same way; its sequence length
                may differ from the electricity one.

        Returns:
            Tensor of shape (batch, output_size), or (output_size,) for
            unbatched input.
        """
        # h_n has shape (num_layers, [batch,] hidden); [-1] selects the last
        # layer, which already has the batch dimension first.
        _, (elec_hn, _) = self.electricity_lstm(electricity_data)
        elec_features = elec_hn[-1]

        _, (weather_hn, _) = self.weather_lstm(weather_data)
        weather_features = weather_hn[-1]

        # Concatenate along the feature axis; using the last dimension keeps
        # this valid for both batched and unbatched inputs.
        combined_features = torch.cat([elec_features, weather_features], dim=-1)
        return self.fc(combined_features)


# Hyper-parameters: fixed sizes first, then the per-branch feature counts,
# which are read from the second axis of the training arrays.
hidden_size = 64
output_size = 1
electricity_input_size = train_electricity.shape[1]
weather_input_size = train_weather.shape[1]

# Build the two-branch model
model = CombinedLSTMModel(
    electricity_input_size=electricity_input_size,
    weather_input_size=weather_input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Mean-squared-error loss, optimised with Adam
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# === Revised Training Loop === #
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    # One weather window is drawn per electricity batch; the weather loader is
    # restarted whenever it runs out.
    # NOTE(review): the weather window is simply the next one in order, not
    # one selected by the timestamps of the electricity batch -- confirm that
    # this pairing is intended.
    weather_iter = iter(train_weather_loader)

    for elec_inputs, elec_targets in train_electricity_loader:
        weather_inputs = next(weather_iter, None)
        if weather_inputs is None:
            # Weather loader exhausted: start over from its first window.
            weather_iter = iter(train_weather_loader)
            weather_inputs = next(weather_iter, None)
            if weather_inputs is None:
                raise ValueError("train_weather_loader yields no batches; cannot train")

        # Repeat the single weather window so it matches the electricity batch size
        weather_inputs = weather_inputs.repeat(elec_inputs.size(0), 1, 1)

        optimizer.zero_grad()

        # Forward pass. Only the trailing output dimension is squeezed so a
        # batch holding a single sample keeps its batch axis and lines up
        # with the targets.
        outputs = model(elec_inputs, weather_inputs)
        loss = criterion(outputs.squeeze(-1), elec_targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # max(..., 1) avoids a division by zero when the loader has no batches
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / max(len(train_electricity_loader), 1)}")

# === Revised Evaluation Loop === #
model.eval()
predictions = []
actual_values = []

with torch.no_grad():
    # One weather window is drawn per electricity batch; the weather loader is
    # restarted whenever it runs out.
    weather_iter = iter(test_weather_loader)

    for elec_inputs, elec_targets in test_electricity_loader:
        weather_inputs = next(weather_iter, None)
        if weather_inputs is None:
            # Weather loader exhausted: start over from its first window.
            weather_iter = iter(test_weather_loader)
            weather_inputs = next(weather_iter, None)
            if weather_inputs is None:
                raise ValueError("test_weather_loader yields no batches; cannot evaluate")

        # Repeat the single weather window so it matches the electricity batch size
        weather_inputs = weather_inputs.repeat(elec_inputs.size(0), 1, 1)

        # Forward pass
        batch_predictions = model(elec_inputs, weather_inputs)

        # Flatten to 1-D before converting: a bare squeeze() would turn a
        # one-sample batch into a scalar, whose tolist() is a float that
        # list.extend cannot consume.
        predictions.extend(batch_predictions.reshape(-1).tolist())
        actual_values.extend(elec_targets.reshape(-1).tolist())

# === Rescale Predictions and Calculate Errors === #
# Turn both lists into single-column arrays, the layout the scaler works with.
predictions = np.array(predictions).reshape(-1, 1)
actual_values = np.array(actual_values).reshape(-1, 1)

# Map predictions and actual values back through the consumption scaler
predictions_original_scale = consumption_scaler.inverse_transform(predictions)
actual_values_original_scale = consumption_scaler.inverse_transform(actual_values)

# Per-sample absolute error, then the aggregate RMSE and MAE
absolute_error = np.abs(predictions_original_scale - actual_values_original_scale)
squared_error = np.square(predictions_original_scale - actual_values_original_scale)
rmse = np.sqrt(squared_error.mean())
mae = absolute_error.mean()


# === Add Timestamps to error_df === #
# Timestamps of the test rows, skipping the first sequence_length of them
test_timestamps = electricity_clean['DateTime'].iloc[train_size + sequence_length:]
n_predictions = len(predictions_original_scale)

# One row per prediction: timestamp, predicted value, actual value, absolute error
error_columns = {
    'Timestamp': test_timestamps.values[:n_predictions],
    'Predicted Consumption': predictions_original_scale.flatten(),
    'Actual Consumption': actual_values_original_scale.flatten(),
    'Absolute Error': absolute_error.flatten(),
}
error_df = pd.DataFrame(error_columns)

# === Filter First Two Days of Data === #
# Make sure the Timestamp column is datetime-typed before comparing
error_df['Timestamp'] = pd.to_datetime(error_df['Timestamp'])

# Keep rows in the half-open interval [first timestamp, first timestamp + 2 days)
start_date = error_df['Timestamp'].iloc[0]
end_date = start_date + pd.Timedelta(days=2)
in_first_two_days = (error_df['Timestamp'] >= start_date) & (error_df['Timestamp'] < end_date)
filtered_df = error_df[in_first_two_days]

# Report the train and test time periods
print(f"Train Data Period: {train_start_date} to {train_end_date}")
print(f"Test Data Period: {test_start_date} to {test_end_date}")

# Show the first 20 rows of error_df
error_df.head(20)