# Transformer (Short forecasting)
#### Sliding Window Forecasting - 2 weeks in, 1 day out

### Needs to run in Google Colab

In [None]:
# Uncomment to install the dependencies if the runtime does not already have them:
#!pip install torch torchvision torchaudio pandas numpy scikit-learn

from google.colab import drive, files  # type: ignore

# Ask Colab for a file upload; `uploaded` keeps whatever files.upload() returns.
uploaded = files.upload()

# Alternative to uploading: mount Google Drive instead.
# drive.mount('/content/drive')

## Importing data

In [6]:
from pathlib import Path

import pandas as pd

# Locations where the dataset may live, tried in order:
#   1. the original local checkout path (keeps the previous behaviour where it worked),
#   2. the current working directory, which is where a Colab upload ends up.
csv_candidates = [
    Path('/Users/casper/Documents/GitHub/p9-energy/Dataset/ConsumptionIndustry.csv'),
    Path('ConsumptionIndustry.csv'),
]
csv_path = next((candidate for candidate in csv_candidates if candidate.exists()), None)
if csv_path is None:
    raise FileNotFoundError(
        'ConsumptionIndustry.csv not found; looked in: '
        + ', '.join(str(candidate) for candidate in csv_candidates)
    )

# The file is semicolon-separated.
df = pd.read_csv(csv_path, sep=';')

# Convert HourDK to datetime
df['HourDK'] = pd.to_datetime(df['HourDK'])

# ConsumptionkWh is read as text with a decimal comma; swap it for a dot
# (literal replacement, not a regex) and convert to float.
df['ConsumptionkWh'] = df['ConsumptionkWh'].str.replace(",", ".", regex=False).astype(float)

print(df.head())
print('\n')
print(df.tail())
print('\n')
# df.info() prints its report itself and returns None, so it is not wrapped in print().
df.info()
print('\n')

            HourUTC              HourDK  MunicipalityNo Branche  \
0  2021-01-01 00:00 2021-01-01 01:00:00             851  Privat   
1  2021-01-01 01:00 2021-01-01 02:00:00             851  Privat   
2  2021-01-01 02:00 2021-01-01 03:00:00             851  Privat   
3  2021-01-01 03:00 2021-01-01 04:00:00             851  Privat   
4  2021-01-01 04:00 2021-01-01 05:00:00             851  Privat   

   ConsumptionkWh  
0       35086.772  
1       31777.762  
2       28423.659  
3       25675.926  
4       24283.909  


                HourUTC              HourDK  MunicipalityNo Branche  \
33834  2024-11-10 18:00 2024-11-10 19:00:00             851  Privat   
33835  2024-11-10 19:00 2024-11-10 20:00:00             851  Privat   
33836  2024-11-10 20:00 2024-11-10 21:00:00             851  Privat   
33837  2024-11-10 21:00 2024-11-10 22:00:00             851  Privat   
33838  2024-11-10 22:00 2024-11-10 23:00:00             851  Privat   

       ConsumptionkWh  
33834       48584.696  


## Data preparation + Feature Engineering

In [8]:
# Features
from sklearn.preprocessing import MinMaxScaler

# Calendar features derived from the HourDK timestamp.
df['Month'] = df['HourDK'].dt.month
df['Weekday'] = df['HourDK'].dt.weekday
df['Hour'] = df['HourDK'].dt.hour
# .dt.weekday numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
df['isWeekend'] = df['Weekday'] >= 5

# Keep an untouched copy of the raw readings the first time this cell runs.
# Without it, re-running the cell would fit the scaler on already-scaled values
# and scaler.inverse_transform() could no longer recover kWh.
if 'ConsumptionkWhRaw' not in df.columns:
    df['ConsumptionkWhRaw'] = df['ConsumptionkWh']

# Scale the electricity consumption to [0, 1]; later cells read the scaled
# values from 'ConsumptionkWh' and use `scaler` to invert predictions.
scaler = MinMaxScaler()
df['ConsumptionkWh'] = scaler.fit_transform(df[['ConsumptionkWhRaw']])

print(df.head())

            HourUTC              HourDK  MunicipalityNo Branche  \
0  2021-01-01 00:00 2021-01-01 01:00:00             851  Privat   
1  2021-01-01 01:00 2021-01-01 02:00:00             851  Privat   
2  2021-01-01 02:00 2021-01-01 03:00:00             851  Privat   
3  2021-01-01 03:00 2021-01-01 04:00:00             851  Privat   
4  2021-01-01 04:00 2021-01-01 05:00:00             851  Privat   

   ConsumptionkWh  Month  Weekday  Hour  isWeekend  NormalizedConsumption  
0        0.120207      1        4     1      False               0.120207  
1        0.100206      1        4     2      False               0.100206  
2        0.079932      1        4     3      False               0.079932  
3        0.063323      1        4     4      False               0.063323  
4        0.054909      1        4     5      False               0.054909  


## Define Dataset and Dataloader

In [None]:
import torch
from torch.utils.data import Dataset

class EnergyDataset(Dataset):
    """Sliding-window dataset over a one-dimensional series.

    Sample ``idx`` is the pair ``(x, y)`` where ``x`` holds ``window_size``
    consecutive values starting at ``idx`` and ``y`` holds the
    ``forecast_horizon`` values that immediately follow.

    Args:
        data: The series to window. A pandas object (anything exposing
            ``to_numpy()``) is converted to a NumPy array; other sliceable
            sequences such as NumPy arrays or lists are used as given.
        window_size: Number of input values per sample (default 336).
        forecast_horizon: Number of target values per sample (default 24).

    Raises:
        ValueError: If ``window_size`` or ``forecast_horizon`` is not positive.
    """

    def __init__(self, data, window_size=336, forecast_horizon=24):
        if window_size <= 0 or forecast_horizon <= 0:
            raise ValueError('window_size and forecast_horizon must be positive')
        # Normalise pandas input once so __getitem__ can slice positionally
        # without depending on a `.values` attribute that plain arrays lack.
        self.data = data.to_numpy() if hasattr(data, 'to_numpy') else data
        self.window_size = window_size
        self.forecast_horizon = forecast_horizon

    def __len__(self):
        # The last valid start index is len - window - horizon, hence the +1.
        # Clamp at zero so a series that is too short yields an empty dataset.
        return max(0, len(self.data) - self.window_size - self.forecast_horizon + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float32 tensors for window ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError('EnergyDataset index out of range')

        split = idx + self.window_size
        x = self.data[idx:split]
        y = self.data[split:split + self.forecast_horizon]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Consumption column as a plain array: the series the sliding windows are cut from
data = df['ConsumptionkWh'].to_numpy()
dataset = EnergyDataset(data)

# Shuffled mini-batches of 32 windows for training
train_loader = torch.utils.data.DataLoader(
    dataset,
    shuffle=True,
    batch_size=32,
)
  

## Transformer model

In [None]:
import torch.nn as nn

class EnergyTransformer(torch.nn.Module):
    """Encoder-decoder Transformer mapping an input window to a multi-step forecast.

    Args:
        input_size: Number of features per time step.
        output_size: Number of values predicted per sample.
        num_layers: Number of encoder layers and of decoder layers.
        heads: Number of attention heads; must divide ``hidden_dim``.
        hidden_dim: Model width (``d_model`` of ``nn.Transformer``).
        dim_feedforward: Width of the feed-forward sub-layer inside each
            Transformer layer.
        dropout: Dropout probability used inside the Transformer.

    ``forward`` takes a tensor of shape ``(batch, seq_len, input_size)`` and
    returns ``(batch, output_size)``.
    """

    def __init__(self, input_size, output_size, num_layers, heads, hidden_dim, dim_feedforward, dropout):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.heads = heads
        self.hidden_dim = hidden_dim
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout

        # hidden_dim is the model width and dim_feedforward the inner
        # feed-forward width; previously the two were passed the other way round.
        self.encoder = nn.Linear(input_size, hidden_dim)
        self.transformer = nn.Transformer(
            d_model=hidden_dim,
            nhead=heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )
        self.decoder = nn.Linear(hidden_dim, output_size)

    def _positional_encoding(self, seq_len, like):
        """Return sinusoidal position encodings of shape ``(seq_len, 1, hidden_dim)``.

        The result is created on the device and in the dtype of ``like``.
        """
        positions = torch.arange(seq_len, dtype=torch.float32, device=like.device).unsqueeze(1)
        even_dims = torch.arange(0, self.hidden_dim, 2, dtype=torch.float32, device=like.device)
        angles = positions / torch.pow(10000.0, even_dims / self.hidden_dim)

        encoding = torch.zeros(seq_len, self.hidden_dim, dtype=torch.float32, device=like.device)
        encoding[:, 0::2] = torch.sin(angles)
        # For an odd hidden_dim there is one fewer odd slot than even slot.
        encoding[:, 1::2] = torch.cos(angles)[:, : self.hidden_dim // 2]
        return encoding.unsqueeze(1).to(like.dtype)

    def forward(self, x):
        # nn.Transformer defaults to (seq_len, batch, d_model), so move the
        # sequence axis first.
        x = x.permute(1, 0, 2)
        x = self.encoder(x)
        # Attention alone is order-agnostic; without position information the
        # forecast could not depend on the ordering of earlier time steps.
        x = x + self._positional_encoding(x.size(0), x)
        # The embedded window is used as both source and target sequence.
        x = self.transformer(x, x)
        # Keep only the representation at the final time step.
        x = x[-1, :, :]
        x = self.decoder(x)
        return x

# Build the forecaster: one feature per time step in, 24 values out
model = EnergyTransformer(
    input_size=1,
    output_size=24,
    num_layers=2,
    heads=4,
    hidden_dim=32,
    dim_feedforward=512,
    dropout=0.1,
)

### Define Loss Function and Optimizer

In [None]:
# Training objective and optimiser

# Mean squared error (L2) between forecast and target
criterion = nn.MSELoss()

# Adam over all model parameters with a learning rate of 1e-4
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

## Training the model

In [None]:
epochs = 50
for epoch in range(epochs):
    # Put the model in training mode (enables dropout) for this pass.
    model.train()
    epoch_loss = 0

    for inputs, targets in train_loader:
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # The loader yields (batch, window); add a trailing feature axis for the model.
        predictions = model(inputs.unsqueeze(-1))
        batch_loss = criterion(predictions, targets)

        # Back-propagate, then apply the parameter update.
        batch_loss.backward()
        optimizer.step()

        # Accumulate the scalar batch loss for the epoch average.
        epoch_loss += batch_loss.item()

    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader)}')
 

## Evaluate Model

In [None]:

model.eval()
forecast_results = []
forecast_start_date = pd.to_datetime('2021-11-01')
window_size = 336  # number of preceding rows fed to the model (two weeks of hourly data)

for i in range(365):
    # Forecast a new day on every iteration. Previously the start date never
    # moved, so the same day was predicted 365 times.
    day_start = forecast_start_date + pd.Timedelta(days=i)

    # Positional row of the first timestamp of the forecast day. A positional
    # lookup is used because the value is passed to .iloc below.
    matches = (df['HourDK'] == day_start).to_numpy().nonzero()[0]
    if len(matches) == 0:
        # This timestamp is not in the data; skip the day.
        continue
    end_idx = int(matches[0])
    if end_idx < window_size:
        # Not enough history before this day to fill a complete input window.
        continue

    # Extract the two weeks of (scaled) data that precede the forecast day
    window_data = df['ConsumptionkWh'].iloc[end_idx - window_size:end_idx].values
    X_input = torch.tensor(window_data, dtype=torch.float32).unsqueeze(0).unsqueeze(-1)

    # Predict the next 24 hours
    with torch.no_grad():
        predicted = model(X_input).squeeze(0).numpy()

    # Convert the scaled prediction back to kWh
    predicted = scaler.inverse_transform(predicted.reshape(-1, 1)).flatten()

    # Append the result, keyed by the first timestamp it covers
    forecast_results.append({'ForecastStart': day_start, 'PredictedkWh': predicted})

  

### Common Evaluation Metrics for Regression


### Visualize Model Performance

## Load Tensorboard

In [None]:
# Load (or reload) the TensorBoard notebook extension.
# NOTE(review): no cell visible in this notebook writes TensorBoard logs or
# runs `%tensorboard` - confirm this cell is still needed.
%reload_ext tensorboard 

## Save data from training on Google Colab as a zip file
Creates a zip file from the training data.

In [None]:
# Recursively archive the Training_data/ directory into Training.zip.
# NOTE(review): no visible cell creates Training_data/ - confirm it exists
# before running this cell.
!zip -r Training.zip Training_data/