In [1]:
from HeatDMA1 import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import pickle
sns.set()


# Heat data set paths
# All input files live under one data root. The root defaults to the original
# location but can be redirected with the HEAT_DATA_DIR environment variable,
# so the notebook can run on another machine without editing every path.
import os

DATA_DIR = os.environ.get('HEAT_DATA_DIR', '/Users/jagrutigodambe/Desktop/Data').rstrip('/')

HeatDMA_Number_of_Meters_path = f'{DATA_DIR}/heat/HeatDMA_Number_of_Meters.csv'
Training_HeatDMA_path = f'{DATA_DIR}/heat/Training_HeatDMA.csv'
Testing_HeatDMA_path = f'{DATA_DIR}/heat/Testing_HeatDMA.csv'
Weather_data_path = f'{DATA_DIR}/weather/Weather_Bronderslev_20152022.csv'

# Seed both RNGs used in this notebook so that runs are repeatable.
torch.manual_seed(0)
np.random.seed(0)


In [2]:
# Build the preprocessing object from the training, meter-count and weather files
prepro = Heat_data_preprocessing(
    Training_HeatDMA_path,
    HeatDMA_Number_of_Meters_path,
    Weather_data_path,
)

# fit() yields the processed training frame; transform() processes the
# testing file with the same (already fitted) preprocessing object.
train_data = prepro.fit()
test_data = prepro.transform(Testing_HeatDMA_path)

# Set the consumption / meter-count columns aside (they are used later to
# convert per-meter values back to totals) and remove them from the frames
# that are fed to the model.
scaling_columns = ['comsumption', 'meters']

train_data_scaling_info = train_data[scaling_columns]
test_data_scaling_info = test_data[scaling_columns]

train_data = train_data.drop(columns=scaling_columns)
test_data = test_data.drop(columns=scaling_columns)

# Persist the scaler held by the preprocessing object for later reuse.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(prepro.get_scaler(), scaler_file)

In [3]:
# Window the training frame: 192 consecutive entries are the input,
# the following 24 entries are the label.
sequence_length_x = 192  # input window length
sequence_length_y = 24   # label window length

sequences, labels, train_scaling_info = create_sequences(
    train_data,
    sequence_length_x,
    sequence_length_y,
    train_data_scaling_info,
)

# NOTE(review): the `test_*` names below come from splitting the *training*
# sequences (train_size = 0.8), not from the separate testing file; they are
# used as the validation set further down.
(
    train_X, train_y, train_scale_info,
    test_X, test_y, test_scale_info,
) = train_test_split(sequences, labels, train_scaling_info, train_size=0.8)

100%|██████████| 26088/26088 [00:02<00:00, 11517.04it/s]


In [4]:
def predict(model, data):
    """Run ``model`` on ``data`` in inference mode and return the output as a DataFrame.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate. It is switched to eval mode for the duration of
        the call (so dropout / batch-norm layers behave deterministically) and
        restored to its previous mode afterwards.
    data : array-like
        Model input; converted to a float32 tensor and moved to the device of
        the model's parameters (if it has any).

    Returns
    -------
    pandas.DataFrame
        The model output, copied to the CPU and wrapped in a DataFrame.
    """
    was_training = model.training
    model.eval()
    try:
        inputs = torch.as_tensor(data, dtype=torch.float32)
        # Keep input and weights on the same device; parameter-free modules
        # simply run on whatever device the input already lives on.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            inputs = inputs.to(first_param.device)
        with torch.no_grad():
            prediction = model(inputs)
    finally:
        # Do not leak the mode switch to the caller.
        model.train(was_training)
    # .cpu() is a no-op for CPU tensors and required before .numpy() otherwise.
    return pd.DataFrame(prediction.cpu().numpy())

In [4]:
# Hyperparameters for the data loaders, the model and the optimizer

# --- data dimensions ---
input_size = 23          # Feature dimension
sequence_length = 192
output_size = 24

# --- network ---
hidden_size = 128
num_layers = 5

# --- optimisation ---
num_epochs = 500
batch_size = 32
learning_rate = 0.001

# Report which compute device the project helper selects.
device = get_device()
print(device)


class HeatData(Dataset):
    """Map-style dataset pairing each input sequence with its label.

    Parameters
    ----------
    sequences : sequence or array
        Indexable collection of input sequences (first axis = sample).
    labels : sequence or array
        Indexable collection of labels, aligned with ``sequences``.

    Raises
    ------
    ValueError
        If ``sequences`` and ``labels`` do not contain the same number of samples.
    """

    def __init__(self, sequences, labels):
        if len(sequences) != len(labels):
            raise ValueError(
                f"sequences and labels must have the same length, "
                f"got {len(sequences)} and {len(labels)}"
            )
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        # len() works for arrays, tensors and plain lists alike.
        return len(self.sequences)

    def __getitem__(self, idx):
        # as_tensor with an explicit dtype avoids the legacy torch.Tensor(...)
        # constructor, which interprets integer arguments as a size.
        features = torch.as_tensor(self.sequences[idx], dtype=torch.float32)
        target = torch.as_tensor(self.labels[idx], dtype=torch.float32)
        return features, target


# Wrap the two splits in datasets; the `test_*` arrays serve as the validation set.
heatData_train = HeatData(train_X, train_y)
heatData_val = HeatData(test_X, test_y)

# Training batches are reshuffled every epoch, validation batches keep their order.
train_dataloader = DataLoader(
    heatData_train,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = DataLoader(
    heatData_val,
    batch_size=batch_size,
    shuffle=False,
)

# Network, loss (mean squared error) and optimizer (Adam).
model = LSTMModel(input_size, hidden_size, output_size, num_layers)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

mps


In [None]:
# Train the model

list_train_loss = []   # mean training loss of every epoch
list_val_loss = []     # mean validation loss of every epoch

prev_val_loss = float('inf')   # best (lowest) validation loss seen so far
best_epoch = None              # epoch index whose checkpoint is the best one
# NOTE(review): `num_epochs` (500) is defined with the other hyperparameters,
# but this loop runs for `epochs` (50) -- confirm which one is intended.
epochs = 50

# The model and every batch must live on the same device. The original loop
# moved only the batches to CUDA, which fails as soon as CUDA is available.
train_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(train_device)

for e in tqdm(range(epochs)):

    # ---- training pass ----
    model.train()
    running_train_loss = 0.0
    for batch_x, batch_y in train_dataloader:
        batch_x, batch_y = batch_x.to(train_device), batch_y.to(train_device)

        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        running_train_loss += loss.item()

    # Average over all batches instead of keeping only the last batch's loss.
    train_loss = running_train_loss / max(len(train_dataloader), 1)
    list_train_loss.append(train_loss)

    # ---- validation pass ----
    valid_loss = 0.0

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:
            batch_x, batch_y = batch_x.to(train_device), batch_y.to(train_device)

            output = model(batch_x)
            valid_loss += criterion(output, batch_y).item()

    valid_loss = valid_loss / max(len(test_dataloader), 1)
    list_val_loss.append(valid_loss)

    # Checkpoint whenever the validation loss improves; the file name carries
    # the zero-based epoch index.
    if prev_val_loss > valid_loss:
        prev_val_loss = valid_loss
        best_epoch = e
        torch.save(model.state_dict(), 'model_daylight{}.pth'.format(e))

    # Report the epoch's *training* loss (previously the last validation batch
    # loss was printed under this label). tqdm.write keeps the progress bar intact.
    tqdm.write(f'Epoch {e+1} \t\t Training Loss: {train_loss} \t\t Validation Loss: {valid_loss}')

if best_epoch is not None:
    print(f'Best checkpoint: model_daylight{best_epoch}.pth (validation loss {prev_val_loss})')

In [None]:
# Load the trained LSTM model weights
# NOTE(review): the checkpoint index is chosen by hand; it should be the
# zero-based epoch of the checkpoint with the lowest validation loss.
checkpoint_epoch = 3

model = LSTMModel(input_size, hidden_size, output_size, num_layers)  # closing parenthesis was missing
# map_location='cpu' lets a checkpoint written on an accelerator load into this CPU-resident model.
model.load_state_dict(torch.load('model_daylight{}.pth'.format(checkpoint_epoch), map_location='cpu'))
# A freshly constructed module starts in training mode; switch to inference behaviour.
model.eval();

In [None]:
# Generate predictions and calculate MAE and MAPE.
predictions = predict(model, test_X)

# NOTE(review): presumably index 0 / 1 on the last axis of `test_scale_info`
# are consumption / number of meters (the column order of
# `train_data_scaling_info`) -- confirm against `create_sequences`.
# Multiplying the per-meter prediction by the meter count gives the total.
prediction_for_all_meter = pd.DataFrame(test_scale_info[:,:,1]) * predictions
actual_consumption = test_scale_info[:,:,0]

# Store both metrics: a notebook cell only displays its last expression, so
# the MAE used to be computed and silently discarded.
mae = mean_absolute_error(actual_consumption, prediction_for_all_meter.to_numpy())
mape = mean_absolute_percentage_error(actual_consumption, prediction_for_all_meter.to_numpy())

print(f'MAE:  {mae}')
print(f'MAPE: {mape}')

In [None]:
# Predict and fill missing values in the test data using the model.
def test_pred(model, test_data):
    torch.no_grad()
    test_data = test_data.reset_index()
    
    idx = test_data[test_data.Per_meter_comsumption_with_inter.isna()].timestamp.dt.date.drop_duplicates(keep='first').index.tolist()
    print(idx)
    for i in idx:
        pre = model(torch.Tensor((test_data.loc[i-192
              :i-1]).drop('timestamp', axis=1).values).reshape(1,192,23))
       #print((pre.detach().numpy().reshape(-1,1)))
        #print(len(test_data.loc[i :i+23,'Per_meter_comsumption_with_inter']))
        test_data.loc[i :i+23,'Per_meter_comsumption_with_inter'] = pre.detach().numpy().reshape(-1,1)
    
    return test_data

In [None]:

# Fill the gaps in the test frame and index the result by timestamp again
preTest = test_pred(model, test_data).set_index('timestamp')

# Recover the original consumption value: per-meter consumption times number of meters
per_meter_values = preTest['Per_meter_comsumption_with_inter']
preTest['Per_meter_comsumption_with_inter'] = per_meter_values * test_data_scaling_info.meters

# Write the resulting column (with its timestamp index) to CSV
result_frame = preTest[['Per_meter_comsumption_with_inter']]
result_frame.to_csv('HeatDMA1_Predictions.csv', index=True)