In [3]:
from WaterDMA2 import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
sns.set()

# Water data set paths
WaterDMA_2_Number_of_Meters_path = '/Users/jagrutigodambe/Desktop/Data/water/WaterDMA_2_Number_of_Meters.csv'
Training_WaterDMA_2_path = '/Users/jagrutigodambe/Desktop/Data/water/Training_WaterDMA_2.csv'
Testing_WaterDMA_2_path = '/Users/jagrutigodambe/Desktop/Data/water/Testing_WaterDMA_2.csv'
Weather_data_path = '/Users/jagrutigodambe/Desktop/Data/weather/Weather_Bronderslev_20152022.csv'
torch.manual_seed(0)
np.random.seed(0)


In [4]:
# Initialize the data preprocessing with file paths
prepro = Water_data_preprocessing(Training_WaterDMA_2_path,
                        WaterDMA_2_Number_of_Meters_path,
                        Weather_data_path)

# Fit the preprocessing on training data and transform it, then apply the same transformation to testing data
train_data = prepro.fit()
test_data = prepro.transform(Testing_WaterDMA_2_path)

train_data_scaling_info = train_data[['comsumption','meters']]
test_data_scaling_info = test_data[['comsumption','meters']]

train_data = train_data.drop(['comsumption','meters'], axis=1)
test_data = test_data.drop(['comsumption','meters'], axis=1)

In [5]:
# Create sequences of 192 entries to predict the next 24 entries

sequence_length_x = 192
sequence_length_y = 24 # y is label
sequences, labels , train_scaling_info  = create_sequences(train_data, sequence_length_x, sequence_length_y, train_data_scaling_info)
train_X, train_y, train_scale_info, test_X, test_y, test_scale_info = train_test_split(sequences, labels , train_scaling_info, train_size = 0.75)


def predict_inverse(model ,data):#, scaling_info):
    with torch.no_grad():
       data = torch.Tensor(data)
       prediction = model(data)
    return pd.DataFrame(prediction.numpy())

100%|██████████| 20232/20232 [00:01<00:00, 12128.80it/s]


In [6]:
# Set hyperparameters, create data loaders, and initialize the model and optimizer
input_size = 23  # Feature dimension
sequence_length = 192

hidden_size = 128
num_layers = 5

output_size = 24


num_epochs = 500
batch_size = 32

learning_rate = 0.001

device = get_device()
print(device)


class HeatData(Dataset):

    def __init__(self, sequences, labels):
      self.sequences = sequences
      self.labels = labels

    def __len__(self):
      return self.sequences.shape[0]

    def __getitem__(self, idx):
      return torch.Tensor(self.sequences[idx]), torch.Tensor(self.labels[idx])


heatData_train = HeatData(train_X, train_y)
heatData_val = HeatData(test_X, test_y)

train_dataloader = DataLoader(heatData_train, batch_size=batch_size, shuffle = True)
test_dataloader = DataLoader(heatData_val, batch_size=batch_size, shuffle = False)

model = LSTMModel(input_size, hidden_size, output_size, num_layers)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

mps


In [None]:
# Train the model 
list_train_loss = []
list_val_loss = []

prev_val_loss = 1000
epochs = 50
for e in tqdm(range(epochs)):
    model.train()
    for data, labels in train_dataloader:
        # Transfer Data to GPU if available
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        # Clear the gradients
        optimizer.zero_grad()
        # Forward Pass
        target = model(data)
        # Find the Loss
        loss = criterion(target,labels)
        # Calculate gradients
        loss.backward()
        # Update Weights
        optimizer.step()
    list_train_loss.append(loss.item())
    valid_loss = 0.0

    model.eval()     # Optional when not using Model Specific layer
    for data, labels in test_dataloader:
        # Transfer Data to GPU if available
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        # Forward Pass
        target = model(data)
        # Find the Loss
        val_loss = criterion(target, labels)
        # Calculate Loss
        valid_loss += val_loss.item()
    valid_loss =  valid_loss/len(test_dataloader)
    list_val_loss.append(valid_loss)

    if prev_val_loss > valid_loss:
        # updating loss
        prev_val_loss = valid_loss
        torch.save(model.state_dict(), 'model_watertest{}.pth'.format(e))

    print(f'Epoch {e+1} \t\t Training Loss: { loss.item()} \t\t Validation Loss: { valid_loss}')

In [None]:
# Load the trained LSTM model weights
model = LSTMModel(input_size, hidden_size, output_size, num_layers)
model.load_state_dict(torch.load('model_water{}.pth'.format(7)))

<All keys matched successfully>

In [None]:

predictions = predict_inverse(model, test_X)

prediction_for_all_meter = pd.DataFrame(test_scale_info[:,:,1]) * predictions

mean_absolute_percentage_error(test_scale_info[:,:,0],prediction_for_all_meter.to_numpy())

25.93383428786942

In [None]:
# Generate predictions and calculate MAE and MAPE.
predictions = predict_inverse(model, test_X)
prediction_for_all_meter = pd.DataFrame(test_scale_info[:,:,1]) * predictions

mean_absolute_error(test_scale_info[:,:,0],prediction_for_all_meter.to_numpy())
mean_absolute_percentage_error(test_scale_info[:,:,0],prediction_for_all_meter.to_numpy())

0.5638536555297279

In [None]:
# Predict and fill missing values in the test data using the model.
def test_pred(model, test_data):
    torch.no_grad()
    test_data = test_data.reset_index()
    
    idx = test_data[test_data.Per_meter_comsumption_with_inter.isna()].timestamp.dt.date.drop_duplicates(keep='first').index.tolist()
    print(idx)
    for i in idx:
        pre = model(torch.Tensor((test_data.loc[i-192
              :i-1]).drop('timestamp', axis=1).values).reshape(1,192,23))
       #print((pre.detach().numpy().reshape(-1,1)))
        #print(len(test_data.loc[i :i+23,'Per_meter_comsumption_with_inter']))
        test_data.loc[i :i+23,'Per_meter_comsumption_with_inter'] = pre.detach().numpy().reshape(-1,1)
    
    return test_data


In [None]:
preTest = test_pred(model, test_data)
preTest= preTest.set_index('timestamp')
# Calculating original consumption value from per meter consumption and number of meters
preTest.Per_meter_comsumption_with_inter  = preTest.Per_meter_comsumption_with_inter * test_data_scaling_info.meters
# Storing results in CSV
preTest[['Per_meter_comsumption_with_inter']].to_csv('WaterDMA_2.csv',index=True)

[192, 384, 576, 768, 960, 1152, 1344, 1536, 1728, 1920, 2112, 2304, 2496, 2688, 2880, 3072, 3264, 3456, 3648, 3840]
