In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import datetime as dt

import matplotlib.pyplot as plt
import matplotlib as mpl

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

%matplotlib inline

In [2]:
# Get raw data
def get_raw_data(path):
    df = pd.read_excel(path, index_col=0)
    print(df.head(3))
    return df

raw_data = get_raw_data('../data/raw_data/cascaded_use_case_data.xlsx')

                     Vindhastighet Nilsebu  Lufttemp. Nilsebu  \
Datetime                                                        
2015-01-01 00:00:00                    6.4                1.5   
2015-01-01 01:00:00                    5.8                1.9   
2015-01-01 02:00:00                    6.1                1.6   

                     Vindretning Nilsebu  RelHum Nilsebu  Vannstand Lyngsåna  \
Datetime                                                                       
2015-01-01 00:00:00                200.0            97.3               0.699   
2015-01-01 01:00:00                197.5            95.0               0.700   
2015-01-01 02:00:00                205.3            96.7               0.714   

                     Vanntemp. Hiafossen  Vannstand Hiafossen  \
Datetime                                                        
2015-01-01 00:00:00                  0.9                0.945   
2015-01-01 01:00:00                  0.9                0.946   
2015-01-01 02

In [3]:
df = raw_data[['Q_Kalltveit']]

In [4]:
plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

fig = px.line(df, labels=dict(
    created_at="Date", value="Inflow HBV (m3/s)", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()

In [5]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
training_data = sc.fit_transform(df.values)

In [6]:
def sliding_windows(data, seq_length):
    X, y = [], []
    for i in range(len(data)):
        end_idx = i + seq_length
        if end_idx > len(data) - 1:
            break
        seq_x, seq_y = data[i:end_idx], data[end_idx]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)
seq_length = 4
X, y = sliding_windows(training_data, seq_length)


In [7]:
from torch.autograd import Variable

train_size = int(len(y) * 0.80)
test_size = len(y) - train_size

dataX = Variable(torch.Tensor(np.array(X)))
dataY = Variable(torch.Tensor(np.array(y)))

trainX = Variable(torch.Tensor(np.array(X[0:train_size])))
trainY = Variable(torch.Tensor(np.array(y[0:train_size])))

testX = Variable(torch.Tensor(np.array(X[train_size:len(X)])))
testY = Variable(torch.Tensor(np.array(y[train_size:len(y)])))

In [8]:
import torch
import torch.nn as nn

class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out

In [9]:

num_epochs = 200
learning_rate = 0.01

input_size = 1
hidden_size = 2
num_layers = 1

num_classes = 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    outputs = lstm(trainX)
    optimizer.zero_grad()
    
    loss = criterion(outputs, trainY)
    
    loss.backward()
    
    optimizer.step()
    if epoch % 100 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

Epoch: 0, loss: 0.20925
Epoch: 100, loss: 0.00257


In [10]:
lstm.eval()
train_predict = lstm(dataX)

data_predict = train_predict.data.numpy()
dataY_plot = dataY.data.numpy()

data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

predict_df = pd.DataFrame(dataY_plot, columns=['Y values'])
predict_df['Y predict'] = data_predict

In [11]:
plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

fig = px.line(predict_df, labels=dict(
    created_at="Date", value="Inflow (m3/s)", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()

In [12]:
print("Mean Absolute Error:", mean_absolute_error(dataY_plot, data_predict))

Mean Absolute Error: 1.4331934
