In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import torch
from torch.autograd import Variable
import torch.utils.data

from src.data_handling.io_interaction import read_as_pandas_from_disk
from src.utils.helpers import parse_data_for_ml

# Data import and parsing

In [None]:
# Load the raw training file through the project's reader, then hand it to the
# project's ML parsing helper.
# NOTE(review): the layout of `data_parsed` is defined by parse_data_for_ml,
# which is not visible here; later cells take its column 1 as the temperature.
data = read_as_pandas_from_disk("./data/training-data.txt")
data_parsed = parse_data_for_ml(data)

In [None]:
# Column 1 of the parsed frame; the 1:2 slice keeps the result 2-D (n, 1).
training_set = data_parsed.iloc[:, 1:2].values

# Quick visual check of the first 100 samples. The legend call is what makes
# the 'Temperature' label actually show up in the figure.
plt.plot(training_set[0:100], label='Temperature')
plt.xlabel('Timestep')
plt.ylabel('Temperature')
plt.legend()
plt.show()

## Smoothing the data

In [None]:
# Smooth the raw series with a moving average before training.
moving_average_window = 10  # window length in timesteps
series = training_set[:, 0]
kernel = np.ones(moving_average_window)

# mode='same' zero-pads the borders, so dividing every sum by the fixed window
# length would drag the first and last few averages towards zero (and distort
# the minimum later seen by the MinMaxScaler). Divide each sum by the number
# of real samples under the kernel instead; interior values are unchanged.
window_sums = np.convolve(series, kernel, mode='same')
window_counts = np.convolve(np.ones(len(series)), kernel, mode='same')
moving_average = window_sums / window_counts

# NOTE(review): with mode='same' the window is (roughly) centred, so each
# smoothed value also averages in a few later samples - confirm this is
# acceptable for a forecasting target.

# Bring into the same (n, 1) format as training_set, as expected by the scaler
# and the sliding-window step.
moving_average = moving_average.reshape(-1, 1)

In [None]:
# Plot the training set and the moving average on four different points in time
plt.subplot(2, 2, 1)
plt.plot(training_set[0:100], label = 'Temperature')
plt.plot(moving_average[0:100], label = 'Temperature', color='red')
plt.subplot(2, 2, 2)
plt.plot(training_set[1000:1100], label = 'Temperature')
plt.plot(moving_average[1000:1100], label = 'Temperature', color='red')
plt.subplot(2, 2, 3)
plt.plot(training_set[2000:2100], label = 'Temperature')
plt.plot(moving_average[2000:2100], label = 'Temperature', color='red')
plt.subplot(2, 2, 4)
plt.plot(training_set[len(training_set)-500:len(training_set)], label = 'Temperature')
plt.plot(moving_average[len(training_set)-500:len(training_set)], label = 'Temperature', color='red')
plt.show()


In [None]:
# From here on `training_set` refers to the smoothed series, not the raw one.
# Re-running the smoothing cell after this cell would smooth the already
# smoothed data, so re-run from the data-loading cell instead.
training_set = moving_average

# Sliding Windows

In [None]:
def sliding_windows(data, seq_length):
    """Cut a series into paired (input, target) windows of equal length.

    For every valid start index ``i`` the input is ``data[i:i + seq_length]``
    and the target is the ``seq_length`` samples that immediately follow it,
    flattened to one dimension.

    Args:
        data: NumPy array indexed by timestep along its first axis.
        seq_length: Number of timesteps in each input window and in each
            target window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x`` stacks the input windows and
        ``y`` stacks the flattened target windows. Both are empty when
        ``data`` holds fewer than ``2 * seq_length`` timesteps.
    """
    x = []
    y = []

    # A pair starting at i reads data[i : i + 2 * seq_length], so the last
    # valid start index is len(data) - 2 * seq_length (inclusive). The former
    # bound of len(data) - 2 * seq_length - 1 dropped the final two pairs.
    n_windows = len(data) - 2 * seq_length + 1
    for i in range(n_windows):
        split = i + seq_length
        x.append(data[i:split])
        y.append(data[split:split + seq_length].reshape(-1))

    return np.array(x), np.array(y)

# Scale the (smoothed) series with a MinMaxScaler using its default settings.
# NOTE(review): the scaler is fitted on the complete series, so the held-out
# tail also influences the scaling - consider fitting on the training part only.
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

# Each sample pairs seq_length input steps with the seq_length steps after them.
seq_length = 24
x, y = sliding_windows(training_data, seq_length)

# Chronological split: the first 95 % of the windows train the model, the
# remainder is held out for testing.
train_size = int(len(y) * 0.95)
test_size = len(y) - train_size

# Plain float tensors: the Variable wrapper has been a no-op since PyTorch 0.4,
# and x / y are already NumPy arrays, so no extra np.array() copy is needed.
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

trainX = torch.Tensor(x[:train_size])
trainY = torch.Tensor(y[:train_size])

testX = torch.Tensor(x[train_size:])
testY = torch.Tensor(y[train_size:])

# Save the fitted MinMax scaler

In [None]:
# Write the fitted scaler `sc` to disk (relative to the current working
# directory) - presumably so the same scaling can be re-applied wherever the
# saved model is used; confirm against the consumer of this file.
import joblib
joblib.dump(sc, 'temperature_preproc.joblib')

# Model training

In [None]:
from temperature_lstm_model import LSTMModel

In [None]:
# Training hyperparameters.
num_epochs = 1500
learning_rate = 0.01

# Seed PyTorch's RNG before the model is constructed so that weight
# initialisation - and therefore the whole training run - is repeatable.
torch.manual_seed(42)

model = LSTMModel()

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train the model
for epoch in range(1500):
    model.train()
    outputs = model(trainX)
    optimizer.zero_grad()
    # obtain the loss function
    loss = criterion(outputs, trainY)
    loss.backward()
    
    optimizer.step()
    if epoch % 500 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

#### Checking that the model produces different outputs for different inputs

##### Multistep Prediction

In [None]:
from collections import deque

# Forecast the steps that follow one input window and compare them with the
# true continuation of the series.
model.eval()
j = 10  # index of the example window within dataX / dataY

base = dataX[j:j+1]
base = base.data.numpy().tolist()[0]

# Rolling buffer holding the most recent inputs. Its capacity comes from
# seq_length rather than a hard-coded 24, so it cannot drift away from the
# window length used to build the training data.
cache = deque(base, maxlen=seq_length)
# Add a leading batch dimension; the deque is converted to a plain list so the
# tensor constructor receives ordinary nested lists.
cache = [list(cache)]

# Inference only: no autograd graph is needed.
with torch.no_grad():
    prediction = model(torch.Tensor(cache))

# Map every predicted step back from the scaled range to the original units.
predictions = [
    sc.inverse_transform([[val.item()]])[0][0]
    for val in prediction[0]
]

plt.plot(predictions, label='Predicted')
dataY_plot = sc.inverse_transform(dataY[j].data.numpy().reshape(-1, 1))
plt.plot(dataY_plot, label='Actual')
plt.legend()
plt.show()

##### Nice illustrative chart for train/test performance

In [None]:
# Run the trained model over every window (training and held-out part alike).
model.eval()
with torch.no_grad():  # inference only, so skip building an autograd graph
    train_predict = model(dataX)

data_predict = train_predict.numpy()
dataY_plot = dataY.numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Windows to the right of this line were not used for training.
plt.axvline(x=train_size, c='r', linestyle='--', label='Train/test split')

# Plot only column 0 - the first step after each input window. Passing the
# full 2-D arrays would draw one overlapping line per forecast step.
plt.plot(dataY_plot[:, 0], label='Actual')
plt.plot(data_predict[:, 0], label='Predicted')
plt.suptitle('Time-Series Prediction')
plt.legend()
plt.show()

In [None]:
# Save only the model's parameters (its state_dict), not the model object, so
# loading requires constructing an LSTMModel first and then load_state_dict.
torch.save(model.state_dict(), "temperature_lstm.model")

In [None]:
# Fresh, untrained instance; the saved weights are loaded into it in a later cell.
new_model = LSTMModel()

In [None]:
# new_model has not received the saved weights yet (load_state_dict is called
# in a later cell), so this chart shows a freshly constructed model -
# presumably as a baseline to compare against the chart after loading.
new_model.eval()
with torch.no_grad():  # inference only, so skip building an autograd graph
    train_predict = new_model(dataX)

data_predict = train_predict.numpy()
dataY_plot = dataY.numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Windows to the right of this line were not used for training.
plt.axvline(x=train_size, c='r', linestyle='--', label='Train/test split')

# Plot only column 0 - the first step after each input window. Passing the
# full 2-D arrays would draw one overlapping line per forecast step.
plt.plot(dataY_plot[:, 0], label='Actual')
plt.plot(data_predict[:, 0], label='Predicted')
plt.suptitle('Time-Series Prediction')
plt.legend()
plt.show()

In [None]:
# Restore the parameters written by the torch.save cell into the fresh model.
# NOTE(review): torch.load deserialises the file via pickle on older PyTorch
# versions - only load checkpoints from a trusted source.
new_model.load_state_dict(torch.load("temperature_lstm.model"))

In [None]:
# Predict with the model whose weights were restored from disk.
new_model.eval()
with torch.no_grad():  # inference only, so skip building an autograd graph
    train_predict = new_model(dataX)

data_predict = train_predict.numpy()
dataY_plot = dataY.numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Zoom in on windows 1000-1499 and show only column 0, i.e. the first step
# after each input window.
plt.plot(dataY_plot[:, 0][1000:1500], c='b', label='Actual')
plt.plot(data_predict[:, 0][1000:1500], c='r', label='Predicted')
plt.suptitle('Time-Series Prediction')
plt.legend()
plt.show()