In [32]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import sklearn

import preprocessing
import train

In [14]:
%load_ext autoreload
%autoreload 2

In [15]:
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    scores = list()
    # calculate an RMSE score for each day
    for i in range(actual.shape[1]):
        # calculate mseabs
        mse = mean_squared_error(actual[:, i], predicted[:, i])
        # calculate rmse
        rmse = sqrt(mse)
        # store
        scores.append(rmse)
    # calculate overall RMSE
    s = 0
    for row in range(actual.shape[0]):
        for col in range(actual.shape[1]):
            s += (actual[row, col] - predicted[row, col])**2
    score = sqrt(s / (actual.shape[0] * actual.shape[1]))
    return score, scores

In [16]:
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=7):
    # flatten data
    data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
    X, y = list(), list()
    in_start = 0
    # step over the entire history one time step at a time
    for _ in range(len(data)):
        # define the end of the input sequence
        in_end = in_start + n_input
        out_end = in_end + n_out
        # ensure we have enough data for this instance
        if out_end < len(data):
            x_input = data[in_start:in_end, 0]
            x_input = x_input.reshape((len(x_input), 1))
            X.append(x_input)
            y.append(data[in_end:out_end, 0])
        # move along one time step
        in_start += 1
    return array(X), array(y)

In [34]:
# RNN - LSTM
class Lstm(nn.Module):
    def __init__(self,  embeded_dim, hidden_dim, d_out):
        super(Lstm, self).__init__()
        self.embeded_dim = embeded_dim
        self.hidden_dim = hidden_dim
        self.d_out = d_out

        self.lstm = nn.LSTM(self.embeded_dim, self.hidden_dim)
        self.out = nn.Linear(self.hidden_dim, self.d_out)

    def init_hidden(self, batch_size):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (torch.zeros(1, batch_size, self.hidden_dim),
                torch.zeros(1, batch_size, self.hidden_dim))
    
    def forward(self, cell_input):
        lstm_out, self.hidden = self.lstm(cell_input, self.hidden)
        score = self.out(lstm_out)
        return score

In [17]:
df = preprocessing.Preprocessing.load_df('/Users/cm/PycharmProjects/week_program/data/M_data_2019_04_08','train climateschlafzimmer')

In [25]:
df['last_updated'] = df['last_updated'].astype(np.datetime64)

In [27]:
train_set, val_set, test_set = train.train_val_test_split(df, 0.2, 0.2)

In [28]:
X_train = train_set.drop(columns=['current_temp','last_updated'])
y_train = train_set['current_temp']
X_val = val_set.drop(columns=['current_temp','last_updated'])
y_val = val_set['current_temp']
X_test = test_set.drop(columns=['current_temp','last_updated'])
y_test = test_set['current_temp']

In [86]:
y.shape

torch.Size([23210])

In [70]:
X = torch.from_numpy(X_train.to_numpy()).type(torch.float32)

In [51]:
# normalize
X = X / float(X_train.shape[0])

In [71]:
y = torch.from_numpy(y_train.to_numpy()).type(torch.float32)

In [87]:
samples_per_day = 1440
learning_rate = 0.005

n_epochs = 70
batch_size = 60

# Regularisierung
weight_decay=0.0

# the model
hidden_dim = 2500
embeded_dim = X_train.shape[1]
model = Lstm(embeded_dim, hidden_dim, d_out=batch_size)
N = X_train.shape[0]

In [88]:
# ADAM
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [89]:
# Loss
criterion = nn.MSELoss()

In [90]:
loss_hist = []

In [92]:
# Train
epochs = range(n_epochs)
idx = 0

for t in epochs:
    for batch in range(0, int(N/batch_size)+1):
        if(batch<int(N/batch_size)):
        # Step 1. Calculate Batch
            batch_x = X[batch * batch_size : (batch + 1) * batch_size, :]        
            # convert to: sequence x batch_size x n_features 
            batch_x = batch_x.reshape(batch_size, 1, embeded_dim).transpose(0,1)  
            batch_y = y[batch * batch_size : (batch + 1) * batch_size] 
        else:
            batch_x = X[(batch -1) * batch_size +(N % batch_size): batch * batch_size + (N % batch_size), :]   
            # convert to: sequence x batch_size x n_features 
            batch_x = batch_x.reshape(batch_size, seq_length, 1).transpose(0,1)        
            batch_y = y[(batch - 1) * batch_size + (N % batch_size): (batch + 1) * batch_size + (N % batch_size)] 
        # Step 2. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()
            
        # Also, we need to clear out the hidden state of the LSTM,
        # detaching it from its history on the last instance.
        model.hidden = model.init_hidden(batch_size)
        
        # Step 3. Run our forward pass.
        output = model(batch_x)
        # Step 4. Berechne den Fehler mit dem letzten output 
        loss = criterion(output[-1,:,:], batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Berechne den Fehler (Ausgabe des Fehlers alle 100 Iterationen)
    if t % 100 == 0:
        loss_hist.append(loss.item())
        print(t, loss.item())



NameError: name 'seq_length' is not defined