## Sine wave

From [here](https://github.com/pytorch/examples/tree/master/time_sequence_prediction).

In [None]:
import numpy as np
import torch

# Fixed seed so the random per-series offsets are the same on every run.
np.random.seed(2)

T = 20    # scale used to turn integer time steps into a sine argument
L = 1000  # number of time steps in each series
N = 100   # number of series

# Every row is the ramp 0..L-1 shifted by one random integer offset drawn
# from [-4T, 4T). Broadcasting the (N, 1) offsets against the (L,) ramp
# produces the full grid in one expression.
x = (
    np.arange(L, dtype='int64')
    + np.random.randint(-4 * T, 4 * T, N)[:, np.newaxis]
)  # 100 x 1000

* `L`: The length of each observation
* `N`: The number of observations
* `T`: The time scale of the sine wave: the data are `sin(x / T)`, so one full period spans `2πT` (≈ 126) time steps

In [None]:
# `x` has shape (N, L): each row is one observation (series) and each column
# is one time step, so a row has L = 1000 entries and a column has N = 100.
print("An example observation:", x[0, :]) # Length 1000
print("A single time step; 100 values of different series:", x[:, 0]) # Length 100

In [None]:
# Map the integer time grid to sine values: dividing by T (as a float)
# stretches the wave, and the result is stored as float64.
data = np.sin(x / float(T)).astype(np.float64)

In [None]:
# Pass the path itself so torch.save opens *and closes* the file; the
# previous `open(..., 'wb')` handle was never closed explicitly.
torch.save(data, 'data/traindata.pt')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

class Sequence(nn.Module):
    """Two stacked LSTM cells plus a linear read-out for scalar sequences.

    At each time step one scalar per sequence is fed through ``lstm1`` and
    ``lstm2``; ``linear`` maps the second cell's hidden state to one scalar
    prediction.

    Args:
        hidden_size: Width of the hidden/cell state of both LSTM cells.
            Defaults to 51.
    """

    def __init__(self, hidden_size: int = 51):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, input: torch.Tensor, future: int = 0) -> torch.Tensor:
        """Predict one value per time step, optionally extrapolating.

        Args:
            input: Tensor of shape ``(batch, time_steps)``; ``input.size(0)``
                is the number of sequences and ``input.size(1)`` the number
                of time steps. Expected to contain at least one time step.
            future: Number of extra steps to generate after the input ends,
                feeding each prediction back in as the next input.

        Returns:
            Tensor of shape ``(batch, time_steps + future)``: the predictions
            made while reading ``input`` followed by the ``future``
            extrapolated values.
        """
        batch_size = input.size(0)

        def zero_state() -> torch.Tensor:
            # Match the input's dtype/device so the module works whether or
            # not it was converted with .double() or moved to another device.
            return torch.zeros(
                batch_size, self.hidden_size,
                dtype=input.dtype, device=input.device,
            )

        h_t, c_t = zero_state(), zero_state()
        h_t2, c_t2 = zero_state(), zero_state()

        outputs = []
        # Split along dim 1 into input.size(1) chunks of shape (batch, 1):
        # one chunk per time step.
        for input_t in input.chunk(input.size(1), dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)

        # Extrapolate: the most recent prediction becomes the next input.
        for _ in range(future):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)

        # Each output is (batch, 1); stack along a new time axis to get
        # (batch, steps, 1), then drop the trailing singleton dimension.
        return torch.stack(outputs, 1).squeeze(2)

In [None]:
# Seed both RNGs so the run is repeatable.
torch.manual_seed(0)
np.random.seed(0)

# Read back the NumPy array saved to 'data/traindata.pt'.
data = torch.load('data/traindata.pt')

# Rows 0-2 are held out for testing; all remaining rows are used for training.
# Within each split the target is the input shifted one step ahead: the input
# drops the last column and the target drops the first.
test_input, test_target = torch.from_numpy(data[:3, :-1]), torch.from_numpy(data[:3, 1:])
input, target = torch.from_numpy(data[3:, :-1]), torch.from_numpy(data[3:, 1:])

In [None]:
# Show the training input and target shapes, one per line.
print(input.shape, target.shape, sep='\n')

**Interpretation**: 

* We have 100 sinusoidal series of length 1000; the first 3 are held out for testing, leaving 97 for training.
* For each sequence, we need an "input" and a "target".
* The input is elements 1 through 999 of each series. The target is elements 2 through 1000, i.e. the input shifted one step ahead.

In [None]:
%matplotlib inline

# Build the model, train it with LBFGS on the whole training set, then
# evaluate on the held-out series and plot their predicted continuation.
# build the model
seq = Sequence()
seq.double() # cast the parameters to float64, matching the float64 data
criterion = nn.MSELoss()
# use LBFGS as optimizer since we can load the whole data to train
optimizer = optim.LBFGS(seq.parameters(), lr=0.8) 
#begin to train
for i in range(1):
    print('STEP: ', i)
    # LBFGS re-evaluates the loss itself, so it is handed a closure that
    # recomputes the loss and its gradients each time it is called.
    def closure():
        optimizer.zero_grad() # clear gradients left over from the previous evaluation
        out = seq(input) # forward pass over all training sequences at once
        loss = criterion(out, target) # compare predictions with the targets
        # (out has the same shape as target because no future steps are requested)
        print('loss:', loss.item())
        loss.backward() # Get gradients
        return loss
    optimizer.step(closure) # Train
    # begin to predict, no need to track gradient here
    with torch.no_grad():
        future = 1000 # number of steps to extrapolate beyond the test input
        pred = seq(test_input, future=future)
        # drop the trailing `future` extrapolated steps so that pred lines up
        # with test_target
        loss = criterion(pred[:, :-future], test_target)
        print('test loss:', loss.item())
        y = pred.detach().numpy() # convert to a numpy array for plotting

    # draw the result
    plt.figure(figsize=(30,10))
    plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
    plt.xlabel('x', fontsize=20)
    plt.ylabel('y', fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    def draw(yi, color):
        # solid line: predictions made while reading the input sequence
        plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)
        # dotted line (color + ':'): the `future` extrapolated steps
        plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)
    # one curve per held-out test series
    draw(y[0], 'r')
    draw(y[1], 'g')
    draw(y[2], 'b')
    plt.savefig('predict%d.pdf'%i) # one PDF per training step

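**Note:** Each call to `optimizer.step(closure)` runs up to 20 LBFGS iterations by default (`max_iter=20`), which is why the loss is printed many times within a single `STEP`. See [here](https://pytorch.org/docs/master/_modules/torch/optim/lbfgs.html).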

Explanation of `torch.no_grad()` block:

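* `pred[:, :-future]`: Drop the last `future` (1000) extrapolated steps and keep the first 999, since `seq` returns the predictions on the input sequence followed by the predictions into the future. 
* `test_target` is the test input shifted one step ahead (999 values): the next-step values that the predictions on the input sequence are compared against.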