In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np

%matplotlib qt
import matplotlib.pyplot as plt
import matplotlib as mpl

from IPython.core.debugger import set_trace
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Synthetic training set. Every sample is a short series (feature column 0) driven by a
# vector of uniform random factors (feature column 1).
num_training_samples = 10000
sequence_length = 5
train_data = np.empty((num_training_samples, sequence_length, 2)) # Hard-coded for 2 features

for row in range(num_training_samples):
    start_value = random.randint(1000000, 1100000)
    factors = np.random.rand(sequence_length)

    series = np.empty(sequence_length)
    series[0] = start_value
    for step in range(1, sequence_length):
        # Next value = previous value * 1.10 * the factor drawn for the previous timestep.
        series[step] = series[step - 1] * 1.10 * factors[step - 1]

    train_data[row, :, 0] = series
    train_data[row, :, 1] = factors

# Min-max scale each (timestep, feature) position independently across samples.
# Note that maxes_train is taken AFTER subtracting the minimum, i.e. it is the value range.
mins_train = np.min(train_data, axis=0)
shifted_train = train_data - mins_train
maxes_train = np.max(shifted_train, axis=0)

# NOTE(review): requires_grad=True makes autograd track the dataset tensor itself.
train_data = torch.tensor(shifted_train / maxes_train, dtype=torch.float32, requires_grad=True)
train_x = train_data[..., 1] # features
train_y = train_data[..., 0] # sequences

In [None]:
# Synthetic held-out set, generated by the same recipe as the training set: column 0 is
# the series, column 1 the uniform random factors that drive it.
num_testing_samples = 10000
sequence_length = 5
test_data = np.empty((num_testing_samples, sequence_length, 2)) # Hard-coded for 2 features

for row in range(num_testing_samples):
    start_value = random.randint(1000000, 1100000)
    factors = np.random.rand(sequence_length)

    series = np.empty(sequence_length)
    series[0] = start_value
    for step in range(1, sequence_length):
        # Next value = previous value * 1.10 * the factor drawn for the previous timestep.
        series[step] = series[step - 1] * 1.10 * factors[step - 1]

    test_data[row, :, 0] = series
    test_data[row, :, 1] = factors

# Min-max scale each (timestep, feature) position using this set's OWN statistics.
# maxes_test is taken after subtracting the minimum, i.e. it is the value range.
mins_test = np.min(test_data, axis=0)
shifted_test = test_data - mins_test
maxes_test = np.max(shifted_test, axis=0)

# NOTE(review): requires_grad=True makes autograd track the dataset tensor itself.
test_data = torch.tensor(shifted_test / maxes_test, dtype=torch.float32, requires_grad=True)
test_x = test_data[..., 1] # features
test_y = test_data[..., 0] # sequences

In [None]:
class CustomRecurrent(nn.Module):
    """Hand-rolled recurrent network over one scalar feature per timestep.

    At every timestep:
      * the input layer receives the current feature concatenated with the previous
        timestep's prediction (zero at the first timestep);
      * the hidden layer receives the activated input-layer output concatenated with the
        previous hidden state (zeros at the first timestep);
      * the output layer maps the new hidden state to a single predicted value.

    ``sequence_length`` and ``batch_size`` start at -1 ("not configured") and are set via
    the ``set_*`` methods. ``batch_size`` is kept as a public attribute for callers that
    use it to slice their data; ``forward`` itself takes the batch size from its input.
    """

    def __init__(self, num_hidden_neurons):
        """Build the three linear layers.

        Args:
            num_hidden_neurons: width of the recurrent hidden state.
        """
        super(CustomRecurrent, self).__init__()

        self.sequence_length = -1
        self.batch_size = -1
        self.num_hidden_neurons = num_hidden_neurons

        input_layer_width = 10
        self.input_layer = nn.Linear(2, input_layer_width)
        # The hidden layer consumes [input-layer activation, previous hidden state], so its
        # input width must follow num_hidden_neurons (a fixed 20 only fits 10 hidden neurons).
        self.hidden_layer = nn.Linear(input_layer_width + self.num_hidden_neurons, self.num_hidden_neurons)
        self.output_layer = nn.Linear(self.num_hidden_neurons, 1)

    def forward(self, x):
        """Run the recurrence and return one prediction per timestep.

        Args:
            x: 2-D tensor indexed as [sample, timestep].

        Returns:
            Tensor of shape (x.shape[0], sequence_length) holding the predictions.
            The configured ``sequence_length`` is used when set; otherwise every
            timestep present in ``x`` is processed.
        """
        num_samples = x.shape[0]
        num_steps = self.sequence_length if self.sequence_length > 0 else x.shape[1]

        # Zero "previous prediction" and zero hidden state for the first timestep, created
        # with the same dtype/device as the input.
        previous_prediction = x.new_zeros((num_samples, 1))
        hidden = x.new_zeros((num_samples, self.num_hidden_neurons))

        predictions = []
        for step in range(num_steps):
            y = self.input_layer(torch.cat([x[:, step:step + 1], previous_prediction], dim=1))
            y = torch.tanh(y)
            y = self.hidden_layer(torch.cat([y, hidden], dim=1))
            hidden = torch.tanh(y)
            previous_prediction = self.output_layer(hidden)
            predictions.append(previous_prediction)

        return torch.cat(predictions, dim=1)

    @staticmethod
    def _validated_count(value):
        """Check that ``value`` is an int (or integral float) greater than 1; return it as int."""
        assert(isinstance(value, int) or isinstance(value, float))
        assert(value > 1)
        if (isinstance(value, float)):
            assert(value.is_integer())

        # Always store a real int: range() and tensor shapes reject floats such as 5.0.
        return int(value)

    def set_sequence_length(self, sequence_length):
        """Set the number of timesteps ``forward`` processes.

        Raises:
            AssertionError: if the value is not an int or integral float greater than 1.
        """
        self.sequence_length = self._validated_count(sequence_length)

    def set_batch_size(self, batch_size):
        """Record the batch size callers intend to feed to the network.

        Raises:
            AssertionError: if the value is not an int or integral float greater than 1.
        """
        self.batch_size = self._validated_count(batch_size)

In [None]:
# Run counter: the training cell appends it to the TensorBoard log directory name and then increments it.
a = 20

In [None]:
# Create a TensorBoard summary of the training session. Variable `a` keeps track of the session ID. Set it in the cell above before running this training cell.
writer = SummaryWriter('runs//run' + str(a))
a += 1

# Create a recurrent network. Constructor argument specifies number of hidden neurons.
network = CustomRecurrent(10)
network.set_batch_size(16)
network.set_sequence_length(5)

# Mean Squared Error Loss and an Rprop optimizer. Rprop seems to work best by far for recurrent networks.
criterion = nn.MSELoss()
optimizer = optim.Rprop(network.parameters(), etas=(0.3, 1.1), step_sizes=(1e-6, 1))

# The first dimension of the tensor containing the input data is the sample dimension.
# The loss lists store plain Python floats so that no autograd graph is kept alive per step.
num_training_samples = train_data.size()[0]
training_losses = []
validation_losses = []

try:
    # Visit every FULL batch exactly once; only the trailing
    # (num_training_samples % batch_size) samples are discarded.
    for i in range(0, num_training_samples - network.batch_size + 1, network.batch_size):
        batch_x = train_x[i:i + network.batch_size, :]
        batch_y = train_y[i:i + network.batch_size, :]
        batch_x_val = test_x[i:i + network.batch_size, :]
        batch_y_val = test_y[i:i + network.batch_size, :]

        optimizer.zero_grad()
        network_output = network(batch_x)
        # The first timestep is excluded from the loss.
        loss = criterion(network_output[:, 1:], batch_y[:, 1:])
        loss.backward()
        optimizer.step()

        # nn.MSELoss() already averages over every compared element, so the value is
        # logged as-is (no further division by batch size / sequence length).
        training_loss = loss.item()
        writer.add_scalar('training_loss', training_loss, i)
        training_losses.append(training_loss)

        # Calculate and record validation set loss. No gradients are needed here.
        with torch.no_grad():
            validation_loss = criterion(network(batch_x_val)[:, 1:], batch_y_val[:, 1:])
        validation_losses.append(validation_loss.item())

        # Keep track of the gradients at each level of the network to help diagnose exploding/vanishing gradients.
        # NOTE(review): this logs (sum of gradient entries)**2, so entries of opposite sign cancel.
        if i > 0:
            for name, parameter in network.named_parameters():
                writer.add_scalar('layer ' + str(name) + ' gradients', torch.sum(parameter.grad)**2, i)

    # TensorBoard visual graph summary of the whole network.
    network.set_batch_size(num_training_samples)
    writer.add_graph(network, torch.Tensor(train_x))
finally:
    # Close the event file even if training is interrupted or raises.
    writer.close()

In [None]:
# Scratch experiment: a single-element leaf tensor that requires grad, registered as the
# only parameter of a fresh Rprop optimizer. This rebinds the name `optimizer`.
tensor = torch.tensor([5.0]).requires_grad_()
optimizer = optim.Rprop(params=[tensor])

In [None]:
# NOTE(review): this rebinds `tensor` to the product, so the name no longer refers to the
# leaf that was handed to the optimizer; re-running this cell multiplies by 5 again.
tensor = tensor * torch.tensor([5.0])

In [None]:
# Backpropagate from whatever `tensor` currently refers to (the single-element product if
# the multiplication cell above was run).
tensor.backward()

In [None]:
# Apply one optimizer update. Assumes `optimizer` is still the scratch Rprop instance built
# over `[tensor]` above, not the training optimizer.
optimizer.step()

In [None]:
# NOTE(review): if the multiplication cell above was run, `tensor` is the non-leaf product
# here; PyTorch does not retain .grad on non-leaf tensors by default, so this would show
# None rather than the gradient of the leaf held by the optimizer.
tensor.grad

In [None]:
# Show the gradient currently stored on the network's first parameter tensor.
first_parameter = next(iter(network.parameters()))
print(first_parameter.grad)

In [None]:
# Plot the per-batch training and validation loss curves and print the mean of the last
# 100 recorded values of each.
# The loss lists may hold 0-dim tensors that are still attached to the autograd graph;
# matplotlib cannot convert those to NumPy, so reduce everything to plain floats first.
training_curve = [float(value) for value in training_losses]
validation_curve = [float(value) for value in validation_losses]

plt.figure()
plt.plot(training_curve, label='training')
plt.plot(validation_curve, label='validation')
plt.xlabel('batch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()
print(torch.mean(torch.tensor(training_curve[-100:])))
print(torch.mean(torch.tensor(validation_curve[-100:])))

In [None]:
# Map the last validation targets back to their original scale using the test-set
# statistics for feature column 0 (the sequences): value * range + minimum.
y_range_test = maxes_test[:, 0].reshape((1, -1))
y_min_test = mins_test[:, 0].reshape((1, -1))
batch_y_val.detach().numpy() * y_range_test + y_min_test

In [None]:
# Map the last validation features back to their original scale. batch_x_val is sliced
# from test_x, which was scaled with mins_test / maxes_test, so those same statistics
# (feature column 1) must be used to undo the scaling, not the training-set ones.
batch_x_val.detach().numpy() * maxes_test[:, 1].reshape((1, -1)) + mins_test[:, 1].reshape((1, -1))

In [None]:
# Display the (scaled) feature values of the last validation batch left over from the training loop.
batch_x_val

In [None]:
# Run the network on the last validation batch and show its predictions rescaled with the
# training-set statistics for feature column 0: value * range + minimum.
network.set_batch_size(16)
output = network(batch_x_val)
y_range_train = maxes_train[:, 0].reshape((1, -1))
y_min_train = mins_train[:, 0].reshape((1, -1))
output.detach().numpy() * y_range_train + y_min_train

In [None]:
# Display the (scaled) target values of the last validation batch left over from the training loop.
batch_y_val

In [None]:
# Sanity check: shape of the row vector used to rescale the sequence column.
np.reshape(maxes_train[:, 0], (1, -1)).shape

In [None]:
# Walk up to ten steps back through the autograd graph, always following the first input
# of each node, and print every node reached.
# `grad_fn` is not initialised anywhere else in this notebook, so start from the most
# recent network output. NOTE(review): swap in another tensor (e.g. `loss`) if that was
# the graph meant to be inspected.
grad_fn = output.grad_fn
for depth in range(10):
    # Stop at the end of the chain: a missing node (input that needs no gradient) or a
    # node without inputs has nothing further to follow.
    if grad_fn is None or not grad_fn.next_functions:
        break
    grad_fn = grad_fn.next_functions[0][0]
    print(grad_fn)