In [1]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
import torch
import argparse
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable

## Generating the data

In [2]:
def data_generator(N, seq_length):
    """
    Build a random dataset for the adding problem.

    Every sample has two channels of length ``seq_length``: channel 0 holds
    uniform random numbers drawn with ``torch.rand``; channel 1 is a 0/1
    marker row with exactly two distinct positions set to 1. The target is
    the sum of the two channel-0 values at the marked positions.

    Args:
        N: number of samples to generate
        seq_length: length of each sequence (two distinct positions are
            sampled from it without replacement)

    Returns:
        (X, Y) where X has size [N, 2, seq_length] and Y has size [N, 1],
        both wrapped in ``Variable``.
    """
    values = torch.rand([N, 1, seq_length])
    markers = torch.zeros([N, 1, seq_length])
    targets = torch.zeros([N, 1])
    for row in range(N):
        # Two different indices per sample; these select the addends.
        first, second = np.random.choice(seq_length, size=2, replace=False)
        markers[row, 0, first] = 1
        markers[row, 0, second] = 1
        targets[row, 0] = values[row, 0, first] + values[row, 0, second]
    # Stack the number channel and the marker channel along dim 1.
    features = torch.cat((values, markers), dim=1)
    return Variable(features), Variable(targets)

In [3]:
# Seed both RNGs used by data_generator (torch.rand and np.random.choice) so that
# a fresh-kernel "Run All" regenerates the same train/test sets.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# 50000 training and 1000 test sequences, each of length 100.
X_train, Y_train = data_generator(50000, seq_length=100)
X_test, Y_test = data_generator(1000, seq_length=100)

## Define the network

In [4]:
class Conv1d_pad_same(nn.Module):
    """Drop the last ``chomp_size`` entries along the third (last) axis of a 3-D input.

    TemporalBlock places this right after a Conv1d whose ``padding`` equals
    ``chomp_size``, so the extra trailing positions produced by that padding
    are removed again.

    Args:
        chomp_size: number of trailing positions to remove; 0 leaves the
            input length unchanged.
    """

    def __init__(self, chomp_size):
        super(Conv1d_pad_same, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` positions, as a contiguous tensor."""
        # `x[:, :, :-0]` is an empty slice, so a zero chomp (e.g. kernel_size=1,
        # padding=0) must bypass the slicing instead of discarding every step.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    """Residual block made of two weight-normalised dilated Conv1d stages.

    Each stage is Conv1d -> Conv1d_pad_same(padding) -> ReLU -> Dropout. The
    block input is added to the output of the second stage (through a 1x1
    Conv1d when ``n_inputs != n_outputs``) and the sum goes through a ReLU.

    Args:
        n_inputs: channels of the block input
        n_outputs: channels produced by both conv stages
        kernel_size: kernel size of both convolutions
        stride: stride of both convolutions
        dilation: dilation of both convolutions
        padding: padding given to both convolutions and trimmed again after each
        dropout: dropout probability used after each stage
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # Both convolutions share the same geometry.
        conv_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

        # Stage 1: n_inputs -> n_outputs
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size, **conv_kwargs))
        self.chomp1 = Conv1d_pad_same(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Stage 2: n_outputs -> n_outputs
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size, **conv_kwargs))
        self.chomp2 = Conv1d_pad_same(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        stages = [
            self.conv1, self.chomp1, self.relu1, self.dropout1,
            self.conv2, self.chomp2, self.relu2, self.dropout2,
        ]
        self.net = nn.Sequential(*stages)

        # A 1x1 conv matches channel counts on the skip path when they differ.
        if n_inputs != n_outputs:
            self.downsample = nn.Conv1d(n_inputs, n_outputs, 1)
        else:
            self.downsample = None
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Fill the ``weight`` tensors of the convolutions in place from N(0, 0.01).

        NOTE(review): conv1/conv2 are wrapped by ``weight_norm``, which derives
        ``weight`` from separate magnitude/direction parameters, so this
        in-place fill likely does not persist for them — confirm.
        """
        targets = [self.conv1, self.conv2]
        if self.downsample is not None:
            targets.append(self.downsample)
        for layer in targets:
            layer.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Return ``relu(net(x) + skip(x))``."""
        branch = self.net(x)
        if self.downsample is None:
            shortcut = x
        else:
            shortcut = self.downsample(x)
        return self.relu(branch + shortcut)


class TemporalConvNet(nn.Module):
    """Sequential stack of TemporalBlock modules with doubling dilation.

    Level ``i`` (0-based) uses dilation ``2 ** i``, stride 1 and padding
    ``(kernel_size - 1) * dilation``.

    Args:
        num_inputs: channels of the network input
        num_channels: output channels of each level, in order
        kernel_size: kernel size shared by all levels
        dropout: dropout probability passed to every block
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        blocks = []
        prev_width = num_inputs  # the first level consumes the raw input channels
        for level, width in enumerate(num_channels):
            dilation_size = 2 ** level
            blocks.append(
                TemporalBlock(prev_width, width, kernel_size,
                              stride=1,
                              dilation=dilation_size,
                              padding=(kernel_size - 1) * dilation_size,
                              dropout=dropout)
            )
            prev_width = width

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every level in order."""
        return self.network(x)

class TCN(nn.Module):
    """TemporalConvNet followed by a linear layer on the last sequence position.

    Args:
        input_size: channels of the input sequence
        output_size: size of the linear layer's output
        num_channels: per-level channel widths given to TemporalConvNet
        kernel_size: convolution kernel size for every level
        dropout: dropout probability for every level
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super(TCN, self).__init__()
        self.tcn = TemporalConvNet(
            input_size,
            num_channels,
            kernel_size=kernel_size,
            dropout=dropout,
        )
        # The linear layer reads the channels produced by the last level.
        final_width = num_channels[-1]
        self.linear = nn.Linear(final_width, output_size)
        self.init_weights()

    def init_weights(self):
        """Fill the linear layer's weight in place from N(0, 0.01)."""
        head_weight = self.linear.weight
        head_weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Apply the TCN, keep index -1 of the last axis, and project it linearly."""
        features = self.tcn(x)
        last_step = features[:, :, -1]
        return self.linear(last_step)

In [43]:
# --- Hyper-parameters -------------------------------------------------------
input_channels = 2      # data_generator stacks a value channel and a marker channel
n_classes = 1           # output size of the final linear layer
batch_size = 128        # samples per optimisation step in train()
seq_length = 100        # same length that was used when generating the data
epochs = 10             # number of train/evaluate rounds
nhid = 24               # channels of every TCN level
levels = 5              # number of TCN levels
kernel_size = 7
dropout = 0.
lr = 0.002              # learning rate handed to Adam (also printed by train())
channel_sizes = [nhid]*levels
log_interval = 100      # train() prints a running loss every this many batches

model = TCN(input_channels, n_classes, channel_sizes, kernel_size=kernel_size, dropout=dropout)

# Move the model and data to the GPU only when one is present, so the notebook
# still runs (slowly) on a CPU-only machine instead of raising on .cuda().
use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.cuda()

    X_train = X_train.cuda()
    Y_train = Y_train.cuda()
    X_test = X_test.cuda()
    Y_test = Y_test.cuda()

# Created after the optional .cuda() move so it holds the parameters actually trained.
optimizer = optim.Adam(model.parameters(), lr=lr)


In [44]:
def train(epoch):
    """Run one pass over the training set in mini-batches and log the running loss.

    Uses the module-level ``model``, ``optimizer``, ``X_train``, ``Y_train``,
    ``batch_size``, ``log_interval`` and ``lr``. The printed learning rate is
    the module-level ``lr`` value, which is only read here.

    Requires PyTorch >= 0.4 for ``Tensor.item()``.

    Args:
        epoch: epoch number, used only in the log line.
    """
    model.train()
    n_samples = X_train.size()[0]
    # Count batches from 0 so each logged average covers exactly `log_interval`
    # batches (starting at 1 made the first window one batch short).
    batches_seen = 0
    running_loss = 0.0
    for start in range(0, n_samples, batch_size):
        # Slicing clamps at the end of the tensor, so the final short batch
        # needs no special case.
        stop = start + batch_size
        x, y = X_train[start:stop], Y_train[start:stop]

        optimizer.zero_grad()
        output = model(x)
        loss = F.mse_loss(output, y)
        loss.backward()
        optimizer.step()

        batches_seen += 1
        # .item() extracts the Python float; indexing a 0-dim tensor with [0]
        # raises on current PyTorch.
        running_loss += loss.item()

        if batches_seen % log_interval == 0:
            cur_loss = running_loss / log_interval
            processed = min(stop, n_samples)
            print('Train Epoch: {:2d} [{:6d}/{:6d} ({:.0f}%)]\tLearning rate: {:.4f}\tLoss: {:.6f}'.format(
                epoch, processed, n_samples, 100.*processed/n_samples, lr, cur_loss))
            running_loss = 0.0


def evaluate():
    """Compute and print the MSE of the model on the whole test set.

    Uses the module-level ``model``, ``X_test`` and ``Y_test``. The model is
    left in evaluation mode afterwards.

    Requires PyTorch >= 0.4 for ``torch.no_grad()`` and ``Tensor.item()``.

    Returns:
        The test loss as a Python float.
    """
    model.eval()
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory on the full-test-set forward pass.
    with torch.no_grad():
        output = model(X_test)
        # .item() instead of .data[0]: the loss is a 0-dim tensor on current PyTorch.
        test_loss = F.mse_loss(output, Y_test).item()
    print('\nTest set: Average loss: {:.6f}\n'.format(test_loss))
    return test_loss


In [45]:
# Print the module hierarchy of the assembled network.
print(model)

TCN(
  (tcn): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(2, 24, kernel_size=(7,), stride=(1,), padding=(6,))
        (chomp1): Conv1d_pad_same(
        )
        (relu1): ReLU()
        (dropout1): Dropout(p=0.0)
        (conv2): Conv1d(24, 24, kernel_size=(7,), stride=(1,), padding=(6,))
        (chomp2): Conv1d_pad_same(
        )
        (relu2): ReLU()
        (dropout2): Dropout(p=0.0)
        (net): Sequential(
          (0): Conv1d(2, 24, kernel_size=(7,), stride=(1,), padding=(6,))
          (1): Conv1d_pad_same(
          )
          (2): ReLU()
          (3): Dropout(p=0.0)
          (4): Conv1d(24, 24, kernel_size=(7,), stride=(1,), padding=(6,))
          (5): Conv1d_pad_same(
          )
          (6): ReLU()
          (7): Dropout(p=0.0)
        )
        (downsample): Conv1d(2, 24, kernel_size=(1,), stride=(1,))
        (relu): ReLU()
      )
      (1): TemporalBlock(
        (conv1): Conv1d(24, 24, kernel_size=(7,), str

In [46]:
# One training pass followed by one evaluation on the test set, per epoch (1-based).
for ep in range(1, epochs+1):
    train(ep)
    tloss = evaluate()  # most recent test loss; not read again in the visible cells


Test set: Average loss: 0.019186


Test set: Average loss: 0.006770


Test set: Average loss: 0.005723


Test set: Average loss: 0.001981


Test set: Average loss: 0.002204


Test set: Average loss: 0.001169


Test set: Average loss: 0.000999


Test set: Average loss: 0.001330


Test set: Average loss: 0.001697


Test set: Average loss: 0.000725



In [47]:
# Put the model in evaluation mode and run it on one test sample;
# the 10:11 slice keeps the leading batch dimension.
model.eval()
output = model(X_test[10:11])

In [48]:
# Copy that same test sample to host memory as a NumPy array for inspection.
X_test[10:11].data.cpu().numpy()

array([[[0.616028  , 0.90313846, 0.06202805, 0.86564493, 0.34850854,
         0.8607909 , 0.2327509 , 0.45310503, 0.46806705, 0.74760807,
         0.21059823, 0.55009705, 0.5269502 , 0.39872706, 0.05495358,
         0.7289608 , 0.8472756 , 0.6520019 , 0.10983479, 0.06915188,
         0.4299478 , 0.27376783, 0.3172034 , 0.21797365, 0.77016574,
         0.4539659 , 0.21675795, 0.13673985, 0.01736778, 0.497369  ,
         0.10169131, 0.2664454 , 0.02172685, 0.53324074, 0.98921514,
         0.9557614 , 0.38263506, 0.03936386, 0.97934014, 0.334042  ,
         0.04893422, 0.7116153 , 0.18837929, 0.21946335, 0.57322216,
         0.4681689 , 0.57627976, 0.5808097 , 0.26296347, 0.21397907,
         0.00286973, 0.91633177, 0.79443693, 0.3584674 , 0.56790763,
         0.655529  , 0.02952695, 0.4447524 , 0.51078296, 0.41392833,
         0.3139944 , 0.9475292 , 0.26595765, 0.6437272 , 0.7807376 ,
         0.04602867, 0.8947014 , 0.690051  , 0.5586796 , 0.43738782,
         0.597533  , 0.46625042, 0

In [49]:
# Hand-typed reference sum to compare against the model output shown in the next cell.
# NOTE(review): presumably the two marked values of test sample 10 — the array
# printed above is truncated, so confirm against the full output.
0.606856+0.4139

1.020756

In [50]:
# Display the model's prediction for the single test sample evaluated above.
output

Variable containing:
 1.0138
[torch.cuda.FloatTensor of size 1x1 (GPU 0)]