In [1]:
import os
# Expose only GPU index 7 to CUDA; this is set before `torch` is imported below.
# NOTE(review): the device index is hard-coded -- presumably no CUDA device is
# visible on a host without a GPU 7, in which case `to_cuda` leaves tensors on
# the CPU. Confirm this matches the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'

import torch
import torch.nn as nn
import torch.nn.functional as F

def to_cuda(x):
    """Return ``x.cuda()`` when CUDA is available, otherwise ``x`` unchanged."""
    return x.cuda() if torch.cuda.is_available() else x

class PointerNetwork(nn.Module):
    """RNN encoder + RNN-cell decoder that "points" at input positions.

    At every decoding step an additive attention score
    ``vt(tanh(W1(encoder_output) + W2(decoder_hidden)))`` is computed for each
    input position and normalised with ``log_softmax``.

    Args:
        input_size: Number of features per sequence element.
        hidden_size: Hidden size shared by the encoder and the decoder cell.
        weight_size: Width of the attention projection space.
        is_GRU: Use ``nn.GRU``/``nn.GRUCell`` when true, otherwise
            ``nn.LSTM``/``nn.LSTMCell``.
    """

    def __init__(self, input_size, hidden_size, weight_size, is_GRU=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.weight_size = weight_size
        self.is_GRU = is_GRU

        if self.is_GRU:
            RNN = nn.GRU
            RNNCell = nn.GRUCell
        else:
            RNN = nn.LSTM
            RNNCell = nn.LSTMCell

        self.encoder = RNN(input_size, hidden_size, batch_first=True)
        self.decoder = RNNCell(input_size, hidden_size)

        # Additive attention: W1 projects encoder states, W2 projects the
        # decoder state, vt reduces the combined projection to a scalar score.
        self.W1 = nn.Linear(hidden_size, weight_size, bias=False)
        self.W2 = nn.Linear(hidden_size, weight_size, bias=False)
        self.vt = nn.Linear(weight_size, 1, bias=False)

    def forward(self, input):
        """Compute pointer log-probabilities for every decoding step.

        Args:
            input: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, seq_len, seq_len)``; entry ``[b, t, :]``
            is the log-softmax distribution over input positions at decoding
            step ``t``.
        """
        batch_size = input.shape[0]
        decoder_seq_len = input.shape[1]

        encoder_output, hc = self.encoder(input)  # (batch, seq_len, hidden)

        # Decoder state initialisation: the hidden state is the encoder output
        # at the last timestep; for the LSTM the cell state is the second
        # element of the encoder's final state tuple.
        hidden = encoder_output[:, -1, :]
        cell = None
        if not self.is_GRU:
            cell = hc[1][-1, :, :]

        # The decoder is fed one random vector that is reused at every step
        # (it is never updated inside the loop). Create it directly on the
        # device/dtype of the encoder output instead of relying on a global
        # CUDA helper.
        decoder_input = torch.rand(
            batch_size, self.input_size,
            device=encoder_output.device, dtype=encoder_output.dtype,
        )

        # The encoder projection does not change between steps: compute once.
        encoder_proj = self.W1(encoder_output)  # (batch, seq_len, weight)

        log_probs = []
        for _ in range(decoder_seq_len):
            if self.is_GRU:
                hidden = self.decoder(decoder_input, hidden)
            else:
                # LSTMCell returns (h, c); both must be carried to the next
                # step, otherwise the cell state stays frozen.
                hidden, cell = self.decoder(decoder_input, (hidden, cell))

            # Broadcast the decoder projection over the sequence axis.
            energy = torch.tanh(encoder_proj + self.W2(hidden).unsqueeze(1))
            # Drop only the trailing score axis so that a batch (or sequence)
            # of size 1 keeps its dimension.
            scores = self.vt(energy).squeeze(-1)  # (batch, seq_len)
            log_probs.append(F.log_softmax(scores, dim=-1))

        return torch.stack(log_probs, dim=1)

In [2]:
import numpy as np
import torch
import torch.utils.data as Data
#from model import PointerNetwork


# Training schedule and data volume
EPOCH = 500        # upper bound of the epoch loop
BATCH_SIZE = 250   # mini-batch size handed to the DataLoader
DATA_SIZE = 10000  # number of generated sequences (train + test)

# Model dimensions passed to PointerNetwork
INPUT_SIZE, HIDDEN_SIZE, WEIGHT_SIZE = 1, 512, 256

# Adam learning rate
LR = 1e-3


def to_cuda(x):
    """Move ``x`` to the GPU if CUDA is usable; otherwise hand it back untouched.

    NOTE(review): an identical helper is defined in the model cell; this copy
    shadows it when both cells are run in the same kernel.
    """
    if not torch.cuda.is_available():
        return x
    return x.cuda()

def getdata(experiment, data_size):
    """Generate a random "sort the sequence" dataset.

    Args:
        experiment: Selects the data distribution:
            1 -> 5 distinct integers drawn from ``range(100)``
            2 -> 10 distinct integers drawn from ``range(100)``
            3 -> 5 uniform floats from ``np.random.random``
            4 -> 10 uniform floats from ``np.random.random``
        data_size: Number of sequences to generate.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape ``(data_size, senlen)`` and
        ``y = np.argsort(x)`` holds, per row, the indices that sort that row.

    Raises:
        ValueError: If ``experiment`` is not one of 1, 2, 3, 4.
    """
    # experiment id -> (sequence length, exclusive integer bound or None for floats)
    configs = {
        1: (5, 100),
        2: (10, 100),
        3: (5, None),
        4: (10, None),
    }
    if experiment not in configs:
        raise ValueError(
            'unknown experiment {!r}; expected one of {}'.format(
                experiment, sorted(configs)))
    senlen, high = configs[experiment]

    if high is None:
        # One vectorised draw; consumes the global RNG in the same row order
        # as drawing ``senlen`` values per row.
        x = np.random.random((data_size, senlen))
    else:
        # Sampling without replacement has to be done row by row.
        rows = [np.random.choice(range(high), senlen, replace=False)
                for _ in range(data_size)]
        # The reshape keeps a 2-D result even when ``data_size`` is 0.
        x = np.array(rows, dtype=int).reshape(data_size, senlen)

    y = np.argsort(x)
    return x, y

def evaluate(model, X, Y):
    """Print and return the exact-match accuracy of ``model`` on ``(X, Y)``.

    A sequence counts as correct only when the arg-max prediction matches the
    target at every decoding step.

    Args:
        model: Callable mapping ``X`` to scores of shape
            ``(batch, steps, positions)``.
        X: Batch of model inputs; ``len(X)`` is the number of sequences.
        Y: Target indices of shape ``(batch, steps)``.

    Returns:
        The fraction of fully correct sequences (``0.0`` for an empty batch).
    """
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        probs = model(X)
    _, indices = torch.max(probs, 2)

    # Row-wise exact match, computed in one vectorised comparison.
    equal_cnt = (indices == Y).all(dim=1).sum().item()
    total = len(X)
    accuracy = equal_cnt / total if total else 0.0
    print('Acc: {:.2f}%'.format(accuracy*100))
    return accuracy
# --- Dataset: experiment 2 ---
x, y = getdata(experiment=2, data_size=DATA_SIZE)
# Inputs become float tensors with a trailing feature axis; targets are long indices.
x = to_cuda(torch.FloatTensor(x).unsqueeze(2))
y = to_cuda(torch.LongTensor(y))

# --- Split: first 90% for training, remainder for testing ---
train_size = int(DATA_SIZE * 0.9)
train_X, test_X = x[:train_size], x[train_size:]
train_Y, test_Y = y[:train_size], y[train_size:]

# --- Shuffled mini-batch loader over the training split ---
train_data = Data.TensorDataset(train_X, train_Y)
data_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)


# Build the LSTM-based pointer network and move it to the GPU when one is available
model = to_cuda(PointerNetwork(INPUT_SIZE, HIDDEN_SIZE, WEIGHT_SIZE, is_GRU=False))

# Adam over all model parameters; cross-entropy over the pointed-at positions
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
loss_fun = torch.nn.CrossEntropyLoss()


# Training: one optimiser step per mini-batch
print('Training... ')
for epoch in range(EPOCH):
    for batch_x, batch_y in data_loader:
        optimizer.zero_grad()

        probs = model(batch_x)
        # Flatten (batch, step) so every decoding step is one classification
        # over the input positions.
        outputs = probs.view(-1, batch_x.shape[1])
        batch_y = batch_y.view(-1)
        loss = loss_fun(outputs, batch_y)

        loss.backward()
        optimizer.step()

    # Every second epoch: report the last batch's loss and the accuracy on the training split
    if not epoch % 2:
        print('Epoch: {}, Loss: {:.5f}'.format(epoch, loss.item()))
        evaluate(model, train_X, train_Y)

# Final accuracy on the held-out split
print('Test...')
evaluate(model, test_X, test_Y)

Training... 
Epoch: 0, Loss: 1.51756
Acc: 0.00%
Epoch: 2, Loss: 0.76247
Acc: 4.28%
Epoch: 4, Loss: 0.56312
Acc: 13.73%
Epoch: 6, Loss: 0.52007
Acc: 20.19%
Epoch: 8, Loss: 0.38695
Acc: 24.34%
Epoch: 10, Loss: 0.33419
Acc: 33.58%
Epoch: 12, Loss: 0.27899
Acc: 44.11%
Epoch: 14, Loss: 0.25595
Acc: 41.47%
Epoch: 16, Loss: 0.21022
Acc: 50.84%
Epoch: 18, Loss: 0.39901
Acc: 29.90%
Epoch: 20, Loss: 0.28763
Acc: 46.69%
Epoch: 22, Loss: 0.22793
Acc: 47.99%
Epoch: 24, Loss: 0.27763
Acc: 43.03%
Epoch: 26, Loss: 0.17160
Acc: 66.91%
Epoch: 28, Loss: 0.13706
Acc: 70.44%
Epoch: 30, Loss: 0.15366
Acc: 69.78%
Epoch: 32, Loss: 0.11903
Acc: 72.91%
Epoch: 34, Loss: 0.11094
Acc: 76.33%
Epoch: 36, Loss: 0.10848
Acc: 75.10%
Epoch: 38, Loss: 0.69810
Acc: 3.19%
Epoch: 40, Loss: 0.39384
Acc: 10.68%
Epoch: 42, Loss: 0.29097
Acc: 44.64%
Epoch: 44, Loss: 0.34809
Acc: 33.40%
Epoch: 46, Loss: 0.24415
Acc: 50.91%
Epoch: 48, Loss: 0.20808
Acc: 57.88%
Epoch: 50, Loss: 0.17669
Acc: 62.22%
Epoch: 52, Loss: 0.14714
Acc: 66.

Epoch: 432, Loss: 0.00076
Acc: 100.00%
Epoch: 434, Loss: 0.00072
Acc: 100.00%
Epoch: 436, Loss: 0.00073
Acc: 100.00%
Epoch: 438, Loss: 0.00060
Acc: 100.00%
Epoch: 440, Loss: 0.00056
Acc: 100.00%
Epoch: 442, Loss: 0.00047
Acc: 100.00%
Epoch: 444, Loss: 0.00055
Acc: 100.00%
Epoch: 446, Loss: 0.00044
Acc: 100.00%
Epoch: 448, Loss: 0.00047
Acc: 100.00%
Epoch: 450, Loss: 0.00043
Acc: 100.00%
Epoch: 452, Loss: 0.00047
Acc: 100.00%
Epoch: 454, Loss: 0.00040
Acc: 100.00%
Epoch: 456, Loss: 0.00044
Acc: 100.00%
Epoch: 458, Loss: 0.00039
Acc: 100.00%
Epoch: 460, Loss: 0.00041
Acc: 100.00%
Epoch: 462, Loss: 0.00032
Acc: 100.00%
Epoch: 464, Loss: 0.00031
Acc: 100.00%
Epoch: 466, Loss: 0.00024
Acc: 100.00%
Epoch: 468, Loss: 0.00025
Acc: 100.00%
Epoch: 470, Loss: 0.00023
Acc: 100.00%
Epoch: 472, Loss: 0.00029
Acc: 100.00%
Epoch: 474, Loss: 0.00025
Acc: 100.00%
Epoch: 476, Loss: 0.00023
Acc: 100.00%
Epoch: 478, Loss: 0.00023
Acc: 100.00%
Epoch: 480, Loss: 0.00020
Acc: 100.00%
Epoch: 482, Loss: 0.00019

In [4]:
# Shape of the training inputs.
train_X.shape

torch.Size([9000, 10, 1])

In [5]:
# Shape of the training targets.
train_Y.shape

torch.Size([9000, 10])

In [6]:
# First target row: the argsort indices of the first training sequence.
train_Y[0]

tensor([8, 9, 2, 3, 1, 4, 6, 7, 0, 5], device='cuda:0')

In [7]:
# First training sequence, one value per row.
train_X[0]

tensor([[84.],
        [65.],
        [36.],
        [51.],
        [74.],
        [95.],
        [76.],
        [77.],
        [ 0.],
        [23.]], device='cuda:0')