# Policy RNN for Part-of-Speech Tagging

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to library source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

As a last exercise, apply what you have learned to the RNN model seen in previous days. Implement REINFORCE to replace the maximum likelihood loss used on the RNN day. For this you can modify the `PolicyRNN` class in `lxmls/deep_learning/pytorch_models/rnn.py`.

### WSJ Data

In [4]:
# Extend the module search path three directories up so that the `lxmls`
# package can be imported from this notebook's location
# (assumes the notebook is run from its own folder — TODO confirm).
import sys
sys.path.append("../../../")

# Load the Part-of-Speech corpus
from lxmls.readers.pos_corpus import PostagCorpusData
data = PostagCorpusData()

### Train model with Cross entropy

In [10]:
# Show the two size attributes the model constructor is given below:
# input size first, output size second, one per line.
print(data.input_size, data.output_size, sep="\n")

4786
12


In [12]:
# Alternative RNN implementation based on the native CuDNN RNN
from lxmls.deep_learning.pytorch_models.rnn import FastPytorchRNN

# Hyperparameters gathered in one place; the input/output sizes come from the corpus
rnn_settings = dict(
    input_size=data.input_size,
    embedding_size=50,
    hidden_size=20,
    output_size=data.output_size,
    learning_rate=0.1,
)
model = FastPytorchRNN(**rnn_settings)

In [15]:
# Number of passes over the training batches; read by both training loops below
num_epochs = 10

In [16]:
import numpy as np
import time


def evaluate_accuracy(model, batches):
    """Return the accuracy of `model` over `batches`, as a percentage.

    For every batch, the model's prediction for ``batch['input']`` is compared
    element-wise with ``batch['output']``; the result is 100 times the mean of
    all comparison outcomes pooled across batches.

    Args:
        model: object exposing ``predict(input=...)``.
        batches: iterable of mappings with ``'input'`` and ``'output'`` entries.

    Returns:
        Accuracy in percent (``nan`` if `batches` yields no elements).
    """
    is_hit = []
    for batch in batches:
        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
    return 100*np.mean(is_hit)


# Get batch iterators for train, dev and test (one sentence per batch)
train_batches = data.batches('train', batch_size=1)
dev_set = data.batches('dev', batch_size=1)
test_set = data.batches('test', batch_size=1)

# Epoch loop
start = time.time()
for epoch in range(num_epochs):

    # Batch loop: one parameter update per training batch
    for batch in train_batches:
        model.update(input=batch['input'], output=batch['output'])

    # Evaluation dev
    accuracy = evaluate_accuracy(model, dev_set)

    # Inform user
    print("Epoch %d: dev accuracy %2.2f %%" % (epoch+1, accuracy))

# NOTE: the clock spans the whole epoch loop, so this per-epoch figure also
# includes the time spent evaluating on the dev set.
print("Training took %2.2f seconds per epoch" % ((time.time() - start)/num_epochs))

# Evaluation test
accuracy = evaluate_accuracy(model, test_set)

# Inform user
print("Test accuracy %2.2f %%" % accuracy)

Epoch 1: dev accuracy 76.16 %
Epoch 2: dev accuracy 84.89 %
Epoch 3: dev accuracy 86.89 %
Epoch 4: dev accuracy 86.84 %
Epoch 5: dev accuracy 87.38 %
Epoch 6: dev accuracy 87.46 %
Epoch 7: dev accuracy 87.55 %
Epoch 8: dev accuracy 87.32 %
Epoch 9: dev accuracy 87.15 %
Epoch 10: dev accuracy 87.15 %
Training took 2.42 seconds per epoch
Test accuracy 86.87 %


### Train model with [LOSS REQUIRING RL]

In [27]:
# Example of sampling: take the input of the training batch at index 3,
# show it, then draw a sample for it from the model.
example_input = train_batches[3]['input']
print(example_input)
samples, log_probs = model._sample(input=example_input)
(samples, log_probs)

[24 25 26 27 28 29 30  4]


(tensor([ 3,  0,  0,  1,  7,  1,  1,  2]), tensor(1.00000e-03 *
        [ 3.9363,  4.6798,  0.2728,  0.3790,  0.8567,  0.0519,  1.4068,
          0.1815]))

In [None]:
# Exercise scaffold for the RL-based training loop. It mirrors the structure of
# the cross-entropy cell (epoch loop, dev evaluation, final test evaluation) but
# the parameter update is left for the reader to implement.
# Relies on names created in earlier cells: time, np, num_epochs, model,
# train_batches, dev_set and test_set.

# Epoch loop
start = time.time()
for epoch in range(num_epochs):

    # Batch loop
    for batch in train_batches:
        # TODO: Use this here to create an RL inside model.update()
        samples, log_probs = model._sample(input=batch['input']) # sample actions and their negative log-probabilities (sign per the original note — confirm in _sample)
        # Deliberate placeholder: until the update is implemented, execution
        # stops here on the very first batch.
        raise NotImplementedError

    # Evaluation dev: pool element-wise prediction/gold comparisons over all dev batches
    is_hit = []
    for batch in dev_set:
        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
    accuracy = 100*np.mean(is_hit)

    # Inform user
    print("Epoch %d: dev accuracy %2.2f %%" % (epoch+1, accuracy))

# The clock spans the whole epoch loop, so this figure includes dev evaluation time
print("Training took %2.2f seconds per epoch" % ((time.time() - start)/num_epochs))    
    
# Evaluation test: same pooled comparison, on the held-out test batches
is_hit = []
for batch in test_set:
    is_hit.extend(model.predict(input=batch['input']) == batch['output'])
accuracy = 100*np.mean(is_hit)

# Inform user
print("Test accuracy %2.2f %%" % accuracy)