### Amazon Sentiment Data

In [None]:
# IPython autoreload in mode 2: re-import modified modules before each cell runs,
# so edits to the toolkit sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import lxmls.readers.sentiment_reader as srs
from lxmls.deep_learning.utils import AmazonData
# Load the "books" sentiment corpus
corpus = srs.SentimentCorpus("books")
# Wrap the corpus in AmazonData; later cells call data.batches() on it for the
# 'train' and 'test' sets
data = AmazonData(corpus=corpus)

### Train Log Linear in Pytorch
To learn the differences between a numpy and a PyTorch implementation, explore the reimplementation of Ex. 3.1 in PyTorch below. Compare the content of each method with its numpy counterpart, in particular the forward pass (`_log_forward()`) and `update()`. The comments marked IMPORTANT highlight common sources of errors.

In [None]:
from lxmls.deep_learning.utils import Model, glorot_weight_init
import numpy as np
import torch
from torch.autograd import Variable

class PytorchLogLinear(Model):
    """Log-linear (softmax regression) classifier written with PyTorch tensors.

    The parameters are plain tensors created with ``requires_grad=True``.
    Gradients are obtained through autograd and the SGD step is applied by
    hand in ``update()``.

    Expected ``config`` keys: ``input_size``, ``num_classes`` and
    ``learning_rate``.
    """

    def __init__(self, **config):
        """Create the weight and bias tensors and the loss function."""

        # Initialize parameters
        weight_shape = (config['input_size'], config['num_classes'])
        # after Xavier Glorot et al
        # NOTE(review): _log_forward() multiplies the input by the transpose
        # of this matrix, so glorot_weight_init presumably returns it laid out
        # as (num_classes, input_size) -- confirm against its definition.
        weight_np = glorot_weight_init(weight_shape, 'softmax')
        # Stored on the instance because update() needs it for the SGD step
        self.learning_rate = config['learning_rate']

        # IMPORTANT: Cast to pytorch format
        self.weight = torch.from_numpy(weight_np).float()
        self.weight.requires_grad = True

        self.bias = torch.zeros(1, config['num_classes'], requires_grad=True)

        self.loss_function = torch.nn.NLLLoss()

    def _log_forward(self, input=None):
        """Forward pass of the computation graph in logarithm domain (pytorch)

        ``input`` is a numpy array; the result is a torch tensor holding the
        log-softmax over the last dimension.
        """

        # IMPORTANT: Cast to pytorch format
        input = torch.from_numpy(input).float()

        # Linear transformation
        z = torch.matmul(input, torch.t(self.weight)) + self.bias

        # Softmax implemented in log domain
        log_tilde_z = torch.log_softmax(z, dim=-1)

        # NOTE that this is a pytorch class!
        return log_tilde_z

    def predict(self, input=None):
        """Most probable class index (numpy array, one entry per input row)"""
        # Inference only: do not record the forward pass in the autograd graph
        with torch.no_grad():
            log_forward = self._log_forward(input).numpy()
        return np.argmax(log_forward, axis=1)

    def update(self, input=None, output=None):
        """Stochastic Gradient Descent update

        Runs one forward/backward pass on the given batch, applies a plain
        SGD step to weight and bias, and returns the loss as a numpy value.
        """

        # IMPORTANT: Class indices need to be casted to LONG
        true_class = torch.from_numpy(output).long()

        # Compute negative log-likelihood loss
        loss = self.loss_function(self._log_forward(input), true_class)

        # Use autograd to compute the backward pass.
        loss.backward()

        # IMPORTANT: the parameter update itself must not be tracked by
        # autograd, hence the no_grad() context around the in-place changes.
        with torch.no_grad():
            # SGD update
            self.weight -= self.learning_rate * self.weight.grad
            self.bias -= self.learning_rate * self.bias.grad

            # Zero gradients (backward() accumulates into .grad otherwise)
            self.weight.grad.zero_()
            self.bias.grad.zero_()

        return loss.detach().numpy()

Once you understand the model, you can instantiate it and run it using the standard training loop we have used in previous exercises.

In [None]:
# Two-class log-linear model over the corpus' feature space, trained with a
# fixed SGD learning rate.
model = PytorchLogLinear(input_size=corpus.nr_features, num_classes=2, learning_rate=0.05)

In [None]:
# Training configuration
num_epochs, batch_size = 10, 30

# Mini-batches for training; the test data is requested with batch_size=None
# and only its first batch is used for evaluation.
train_batches = data.batches('train', batch_size=batch_size)
test_set = data.batches('test', batch_size=None)[0]

for epoch in range(num_epochs):

    # One SGD step per training mini-batch
    for batch in train_batches:
        model.update(
            input=batch['input'],
            output=batch['output'],
        )

    # Evaluate on the test set once the epoch is finished
    hat_y = model.predict(input=test_set['input'])
    accuracy = np.mean(hat_y == test_set['output']) * 100

    # Report progress (epochs are shown 1-based)
    print("Epoch %d: accuracy %2.2f %%" % (epoch + 1, accuracy))