Feb 25

For some reason, the validation loss does not decrease significantly
on the attention model. To investigate, this notebook compares it with
a baseline that does not include the attention mechanism.

# Imports

In [None]:
%load_ext autoreload
%autoreload 2

from collections import Counter

import torch
from IPython.lib.pretty import pretty
from torch import tensor
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
from torchtext.vocab import Vocab

from notebooks._05_no_attention import util
from notebooks._05_no_attention.classifier import Classifier

# 1 Define train/valid data

In [None]:
# Toy-sized hyper-parameters for this experiment.
batch_size = 2   # entities per batch (train_data below holds 2 entities)
class_count = 3  # labels per entity
emb_size = 4     # passed to Classifier as its second argument
sent_count = 3   # sentences per entity
sent_len = 3     # words per sentence

# Mirror the same values onto the `util` module
# (presumably read by helper code there -- confirm against util).
util.batch_size = batch_size
util.class_count = class_count
util.emb_size = emb_size
util.sent_count = sent_count
util.sent_len = sent_len

# Training entities: one all-positive and one all-negative example.
# Every sentence repeats a single keyword three times.
train_data = [
    dict(
        classes=[1, 1, 1],
        sents=[
            'married married married',
            'male male male',
            'American American American',
        ],
    ),
    dict(
        classes=[0, 0, 0],
        sents=[
            'single single single',
            'female female female',
            'German German German',
        ],
    ),
]

# Validation entities. Label order in `classes`: married, male, American.
# Unlike the training sentences, these contain words that never occur in
# train_data ('Barack', 'Angela', 'is').
valid_data = [
    dict(
        classes=[1, 1, 1],
        sents=[
            'Barack is married',
            'Barack is male',
            'Barack is American',
        ],
    ),
    dict(
        classes=[1, 0, 0],
        sents=[
            'Angela is married',
            'Angela is female',
            'Angela is German',
        ],
    ),
]

# 2 Pre-processing

## 2.1 Build vocabulary from train data

In [None]:
def tokenize(text):
    """Split ``text`` into a list of tokens on runs of whitespace.

    Leading/trailing whitespace is ignored; an empty or all-whitespace
    input yields an empty list.
    """
    tokens = text.split()
    return tokens

# Flatten the training entities into sentences, then into words.
train_sents = [sent for ent in train_data for sent in ent['sents']]
train_words = [word for sent in train_sents for word in tokenize(sent)]

# Build the vocabulary from training-word frequencies only.
word_counts = Counter(train_words)
vocab = Vocab(word_counts)

# Show the resulting string-to-index mapping.
print(pretty(vocab.stoi))

# Publish the vocabulary and its size on the `util` module as well.
vocab_size = len(vocab)
util.vocab = vocab
util.vocab_size = vocab_size

## 2.2 Transform train/valid data

Map words to their vocabulary indices and create tensors.

In [None]:
def _to_batches(entities):
    """Turn a list of entity dicts into a (sentences, classes) tensor pair.

    Each word of each sentence is looked up in ``vocab``. The nested lists
    must be rectangular for ``tensor`` to accept them (here every entity has
    the same number of sentences and every sentence the same number of words).
    Words missing from ``vocab`` presumably map to its unknown-token index --
    confirm against the torchtext version in use.
    """
    word_ids = [
        [[vocab[word] for word in tokenize(sent)] for sent in ent['sents']]
        for ent in entities
    ]
    labels = [ent['classes'] for ent in entities]
    return tensor(word_ids), torch.tensor(labels)


train_sents_batch, train_classes_batch = _to_batches(train_data)
valid_sents_batch, valid_classes_batch = _to_batches(valid_data)

# 3 Create classifier

In [None]:
# Seed PyTorch's RNG before building the model so that any random parameter
# initialisation -- and therefore the loss curves below -- is reproducible
# across "Restart Kernel -> Run All". The seed value itself is arbitrary.
torch.manual_seed(0)

# Baseline model without attention, sized by the vocabulary, the embedding
# dimension and the number of classes.
classifier = Classifier(vocab_size, emb_size, class_count)

# 4 Training

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
# Loss: binary cross-entropy on raw logits, computed element-wise, i.e.
# independently for every class of every entity.
# The commented-out alternatives are kept for reference. MSELoss and SGD are
# not imported by this notebook's import cell and must be imported before
# being re-enabled.
# criterion = MSELoss()
criterion = BCEWithLogitsLoss()
# criterion = BCEWithLogitsLoss(pos_weight=torch.tensor([80] * class_count))

# optimizer = SGD(classifier.parameters(), lr=0.1)
optimizer = Adam(classifier.parameters(), lr=0.1)

epoch_count = 1000

# Epochs (0-based) whose losses are echoed to the cell output; the last entry
# always follows the final epoch.
print_epochs = {0, 9, 99, epoch_count - 1}

# With no arguments, SummaryWriter logs below ./runs, which is the directory
# the TensorBoard cell above points at.
writer = SummaryWriter()

try:
    for epoch in range(epoch_count):

        #
        # Train (full batch: the whole training set in a single step)
        #

        classifier.train()

        train_logits_batch = classifier(train_sents_batch)
        train_loss = criterion(train_logits_batch, train_classes_batch.float())

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        #
        # Validate (no gradients needed; evaluation mode)
        #

        classifier.eval()

        with torch.no_grad():
            valid_logits_batch = classifier(valid_sents_batch)
            valid_loss = criterion(valid_logits_batch, valid_classes_batch.float())

        #
        # Log
        #

        # Log plain Python floats rather than tensors.
        writer.add_scalars(
            'loss',
            {'train': train_loss.item(), 'valid': valid_loss.item()},
            epoch,
        )

        if epoch in print_epochs:
            print(f'Epoch {epoch}: Train loss = {train_loss.item()}, valid loss = {valid_loss.item()}')
finally:
    # Flush pending events and release the event files, even if training is
    # interrupted; otherwise the last points may not show up in TensorBoard.
    writer.close()

# 5 Test

## 5.1 Define test data

In [None]:
# Test entities. Label order in `classes`: married, male, American.
# NOTE(review): the second entity (Angela) is identical to the second entry
# of valid_data, so the test set is not fully disjoint from validation.
test_data = [
    dict(
        classes=[1, 0, 1],
        sents=[
            'Michelle is married',
            'Michelle is female',
            'Michelle is American',
        ],
    ),
    dict(
        classes=[1, 0, 0],
        sents=[
            'Angela is married',
            'Angela is female',
            'Angela is German',
        ],
    ),
]

## 5.2 Pre-process test data

In [None]:
# Look up every word of every test sentence in the training vocabulary.
test_word_ids = [
    [[vocab[word] for word in tokenize(sent)] for sent in ent['sents']]
    for ent in test_data
]
test_sents_batch = tensor(test_word_ids)

# One 0/1 label per class for every test entity.
test_labels = [ent['classes'] for ent in test_data]
test_classes_batch = torch.tensor(test_labels)

## 5.3 Forward test batch

In [None]:
# Evaluate on the test batch: evaluation mode and no gradient tracking,
# mirroring how the validation pass is computed during training.
classifier.eval()

with torch.no_grad():
    test_logits_batch = classifier(test_sents_batch)
    test_loss = criterion(test_logits_batch, test_classes_batch.float())

# Report the result; the assignments above produce no cell output on their own.
print(f'Test loss = {test_loss.item()}')