In [13]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the visible cells read from or write to /content/drive
# (the dataset root is /content/ and the checkpoint is saved as "model.pt") —
# confirm the mount is still needed.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
!pip install torchdata==0.4.1



In [15]:
!pip install torchtext==0.13.1



In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchtext
from torch.nn.utils.rnn import pad_sequence

import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

In [17]:
# Record the library versions this notebook was run with, one per line.
print(torch.__version__, torchtext.__version__, sep="\n")

1.12.1+cu102
0.13.1


## Tokenization

In [18]:
from torchtext import transforms as T

# Location of the bert-base-uncased vocabulary file used by the tokenizer.
VOCAB_FILE = "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt"

# BERT tokenizer that lower-cases its input. With return_tokens=True it is
# configured to return token strings (presumably token ids otherwise — confirm
# against the torchtext documentation).
tokenizer = T.BERTTokenizer(
    vocab_path=VOCAB_FILE, do_lower_case=True, return_tokens=True
)

## Vocab (Vectorization)

In [19]:
from torchtext.vocab import GloVe

# Pre-trained GloVe vectors (the "6B" set) with 50 dimensions per token.
# This dimension has to agree with the `embed_dim` given to the model, because
# these vectors are what `collate` feeds into it.
vocab = GloVe(name="6B", dim=50)

## Dataset

In [20]:
from torchtext import datasets

# Load the AG_NEWS "train" and "test" splits; "/content/" is passed as the
# first argument (presumably the dataset root/cache directory — confirm).
train_set, test_set = datasets.AG_NEWS("/content/", split=("train", "test"))
# Display the two dataset objects as the cell output.
train_set, test_set

(ShardingFilterIterDataPipe, ShardingFilterIterDataPipe)

In [36]:
def collate(batch):
    """Turn a list of dataset samples into a padded tensor batch.

    Each sample is indexed as ``sample[0]`` (label) and ``sample[1]`` (raw text).
    The texts are tokenized with the module-level ``tokenizer``, every token is
    looked up in the module-level ``vocab``, and the resulting per-sample
    matrices are padded to a common length with ``pad_sequence``.

    Args:
        batch: list of samples as yielded by the dataset.

    Returns:
        A ``(vecs, labels)`` tuple where ``vecs`` is the padded tensor of token
        vectors laid out as (longest_sequence, batch, vector_dim) — the default
        ``pad_sequence`` layout — and ``labels`` is a ``LongTensor`` of the
        sample labels shifted down by one.
    """
    # NOTE(review): the tokenizer is a BERT tokenizer while the vectors are
    # GloVe; sub-word pieces it may emit are presumably absent from the GloVe
    # vocabulary and would fall back to the unknown-token vector — confirm.
    texts = [sample[1] for sample in batch]

    # Subtract 1 so the class indices start at 0, as the loss expects.
    targets = torch.tensor([sample[0] for sample in batch], dtype=torch.long) - 1

    embedded = [vocab.get_vecs_by_tokens(tokens) for tokens in tokenizer(texts)]
    return pad_sequence(embedded), targets

In [59]:
# Both loaders produce batches of 64 samples and use `collate` to convert the
# raw (label, text) samples into padded tensors. Only the training loader is
# asked to shuffle; the test loader keeps the dataset order.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True, collate_fn=collate)
test_loader = DataLoader(test_set, batch_size=64, collate_fn=collate)

In [44]:
# Pull a single training batch to inspect what `collate` produces.
x, y = next(iter(train_loader))
# x is laid out as (longest sequence in the batch, batch size, embedding dim);
# y holds the zero-based class index of every sample.
x.shape, y

(torch.Size([169, 64, 50]),
 tensor([2, 0, 1, 1, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 0, 0, 1, 0, 1, 0, 2, 2, 3, 2,
         2, 0, 3, 1, 3, 3, 1, 3, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2,
         0, 0, 1, 1, 1, 0, 1, 3, 0, 3, 3, 2, 1, 0, 3, 0]))

## Utils

In [60]:
class AverageMeter:
    """Running tracker for a scalar metric.

    Attributes:
        val: the value passed to the most recent ``update`` call.
        sum: weighted sum of every value seen since the last ``reset``.
        count: total weight (sum of ``n``) seen since the last ``reset``.
        avg: ``sum / count``, refreshed on every ``update``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted ``n`` times.

        Args:
            val: the new measurement.
            n: weight of the measurement (e.g. number of samples it covers).
        """
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

## Init

In [61]:
# Number of target classes the classifier has to distinguish.
num_cls = 4

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Model

In [62]:
class CNNModel(nn.Module):
    """Multi-kernel 1-D convolutional text classifier.

    Several ``Conv1d`` branches with different kernel sizes are applied in
    parallel over the token dimension; each branch is passed through ReLU and
    max-pooled over time, the pooled features are concatenated, regularised
    with dropout and mapped to class logits by a linear layer.

    Args:
        embed_dim: size of each token vector (number of input channels).
        filter_sizes: kernel size of every convolution branch.
        num_filters: number of output channels of every branch; must have the
            same length as ``filter_sizes``.
        num_classes: number of output logits.
        dropout: dropout probability applied before the final linear layer.

    Raises:
        ValueError: if ``filter_sizes`` is empty or its length differs from
            that of ``num_filters``.
    """

    def __init__(self, embed_dim, filter_sizes, num_filters, num_classes, dropout):
        super().__init__()

        # Fail early with a clear message instead of an IndexError here or a
        # shape mismatch in the first forward pass.
        if len(filter_sizes) == 0:
            raise ValueError("filter_sizes must contain at least one kernel size")
        if len(filter_sizes) != len(num_filters):
            raise ValueError(
                "filter_sizes and num_filters must have the same length, "
                f"got {len(filter_sizes)} and {len(num_filters)}"
            )

        self.conv1d_list = nn.ModuleList(
            [
                nn.Conv1d(
                    in_channels=embed_dim,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                )
                for kernel_size, out_channels in zip(filter_sizes, num_filters)
            ]
        )
        self.fc = nn.Linear(sum(num_filters), num_classes)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Compute class logits for a batch of embedded sequences.

        Args:
            x: tensor laid out as (seq_length, batch_size, embed_dim).

        Returns:
            Tensor of shape (batch_size, num_classes) with unnormalised logits.
        """
        x = x.permute(1, 2, 0)  # x shape: (batch_size, embed_dim, seq_length)

        # A convolution cannot be applied to a sequence shorter than its kernel.
        # Right-pad very short batches with zeros up to the largest kernel size
        # so that they produce logits instead of raising. The size is derived
        # from the conv layers themselves, so no extra attribute is needed.
        min_length = max(conv.kernel_size[0] for conv in self.conv1d_list)
        if x.size(2) < min_length:
            x = F.pad(x, (0, min_length - x.size(2)))

        # Apply every convolution branch and max-pool it over the time axis.
        conv_outputs = []
        for conv in self.conv1d_list:
            # conv_output shape: (batch_size, num_filters, conv_seq_length)
            conv_output = F.relu(conv(x))
            # conv_output shape: (batch_size, num_filters)
            conv_output = F.max_pool1d(conv_output, conv_output.size(2)).squeeze(2)
            conv_outputs.append(conv_output)

        x = torch.cat(conv_outputs, 1)  # x shape: (batch_size, sum(num_filters))
        x = self.dropout(x)
        logits = self.fc(x)  # logits shape: (batch_size, num_classes)

        return logits

## Functions

In [63]:
def train_one_epoch(model, train_loader, loss_fn, optimizer, epoch=None):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: the network to optimise; it is switched to training mode.
        train_loader: iterable yielding ``(inputs, targets)`` batches.
        loss_fn: criterion called as ``loss_fn(outputs, targets)``. It is
            assumed to return the mean loss over the batch (the default of
            ``nn.CrossEntropyLoss``), which is why each batch loss is weighted
            by the batch size below.
        optimizer: optimiser updating the parameters of ``model``.
        epoch: optional epoch index, shown as the progress-bar description.

    Returns:
        ``(model, average_loss, accuracy)`` where ``average_loss`` is the
        per-sample mean loss over the epoch and ``accuracy`` is a percentage.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    model.train()
    loss_train = AverageMeter()
    correct = 0
    total = 0

    with tqdm(train_loader, unit="batch") as tepoch:
        # The description does not change within an epoch, so set it once.
        if epoch is not None:
            tepoch.set_description(f"Epoch {epoch}")

        for inputs, targets in tepoch:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not bias the running average.
            batch_size = targets.size(0)
            loss_train.update(loss.item(), n=batch_size)

            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == targets).sum().item()

            tepoch.set_postfix(loss=loss_train.avg, accuracy=100 * correct / total)

    accuracy = 100 * correct / total
    return model, loss_train.avg, accuracy

In [64]:
def validation(model, test_loader, loss_fn):
    """Evaluate ``model`` on ``test_loader`` without updating its weights.

    Args:
        model: the network to evaluate; it is switched to evaluation mode.
        test_loader: iterable yielding ``(inputs, targets)`` batches.
        loss_fn: criterion called as ``loss_fn(outputs, targets)``. It is
            assumed to return the mean loss over the batch (the default of
            ``nn.CrossEntropyLoss``), which is why each batch loss is weighted
            by the batch size below.

    Returns:
        ``(average_loss, accuracy)`` where ``average_loss`` is the per-sample
        mean loss over the whole loader and ``accuracy`` is a percentage.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no batches.
    """
    model.eval()
    loss_valid = AverageMeter()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            # Weight the batch-mean loss by the batch size; the last batch is
            # usually smaller and would otherwise be over-represented.
            batch_size = targets.size(0)
            loss_valid.update(loss.item(), n=batch_size)

            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    return loss_valid.avg, accuracy

## Train

### Step 1: check the forward pass

Calculate the loss for a single batch.

In [58]:
# Build a fresh, untrained model and the loss, then push a single batch
# through them to check that the shapes line up and the loss looks sensible.
model = CNNModel(
    embed_dim=50,
    filter_sizes=[3, 4, 5],
    num_filters=[100, 100, 100],
    num_classes=num_cls,
    dropout=0.5,
).to(device)
loss_fn = nn.CrossEntropyLoss()

x_batch, y_batch = next(iter(train_loader))
print(x_batch.shape, y_batch.shape)
outputs = model(x_batch.to(device))
# Sanity reference: a uniform prediction over the 4 classes has a
# cross-entropy of ln(4), about 1.39, so an untrained model should land in
# that neighbourhood.
loss = loss_fn(outputs, y_batch.to(device))
print(loss)

torch.Size([123, 64, 50]) torch.Size([64])
tensor(1.6286, grad_fn=<NllLossBackward0>)


### Step 2: select best lr

Train on all the data for one epoch with each candidate learning rate.

In [49]:
# Learning-rate sweep: train a freshly initialised model for `num_epochs`
# epoch(s) with each candidate learning rate and compare the training
# loss/accuracy reported by the progress bar.
num_epochs = 1

# Define the criterion in this cell so that it does not silently depend on
# `loss_fn` having been created by a different cell.
loss_fn = nn.CrossEntropyLoss()

for lr in [0.1, 0.01, 0.001, 0.0001]:
    print(f"LR={lr}")
    # A new model per learning rate, so earlier runs do not leak into later ones.
    model = CNNModel(
        embed_dim=50,
        filter_sizes=[3, 4, 5],
        num_filters=[100, 100, 100],
        num_classes=num_cls,
        dropout=0.5,
    ).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=1e-4, momentum=0.9)
    for epoch in range(num_epochs):
        model, _, _ = train_one_epoch(model, train_loader, loss_fn, optimizer, epoch)
    print()

LR=0.1
1


Epoch 0: : 1875batch [03:54,  7.99batch/s, accuracy=79.3, loss=0.602]



LR=0.01
1


Epoch 0: : 1875batch [03:57,  7.89batch/s, accuracy=85.4, loss=0.413]



LR=0.001
1


Epoch 0: : 1875batch [03:59,  7.84batch/s, accuracy=75.9, loss=0.648]



LR=0.0001
1


Epoch 0: : 1875batch [03:59,  7.83batch/s, accuracy=44.3, loss=1.25]







### Step 3: train more epochs

In [65]:
# Optimiser hyper-parameters for the long run.
lr = 0.01
wd = 1e-4

# Fresh model, criterion and optimiser.
model = CNNModel(
    embed_dim=50,
    filter_sizes=[3, 4, 5],
    num_filters=[100, 100, 100],
    num_classes=num_cls,
    dropout=0.5,
)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=lr, weight_decay=wd)

# Per-epoch history of loss and accuracy for both splits.
loss_train_hist, loss_valid_hist = [], []
acc_train_hist, acc_valid_hist = [], []

# Bookkeeping for checkpointing and for the number of epochs trained so far.
epoch_counter = 0
best_loss_valid = torch.inf

In [66]:
num_epochs = 15

# Number the epochs from `epoch_counter` so that re-running this cell to train
# for longer continues the numbering instead of starting again at "Epoch 0".
for epoch in range(epoch_counter, epoch_counter + num_epochs):
    # Train
    model, loss_train, acc_train = train_one_epoch(
        model, train_loader, loss_fn, optimizer, epoch
    )
    # Validation
    # NOTE(review): the test split doubles as the validation set that selects
    # the checkpoint — confirm this is intended.
    loss_valid, acc_valid = validation(model, test_loader, loss_fn)

    loss_train_hist.append(loss_train)
    loss_valid_hist.append(loss_valid)

    acc_train_hist.append(acc_train)
    acc_valid_hist.append(acc_valid)

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if loss_valid < best_loss_valid:
        # Saves the whole module object (not just its state_dict).
        torch.save(model, "model.pt")
        best_loss_valid = loss_valid
        print("Model Saved!")

    print(f"Valid: Loss = {loss_valid:.4}, Acc = {acc_valid:.4}")
    print()

    epoch_counter += 1

Epoch 0: : 1875batch [03:54,  8.01batch/s, accuracy=85.5, loss=0.412]


Model Saved!
Valid: Loss = 0.331, Acc = 89.22



Epoch 1: : 1875batch [04:03,  7.69batch/s, accuracy=88.7, loss=0.331]


Model Saved!
Valid: Loss = 0.331, Acc = 89.21



Epoch 2: : 1875batch [04:08,  7.55batch/s, accuracy=89.4, loss=0.309]


Model Saved!
Valid: Loss = 0.3025, Acc = 89.99



Epoch 3: : 1875batch [03:55,  7.96batch/s, accuracy=89.8, loss=0.296]


Model Saved!
Valid: Loss = 0.2945, Acc = 90.29



Epoch 4: : 1875batch [03:57,  7.89batch/s, accuracy=90.1, loss=0.285]


Model Saved!
Valid: Loss = 0.2942, Acc = 90.16



Epoch 5: : 1875batch [03:55,  7.96batch/s, accuracy=90.5, loss=0.278]


Valid: Loss = 0.3281, Acc = 88.92



Epoch 6: : 1875batch [03:56,  7.92batch/s, accuracy=90.7, loss=0.271]


Valid: Loss = 0.2986, Acc = 90.32



Epoch 7: : 1875batch [03:58,  7.88batch/s, accuracy=90.9, loss=0.264]


Model Saved!
Valid: Loss = 0.2847, Acc = 90.64



Epoch 8: : 1875batch [03:56,  7.92batch/s, accuracy=90.9, loss=0.26]


Valid: Loss = 0.2853, Acc = 90.8



Epoch 9: : 1875batch [03:56,  7.92batch/s, accuracy=91.1, loss=0.257]


Model Saved!
Valid: Loss = 0.2789, Acc = 90.99



Epoch 10: : 1875batch [03:53,  8.03batch/s, accuracy=91.3, loss=0.251]


Valid: Loss = 0.2802, Acc = 91.09



Epoch 11: : 1875batch [03:56,  7.94batch/s, accuracy=91.4, loss=0.248]


Valid: Loss = 0.2845, Acc = 90.82



Epoch 12: : 1875batch [03:57,  7.89batch/s, accuracy=91.5, loss=0.246]


Valid: Loss = 0.2912, Acc = 90.64



Epoch 13: : 1875batch [03:59,  7.82batch/s, accuracy=91.6, loss=0.241]


Valid: Loss = 0.2837, Acc = 90.79



Epoch 14: : 1875batch [03:57,  7.91batch/s, accuracy=91.6, loss=0.241]


Valid: Loss = 0.2808, Acc = 90.96

