In [1]:
# !pip install torch==1.8.0 torchtext==0.9.0
!pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.8.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.8.0%2Bcu111-cp39-cp39-linux_x86_64.whl (1982.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 GB[0m [31m820.6 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.9.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torchvision-0.9.0%2Bcu111-cp39-cp39-linux_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchaudio==0.8.0
  Downloading torchaudio-0.8.0-cp39-cp39-manylinux1_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch, torchvision, torchaudio
  A

# PYTORCH Basics

A tensor is a data structure or data container we use in PyTorch for carrying arrays of numbers.

In [2]:
torch.__version__

NameError: ignored

In [3]:
import torch
import numpy as np

In [4]:
# Construct a tensor from an array
array = [[1, 2], [7, 4], [5, 6]]
tensor0 = torch.tensor(array)
print(tensor0)
print("The data structure type of tensor0: ", type(tensor0))
print("The data type of tensor0: ", tensor0.dtype)
print("The shape of tensor0: ", tensor0.shape)

tensor([[1, 2],
        [7, 4],
        [5, 6]])
The data structure type of tensor0:  <class 'torch.Tensor'>
The data type of tensor0:  torch.int64
The shape of tensor0:  torch.Size([3, 2])


In [5]:
# Construct a tensor from a numpy array
np_array = np.array([[1, 2], [7, 4], [5, 6]])
tensor1 = torch.tensor(np_array)
print(tensor1)

tensor([[1, 2],
        [7, 4],
        [5, 6]])


## Slicing

In [6]:
tensorA = torch.tensor([[1, 1, 1], [2, 2, 2]])
tensorB = torch.tensor([[3, 3, 3], [4, 4, 4]])

In [7]:
# Slicing is all the same as numpy arrays
print('Slicing the first two rows of tensorA (index one inclusive index two exclusive): ')
print(tensorA[:2])
print('Slicing the first two columns of tensorA (take all rows, then slice columns): ')
print(tensorA[:,:2])

Slicing the first two rows of tensorA (index one inclusive index two exclusive): 
tensor([[1, 1, 1],
        [2, 2, 2]])
Slicing the first two columns of tensorA (take all rows, then slice columns): 
tensor([[1, 1],
        [2, 2]])


## Concatenation

In [8]:
print('Vertically concatenate tensorA and tensorB: (default: dim=0)')
concat_v = torch.cat([tensorA, tensorB]) 
print(concat_v)

Vertically concatenate tensorA and tensorB: (default: dim=0)
tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [4, 4, 4]])


In [9]:
print('Horizontally concatenate tensorA and tensorB: (dim=1)')
concat_h = torch.cat([tensorA, tensorB], dim = 1) 
print(concat_h)

Horizontally concatenate tensorA and tensorB: (dim=1)
tensor([[1, 1, 1, 3, 3, 3],
        [2, 2, 2, 4, 4, 4]])


## Preprocessing text dataset

In [10]:
from torchtext import data, datasets
import random

In [11]:
seed = 966
torch.manual_seed(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Fields

In [12]:
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator, LabelField

In [13]:
from torchtext.legacy.datasets  import TREC

In [14]:
TEXT = Field(tokenize = 'spacy', lower = True)
LABEL = LabelField()



In [15]:
train, test = TREC.splits(TEXT, LABEL)
train, val = train.split(random_state=random.seed(seed))

In [16]:
vars(train[-1])

{'text': ['how', 'do', 'you', 'say', '2', 'in', 'latin', '?'], 'label': 'ENTY'}

In [17]:
# build vocab
TEXT.build_vocab(train, min_freq=2)
LABEL.build_vocab(train)

In [18]:
print("Vocabulary size of TEXT:",len(TEXT.vocab.stoi))
print("Vocabulary size of LABEL:",len(LABEL.vocab.stoi))

Vocabulary size of TEXT: 2643
Vocabulary size of LABEL: 6


In [19]:
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train, val, test),
    batch_size = 64,
    sort_key=lambda x: len(x.text), 
    device=device
)

## Build a simple CNN model

In [20]:
import torch.nn as nn
import torch.nn.functional as F

This model uses an embedding layer to represent the input text as dense vectors. It then applies multiple convolutional layers with different filter sizes to capture different n-grams in the text. The outputs of the convolutional layers are pooled using max pooling and concatenated. Finally, the concatenated features are passed through a fully connected layer to produce the output predictions.

In [21]:
class CNN(nn.Module):
  def __init__(self, vocabulary_size, embedding_size, 
               kernels_number, kernel_sizes, output_size, dropout_rate):
    super().__init__()
    self.embedding = nn.Embedding(vocabulary_size, embedding_size)
    self.convolution_layers = nn.ModuleList([nn.Conv2d(in_channels=1, out_channels=kernels_number, kernel_size=(k, embedding_size)) 
                                            for k in kernel_sizes])
    self.dropout = nn.Dropout(dropout_rate)
    self.fully_connected = nn.Linear(len(kernel_sizes) * kernels_number, output_size)
  def forward(self, text):
    text = text.permute(1, 0)
    input_embeddings = self.embedding(text)
    input_embeddings = input_embeddings.unsqueeze(1)
    conved = [F.relu(convolution_layer(input_embeddings)).squeeze(3) for convolution_layer in self.convolution_layers]
    pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
    concat = self.dropout(torch.cat(pooled, dim=1))
    final_output = self.fully_connected(concat)
    return final_output

In [22]:
input_size = len(TEXT.vocab)
embedding_size = 100
kernels_number = 100
kernel_sizes = [2, 3, 4]
output_size = len(LABEL.vocab)
dropout_rate = 0.3

In [23]:
model = CNN(input_size, embedding_size, kernels_number, kernel_sizes, output_size, dropout_rate)

In [24]:
print(model)

CNN(
  (embedding): Embedding(2643, 100)
  (convolution_layers): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  )
  (dropout): Dropout(p=0.3, inplace=False)
  (fully_connected): Linear(in_features=300, out_features=6, bias=True)
)


In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

CNN(
  (embedding): Embedding(2643, 100)
  (convolution_layers): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  )
  (dropout): Dropout(p=0.3, inplace=False)
  (fully_connected): Linear(in_features=300, out_features=6, bias=True)
)

## Train and Evaluate Functions

In [26]:
import torch.optim as optim
import torch.nn as nn

In [27]:
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

optimizer = optim.Adam(model.parameters())

In [28]:
def accuracy(predictions, actual_label):
    max_predictions = predictions.argmax(dim = 1, keepdim = True, )
    correct_predictions = max_predictions.squeeze(1).eq(actual_label)
    accuracy = correct_predictions.sum() / torch.cuda.FloatTensor([actual_label.shape[0]])
    return accuracy

In [29]:
def train(model, iterator, optimizer, criterion):

    model.train()
    epoch_loss = 0
    epoch_acc = 0
    
    for batch in iterator:
        optimizer.zero_grad()
        
        predictions = model(batch.text)
        
        loss = criterion(predictions, batch.label)
        
        acc = accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [30]:
def evaluate(model, iterator, criterion):

    model.eval()
    epoch_loss = 0
    epoch_acc = 0
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.text)
            
            loss = criterion(predictions, batch.label)
            
            acc = accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

## Training the model

In [31]:
number_of_epochs = 20

best_acc = float('-inf')

for epoch in range(number_of_epochs):
    
    # Write the code here
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    # Write the code here
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    if valid_acc > best_acc:
        # Write the code here
        best_acc = valid_acc
        torch.save(model.state_dict(), 'trec.pt')
    
    print(f'Epoch {epoch+1} ')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Validation Loss: {valid_loss:.3f} |  Validation Acc: {valid_acc*100:.2f}%')

Epoch 1 
	Train Loss: 1.308 | Train Acc: 47.92%
	 Validation Loss: 0.956 |  Validation Acc: 64.57%
Epoch 2 
	Train Loss: 0.788 | Train Acc: 71.94%
	 Validation Loss: 0.736 |  Validation Acc: 72.54%
Epoch 3 
	Train Loss: 0.600 | Train Acc: 79.30%
	 Validation Loss: 0.637 |  Validation Acc: 75.61%
Epoch 4 
	Train Loss: 0.465 | Train Acc: 84.93%
	 Validation Loss: 0.615 |  Validation Acc: 75.99%
Epoch 5 
	Train Loss: 0.361 | Train Acc: 89.19%
	 Validation Loss: 0.554 |  Validation Acc: 78.60%
Epoch 6 
	Train Loss: 0.279 | Train Acc: 91.90%
	 Validation Loss: 0.532 |  Validation Acc: 80.71%
Epoch 7 
	Train Loss: 0.210 | Train Acc: 94.14%
	 Validation Loss: 0.514 |  Validation Acc: 80.63%
Epoch 8 
	Train Loss: 0.168 | Train Acc: 95.81%
	 Validation Loss: 0.522 |  Validation Acc: 81.67%
Epoch 9 
	Train Loss: 0.140 | Train Acc: 96.45%
	 Validation Loss: 0.515 |  Validation Acc: 82.38%
Epoch 10 
	Train Loss: 0.113 | Train Acc: 97.48%
	 Validation Loss: 0.509 |  Validation Acc: 82.38%
Epoch 11 

In [32]:
model.load_state_dict(torch.load('trec.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.422 | Test Acc: 89.87%
