## Text Classification - MLP

In [2]:
# !pip install -U torchtext==0.17.0

## setup

In [3]:
import torch
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Toy dataset: two headlines, each paired (by position) with a sentiment label.
# Label convention -> 0: negative, 1: positive
corpus = [
    "Deepseek sell off stock market",
    "Deepseek release all their technical documents",
]
labels = [0, 1]
data_size = len(corpus)

# Upper bound on the vocabulary size, and the fixed length of every id sequence
vocab_size, sequence_length = 8, 5

ModuleNotFoundError: No module named 'torch'

In [3]:
from torchtext.data.utils import get_tokenizer
# version 0.17.0

# Build the tokenizer once, then split both samples into tokens
tokenizer = get_tokenizer('basic_english')

sample1, sample2 = corpus[0], corpus[1]
sample1_tokens = tokenizer(sample1)
sample2_tokens = tokenizer(sample2)

# One token list per line
print(sample1_tokens, sample2_tokens, sep="\n")

['deepseek', 'sell', 'off', 'stock', 'market']
['deepseek', 'release', 'all', 'their', 'technical', 'documents']


In [5]:
sample1_ids = [vocab[token] for token in sample1_tokens]
sample2_ids = [vocab[token] for token in sample2_tokens]

print(sample1_ids)
print(sample2_ids)

[2, 0, 6, 0, 5]
[2, 7, 3, 0, 0, 4]


In [4]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Define tokenizer function
tokenizer = get_tokenizer('basic_english')

# Create a function to yield list of tokens
def yield_tokens(examples):
    for text in examples:
        yield tokenizer(text)

# Create vocabulary
vocab = build_vocab_from_iterator(yield_tokens(corpus),
                                  max_tokens=vocab_size,
                                  specials=["<unk>", "<pad>"])
vocab.set_default_index(vocab["<unk>"])
vocab.get_stoi()

{'release': 7,
 'off': 6,
 'market': 5,
 'documents': 4,
 'deepseek': 2,
 '<pad>': 1,
 'all': 3,
 '<unk>': 0}

In [6]:
# Tokenize and numericalize your samples
def vectorize(text, vocab, sequence_length, tokenize_fn=None):
    """Convert a text into a fixed-length tensor of vocabulary ids.

    The text is tokenized, cut down to at most `sequence_length` tokens, mapped
    to ids through `vocab`, and right-padded with the id of "<pad>" until it
    holds exactly `sequence_length` entries.

    Args:
        text: The raw string to convert.
        vocab: Mapping from token to id (indexable with `vocab[token]`); it
            must contain the "<pad>" token.
        sequence_length: Number of ids in the returned tensor.
        tokenize_fn: Optional callable turning a string into a list of tokens.
            When omitted, the notebook-level `tokenizer` is used.

    Returns:
        A 1-D `torch.long` tensor holding `sequence_length` ids.
    """
    if tokenize_fn is None:
        # Fall back to the tokenizer created in an earlier notebook cell
        tokenize_fn = tokenizer

    # Keep only the first `sequence_length` tokens, then look up their ids
    tokens = tokenize_fn(text)[:sequence_length]
    token_ids = [vocab[token] for token in tokens]

    # Fill the remaining positions with the padding id.
    # List repetition and concatenation: [1, 2] + [0] * 2 == [1, 2, 0, 0]
    missing = max(0, sequence_length - len(token_ids))
    token_ids = token_ids + [vocab["<pad>"]] * missing

    return torch.tensor(token_ids, dtype=torch.long)


# Vectorize the samples: one fixed-length id tensor per sentence
corpus_ids = [vectorize(sentence, vocab, sequence_length) for sentence in corpus]

In [7]:
# Show the vectorized corpus: one id tensor per sentence in `corpus`
corpus_ids

[tensor([2, 0, 6, 0, 5]), tensor([2, 7, 3, 0, 0])]

In [8]:
# Print the id tensors one per line
for sample_ids in corpus_ids:
    print(sample_ids)

tensor([2, 0, 6, 0, 5])
tensor([2, 7, 3, 0, 0])


In [9]:
vocab_size = 8
embedding_dim = 2

# Hand-picked weights (one 2-d row per vocabulary id) so that every number
# printed further down can be reproduced exactly.
custom_weights = torch.tensor(
    [
        [-0.1882,  0.5530],
        [ 1.7840, -0.8278],
        [ 1.0281, -1.9094],
        [-1.3083, -0.0987],
        [ 0.2293,  1.3255],
        [ 0.4058, -0.6624],
        [ 0.5582,  0.0786],
        [ 0.4309, -1.3067],
    ],
    dtype=torch.float32,
)

# Create the layer, then swap its random initial weights for the fixed ones
embedding = nn.Embedding(vocab_size, embedding_dim)
embedding.weight = nn.Parameter(custom_weights)

print(embedding.weight)

Parameter containing:
tensor([[-0.1882,  0.5530],
        [ 1.7840, -0.8278],
        [ 1.0281, -1.9094],
        [-1.3083, -0.0987],
        [ 0.2293,  1.3255],
        [ 0.4058, -0.6624],
        [ 0.5582,  0.0786],
        [ 0.4309, -1.3067]], requires_grad=True)


In [10]:
# --- Embedding layer: one fixed 2-d vector per vocabulary id ---
custom_weights = torch.tensor(
    [
        [-0.1882,  0.5530],
        [ 1.7840, -0.8278],
        [ 1.0281, -1.9094],
        [-1.3083, -0.0987],
        [ 0.2293,  1.3255],
        [ 0.4058, -0.6624],
        [ 0.5582,  0.0786],
        [ 0.4309, -1.3067],
    ],
    dtype=torch.float32,
)
embedding = nn.Embedding(vocab_size, 2)
embedding.weight = nn.Parameter(custom_weights)

# --- Linear classifier: 10 input features -> 2 class logits ---
# One weight row per output class, plus one bias per class.
fc_weights = torch.tensor(
    [
        [0.2108, -0.0074,  0.2760,  0.2325, -0.0518, -0.1876,  0.0194, 0.0378, 0.0210, 0.2982],
        [0.0284,  0.2968, -0.0260,  0.1251, -0.0282,  0.0175, -0.1817, 0.2483, 0.2338, 0.2985],
    ],
    dtype=torch.float32,
)
fc_bias = torch.tensor([-0.3049,  0.1028], dtype=torch.float32)

fc = nn.Linear(10, 2)
fc.weight = nn.Parameter(fc_weights)
fc.bias = nn.Parameter(fc_bias)

# --- Full model: embed -> flatten -> classify ---
flatten = nn.Flatten()
model = nn.Sequential(embedding, flatten, fc)

In [11]:
# Show the layers that make up the sequential model
print(model)

Sequential(
  (0): Embedding(8, 2)
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=10, out_features=2, bias=True)
)


In [12]:
# Plain SGD over every parameter of the model, with a step size of 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Cross-entropy loss computed from the model outputs and integer class labels
criterion = nn.CrossEntropyLoss()

# Hands-on

In [13]:
# A batch holding a single sample: its label (class 0) and its five token ids
label_1 = torch.tensor([0], dtype=torch.int64)
input_1 = torch.tensor([[2, 0, 6, 0, 5]], dtype=torch.int64)

In [14]:
# Step 1: look up the embedding vector of every token id in the batch
embedded_output = embedding(input_1)
# Step 2: merge all non-batch dimensions so each sample becomes one feature row
flattened_output = flatten(embedded_output)

print("Embedded Output:\n", embedded_output, embedded_output.shape)
print("Flattened Output:\n", flattened_output.shape)

Embedded Output:
 tensor([[[ 1.0281, -1.9094],
         [-0.1882,  0.5530],
         [ 0.5582,  0.0786],
         [-0.1882,  0.5530],
         [ 0.4058, -0.6624]]], grad_fn=<EmbeddingBackward0>) torch.Size([1, 5, 2])
Flattened Output:
 torch.Size([1, 10])


In [15]:
# Pull out the two rows of the linear layer's weight matrix (one row per output)
w1, w2 = fc_weights[0], fc_weights[1]
w2

tensor([ 0.0284,  0.2968, -0.0260,  0.1251, -0.0282,  0.0175, -0.1817,  0.2483,
         0.2338,  0.2985])

In [16]:
# Manual dot product with the first weight row: multiply each flattened
# feature by its weight, then add the products up (the bias is not included)
result = [x * w1[i] for i, x in enumerate(flattened_output[0])]

result, sum(result)

([tensor(0.2167, grad_fn=<MulBackward0>),
  tensor(0.0141, grad_fn=<MulBackward0>),
  tensor(-0.0519, grad_fn=<MulBackward0>),
  tensor(0.1286, grad_fn=<MulBackward0>),
  tensor(-0.0289, grad_fn=<MulBackward0>),
  tensor(-0.0147, grad_fn=<MulBackward0>),
  tensor(-0.0037, grad_fn=<MulBackward0>),
  tensor(0.0209, grad_fn=<MulBackward0>),
  tensor(0.0085, grad_fn=<MulBackward0>),
  tensor(-0.1975, grad_fn=<MulBackward0>)],
 tensor(0.0921, grad_fn=<AddBackward0>))

In [17]:
# Manual dot product with the second weight row: multiply each flattened
# feature by its weight, then add the products up (the bias is not included)
result = [x * w2[i] for i, x in enumerate(flattened_output[0])]

result, sum(result)

([tensor(0.0292, grad_fn=<MulBackward0>),
  tensor(-0.5667, grad_fn=<MulBackward0>),
  tensor(0.0049, grad_fn=<MulBackward0>),
  tensor(0.0692, grad_fn=<MulBackward0>),
  tensor(-0.0157, grad_fn=<MulBackward0>),
  tensor(0.0014, grad_fn=<MulBackward0>),
  tensor(0.0342, grad_fn=<MulBackward0>),
  tensor(0.1373, grad_fn=<MulBackward0>),
  tensor(0.0949, grad_fn=<MulBackward0>),
  tensor(-0.1977, grad_fn=<MulBackward0>)],
 tensor(-0.4091, grad_fn=<AddBackward0>))

In [18]:
# Let the linear layer compute both outputs at once (weights and bias applied)
fc_output = fc(flattened_output)
print("FC Output:\n", fc_output, fc_output.shape)

FC Output:
 tensor([[-0.2128, -0.3063]], grad_fn=<AddmmBackward0>) torch.Size([1, 2])


In [19]:
import math
# Softmax by hand, step 1: exponentiate the first FC output (-0.2128)
math.e**(-0.2128)

0.8083177846081321

In [20]:
# Softmax by hand, step 2: exponentiate the second FC output (-0.3063)
math.e**(-0.3063)

0.7361657366043477

In [21]:
# Softmax by hand, step 3: probability of the second class, i.e. its
# exponential (~0.7361) divided by the sum of both exponentials
# (0.8083 + 0.7362 ~= 1.5445)
0.7361/1.5445

0.4765943671090968

## Train on sample 1 once, then check the gradients and weights

In [22]:
# first sample
label_1 = torch.tensor([0], dtype=torch.long)
input_1 = torch.tensor([[2, 0, 6, 0, 5]], dtype=torch.long)

# Forward pass, starting from cleared gradients
optimizer.zero_grad()
outputs = model(input_1)
print(outputs)
print(torch.softmax(outputs, dim=-1))

# Loss, backward pass, and a single optimizer update
loss = criterion(outputs, label_1)
print(loss)
loss.backward()
optimizer.step()

tensor([[-0.2128, -0.3063]], grad_fn=<AddmmBackward0>)
tensor([[0.5234, 0.4766]], grad_fn=<SoftmaxBackward0>)
tensor(0.6475, grad_fn=<NllLossBackward0>)


In [23]:
# Dump the parameters (and the embedding gradient) after the update,
# each one preceded by its title
for title, value in (
    ("\n embedding.weight \n", embedding.weight),
    ("\n embedding.weight.grad \n", embedding.weight.grad),
    ("\n fc.weight \n", fc.weight),
    ("\n fc.bias \n", fc.bias),
):
    print(title)
    print(value)


 embedding.weight 

Parameter containing:
tensor([[-0.1642,  0.5481],
        [ 1.7840, -0.8278],
        [ 1.0368, -1.9239],
        [-1.3083, -0.0987],
        [ 0.2293,  1.3255],
        [ 0.3957, -0.6624],
        [ 0.5571,  0.0688],
        [ 0.4309, -1.3067]], requires_grad=True)

 embedding.weight.grad 

tensor([[-2.3980e-01,  4.9141e-02],
        [ 0.0000e+00,  0.0000e+00],
        [-8.6939e-02,  1.4499e-01],
        [ 0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00],
        [ 1.0143e-01,  1.4298e-04],
        [ 1.1249e-02,  9.7758e-02],
        [ 0.0000e+00,  0.0000e+00]])

 fc.weight 

Parameter containing:
tensor([[ 0.2598, -0.0984,  0.2670,  0.2589, -0.0252, -0.1839,  0.0104,  0.0642,
          0.0403,  0.2666],
        [-0.0206,  0.3878, -0.0170,  0.0987, -0.0548,  0.0138, -0.1727,  0.2219,
          0.2145,  0.3301]], requires_grad=True)

 fc.bias 

Parameter containing:
tensor([-0.2572,  0.0551], requires_grad=True)


## Check whether the loss decreases

In [24]:
# Forward pass only (no backward pass or update): recompute the loss on the
# same sample
optimizer.zero_grad()
outputs = model(input_1)
loss = criterion(outputs, label_1)

print(outputs)
print(torch.softmax(outputs, dim=-1))
print(loss)

tensor([[ 0.1456, -0.6688]], grad_fn=<AddmmBackward0>)
tensor([[0.6930, 0.3070]], grad_fn=<SoftmaxBackward0>)
tensor(0.3667, grad_fn=<NllLossBackward0>)


## Train several epochs (ignore this part)

In [25]:
# A batch of two id sequences with one class label each
labels = torch.tensor([0, 1], dtype=torch.long)
inputs = torch.tensor(
    [
        [4, 3, 0, 6, 3],
        [2, 5, 7, 2, 0],
    ],
    dtype=torch.long,
)

# Five full-batch updates; the loss is printed before each update is applied
for epoch in range(5):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    print(loss)

    loss.backward()
    optimizer.step()

tensor(1.6064, grad_fn=<NllLossBackward0>)
tensor(0.9049, grad_fn=<NllLossBackward0>)
tensor(0.5502, grad_fn=<NllLossBackward0>)
tensor(0.3779, grad_fn=<NllLossBackward0>)
tensor(0.2824, grad_fn=<NllLossBackward0>)


## Check if the model has already learned successfully

In [26]:
# Outputs of the trained model on the training batch, followed by the
# per-row softmax probabilities
outputs = model(inputs)
print(outputs, torch.softmax(outputs, dim=-1), sep="\n")

tensor([[ 0.2058, -0.8800],
        [-1.3359,  0.4520]], grad_fn=<AddmmBackward0>)
tensor([[0.7476, 0.2524],
        [0.1433, 0.8567]], grad_fn=<SoftmaxBackward0>)
