In [None]:
# !pip install -U torchtext==0.17.0

## Data

In [None]:
import torch
import torch.nn as nn

# Toy corpus: two short sentences
corpus = ["AI is growing fast", "Models are large"]
data_size = len(corpus)

# Per-token tag ids, one list per sentence:
#   0 = noun/pronoun, 1 = verb, 2 = others
labels = [
    [0, 1, 1, 2],
    [0, 1, 2],
]

# Max vocabulary size and sequence length
vocab_size, sequence_length = 9, 4

In [None]:
from torchtext.data.utils import get_tokenizer

# Build the tokenizer once, then run it on both corpus sentences
tokenizer = get_tokenizer('basic_english')

sample1, sample2 = corpus[0], corpus[1]

# One token list per sample
sample1_tokens = tokenizer(sample1)
sample2_tokens = tokenizer(sample2)

# Each token list on its own line
print(sample1_tokens, sample2_tokens, sep="\n")


['ai', 'is', 'growing', 'fast']
['models', 'are', 'large']


In [None]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Define tokenizer (the same 'basic_english' tokenizer as in the demo cell above);
# yield_tokens below reads it as a module-level name.
tokenizer = get_tokenizer('basic_english')

# Lazily tokenize every text of an iterable
def yield_tokens(examples):
    """Yield one token list per text in ``examples``.

    Tokenization is delegated to the module-level ``tokenizer`` callable.
    """
    yield from map(tokenizer, examples)

# Create vocabulary from the tokenized corpus, capped at vocab_size entries
# (the two special tokens are included in that count)
vocab = build_vocab_from_iterator(
    yield_tokens(corpus),
    specials=["<unk>", "<pad>"],
    max_tokens=vocab_size,
)

# Tokens that are not in the vocabulary resolve to the <unk> index
vocab.set_default_index(vocab["<unk>"])

In [None]:
# Inspect the token -> index mapping of the vocabulary
vocab.get_stoi()

{'is': 6,
 'growing': 5,
 'models': 8,
 'are': 3,
 'ai': 2,
 'fast': 4,
 '<pad>': 1,
 'large': 7,
 '<unk>': 0}

In [None]:
# Tokenize and numericalize your samples
def vectorize(text, vocab, sequence_length, sequence_label, pad_label=3):
    """Convert one sentence and its tags into fixed-length index tensors.

    Args:
        text: Raw sentence; it is split with the module-level ``tokenizer``.
        vocab: Mapping that supports ``vocab[token]`` and contains ``"<pad>"``.
        sequence_length: Length of both returned tensors. Shorter inputs are
            right-padded, longer inputs are truncated.
        sequence_label: One integer tag per token of ``text``.
        pad_label: Tag written at padded positions. It has to match the
            ``ignore_index`` of the loss so that padding does not contribute
            to training. Defaults to 3.

    Returns:
        Tuple ``(token_ids, labels)`` of ``torch.long`` tensors, each of
        shape ``(sequence_length,)``.

    Raises:
        ValueError: If the number of tags differs from the number of tokens.
    """
    tokens = tokenizer(text)
    if len(sequence_label) != len(tokens):
        raise ValueError(
            f"expected {len(tokens)} labels for {text!r}, got {len(sequence_label)}"
        )

    # Truncate first so over-long sentences still yield fixed-size tensors
    # (a negative pad count would otherwise silently add no padding at all).
    tokens = tokens[:sequence_length]
    sequence_label = list(sequence_label[:sequence_length])
    num_pad = sequence_length - len(tokens)

    token_ids = [vocab[token] for token in tokens] + [vocab["<pad>"]] * num_pad
    sequence_label = sequence_label + [pad_label] * num_pad

    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(sequence_label, dtype=torch.long)

# Vectorize the samples
# The loop variable is deliberately not called `labels`: rebinding that name
# would replace the module-level label list with the last sentence's tags and
# make this cell fail when it is run a second time.
sentence_vecs = []
label_vecs = []
for sentence, sentence_labels in zip(corpus, labels):
    sentence_vec, labels_vec = vectorize(sentence, vocab, sequence_length, sentence_labels)
    sentence_vecs.append(sentence_vec)
    label_vecs.append(labels_vec)

In [None]:
# Show the token-id tensor of every sentence (id 1 is <pad> in the vocabulary printed above)
for v in sentence_vecs:
    print(v)

tensor([2, 6, 5, 4])
tensor([8, 3, 7, 1])


In [None]:
# Show the label tensor of every sentence (3 marks the positions padded by vectorize)
for v in label_vecs:
    print(v)

tensor([0, 1, 1, 2])
tensor([0, 1, 2, 3])


## Model

In [None]:
class POS_Model(nn.Module):
    """Per-token tagger: an embedding lookup followed by a linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        num_classes: Number of output scores produced for every token.
        embedding_dim: Size of each token embedding. Defaults to 4, the
            value that used to be hard-coded.
    """

    def __init__(self, vocab_size, num_classes, embedding_dim=4):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        """Return class scores laid out as (batch, num_classes, seq_len).

        Args:
            x: Integer token ids of shape (batch, seq_len).
        """
        x = self.embedding(x)  # (batch, seq_len, embedding_dim)
        x = self.fc(x)         # (batch, seq_len, num_classes)
        # nn.CrossEntropyLoss expects the class axis at dim 1 for
        # sequence-shaped targets, so swap the last two axes.
        x = x.permute(0, 2, 1)
        return x

# 4 output classes: tag ids 0-2 plus the label 3 that vectorize() assigns to padded positions
model = POS_Model(vocab_size, 4)

In [None]:
# Overwrite the randomly initialised parameters with fixed values.
# Each tensor is cloned before being wrapped: nn.Parameter shares storage with
# the tensor it is given, so without the copy optimizer.step() would also
# overwrite custom_weights / fc_weights / fc_bias in place.

# Embedding weight
custom_weights = torch.tensor( [[ 0.7321, -1.2503,  0.8154, -0.4987],
                                [-0.2165,  1.7823, -0.6892,  0.9476],
                                [ 1.1043,  0.3289, -1.5284,  0.6520],
                                [-0.8116,  0.5672,  0.2031, -0.9725],
                                [ 0.4528, -0.1476,  1.2930,  0.7842],
                                [-1.3459,  0.6791, -0.8853,  1.1207],
                                [ 0.9823, -0.5504,  0.4389, -1.2396],
                                [ 0.7415,  1.1048, -0.3207, -0.4871],
                                [-0.2158, -1.3749,  0.9036,  0.6724]]).float()
model.embedding.weight = nn.Parameter(custom_weights.clone())

# FC weight
fc_weights = torch.tensor( [[ 0.3748, -0.1062,  0.5347,  0.0573],
                            [-0.1189,  0.0972, -0.4471,  0.5312],
                            [ 0.2925,  0.6210, -0.6989, -0.0523],
                            [-0.0912,  0.1413,  0.3324,  0.3857]]).float()
model.fc.weight = nn.Parameter(fc_weights.clone())

# FC bias
fc_bias = torch.tensor([ 0.1987, -0.3745,  0.5632, -0.1256]).float()
model.fc.bias = nn.Parameter(fc_bias.clone())

In [None]:
# Smoke test: a batch with one sequence that holds the single token id 8.
# forward() permutes its result to (batch, num_classes, seq_len).
input_1 = torch.tensor([[8]], dtype=torch.long)
output = model(input_1)
print(output.shape)

torch.Size([1, 4, 1])


## Train

In [None]:
# The loss skips every position whose target is 3 (the label used for padding);
# Adam updates all model parameters with a learning rate of 0.1.
criterion = nn.CrossEntropyLoss(ignore_index=3)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [None]:
# 2nd sample ("models are large" plus one <pad>) as a batch of size 1
input_1 = torch.tensor([[8, 3, 7, 1]], dtype=torch.long)
label_1 = torch.tensor([[0, 1, 2, 3]], dtype=torch.long)

# Clear old gradients, then run the forward pass
optimizer.zero_grad()
outputs = model(input_1)

# Class probabilities: softmax over the class axis (dim 1), printed per token position
o_softmax = torch.softmax(outputs, dim=1)
for position in range(4):
    print(o_softmax[0, :, position])

print(outputs.shape)
print(label_1.shape)


tensor([0.4941, 0.1327, 0.0812, 0.2920], grad_fn=<SelectBackward0>)
tensor([0.2300, 0.1122, 0.4630, 0.1948], grad_fn=<SelectBackward0>)
tensor([0.1453, 0.0773, 0.6885, 0.0889], grad_fn=<SelectBackward0>)
tensor([0.0587, 0.1632, 0.6635, 0.1146], grad_fn=<SelectBackward0>)
torch.Size([1, 4, 4])
torch.Size([1, 4])


In [None]:
# Cross-entropy over the token positions; the last position has target 3 and is ignored
loss = criterion(outputs, label_1)
print(loss)

# Backpropagate and apply one Adam update
loss.backward()
optimizer.step()

tensor(1.0885, grad_fn=<NllLoss2DBackward0>)


In [None]:
# Dump every parameter after the update, each under its own heading
for heading, parameter in (
    ("\n embedding.weight \n", model.embedding.weight),
    ("\n fc.weight \n", model.fc.weight),
    ("\n fc.bias \n", model.fc.bias),
):
    print(heading)
    print(parameter)


 embedding.weight 

Parameter containing:
tensor([[ 0.7321, -1.2503,  0.8154, -0.4987],
        [-0.2165,  1.7823, -0.6892,  0.9476],
        [ 1.1043,  0.3289, -1.5284,  0.6520],
        [-0.9116,  0.4672,  0.1031, -0.8725],
        [ 0.4528, -0.1476,  1.2930,  0.7842],
        [-1.3459,  0.6791, -0.8853,  1.1207],
        [ 0.9823, -0.5504,  0.4389, -1.2396],
        [ 0.8415,  1.2048, -0.4207, -0.5871],
        [-0.1158, -1.4749,  1.0036,  0.5724]], requires_grad=True)

 fc.weight 

Parameter containing:
tensor([[ 0.2748, -0.2062,  0.6347,  0.1573],
        [-0.2189,  0.1972, -0.3471,  0.4312],
        [ 0.3925,  0.7210, -0.7989,  0.0477],
        [ 0.0088,  0.2413,  0.2324,  0.4857]], requires_grad=True)

 fc.bias 

Parameter containing:
tensor([ 0.2987, -0.2745,  0.4632, -0.2256], requires_grad=True)


In [None]:
# Display the batch that the next training step reuses
input_1

tensor([[8, 3, 7, 1]])

In [None]:
# Second training step on the same batch: forward pass first
optimizer.zero_grad()
outputs = model(input_1)

# Class probabilities per token position (softmax over the class axis)
o_softmax = torch.softmax(outputs, dim=1)
for position in range(4):
    print(o_softmax[0, :, position])

# Loss on the current predictions, then backpropagation and one optimizer update
loss = criterion(outputs, label_1)
print(loss)
loss.backward()
optimizer.step()

tensor([0.6830, 0.0982, 0.0451, 0.1737], grad_fn=<SelectBackward0>)
tensor([0.2513, 0.1909, 0.3895, 0.1683], grad_fn=<SelectBackward0>)
tensor([0.0969, 0.0754, 0.7510, 0.0768], grad_fn=<SelectBackward0>)
tensor([0.0469, 0.1540, 0.6814, 0.1176], grad_fn=<SelectBackward0>)
tensor(0.7745, grad_fn=<NllLoss2DBackward0>)


In [None]:
# Parameters after the second update, one heading per tensor
sections = (
    ("\n embedding.weight \n", model.embedding.weight),
    ("\n fc.weight \n", model.fc.weight),
    ("\n fc.bias \n", model.fc.bias),
)
for heading, parameter in sections:
    print(heading)
    print(parameter)


 embedding.weight 

Parameter containing:
tensor([[ 0.7321, -1.2503,  0.8154, -0.4987],
        [-0.2165,  1.7823, -0.6892,  0.9476],
        [ 1.1043,  0.3289, -1.5284,  0.6520],
        [-1.0115,  0.3729,  0.0065, -0.7789],
        [ 0.4528, -0.1476,  1.2930,  0.7842],
        [-1.3459,  0.6791, -0.8853,  1.1207],
        [ 0.9823, -0.5504,  0.4389, -1.2396],
        [ 0.9401,  1.3043, -0.5201, -0.6851],
        [-0.0249, -1.5749,  1.1024,  0.4784]], requires_grad=True)

 fc.weight 

Parameter containing:
tensor([[ 0.3290, -0.3039,  0.7327,  0.2552],
        [-0.3190,  0.2951, -0.2671,  0.3325],
        [ 0.4922,  0.8209, -0.8966,  0.1451],
        [ 0.1064,  0.3326,  0.1371,  0.5795]], requires_grad=True)

 fc.bias 

Parameter containing:
tensor([ 0.3466, -0.1747,  0.3644, -0.3236], requires_grad=True)


## Train with full data

In [None]:
# Reset the model to the same fixed starting weights as before.
# Each tensor is cloned before being wrapped: nn.Parameter shares storage with
# the tensor it is given, so without the copy the training loop below would
# also overwrite custom_weights / fc_weights / fc_bias in place.
custom_weights = torch.tensor( [[ 0.7321, -1.2503,  0.8154, -0.4987],
                                [-0.2165,  1.7823, -0.6892,  0.9476],
                                [ 1.1043,  0.3289, -1.5284,  0.6520],
                                [-0.8116,  0.5672,  0.2031, -0.9725],
                                [ 0.4528, -0.1476,  1.2930,  0.7842],
                                [-1.3459,  0.6791, -0.8853,  1.1207],
                                [ 0.9823, -0.5504,  0.4389, -1.2396],
                                [ 0.7415,  1.1048, -0.3207, -0.4871],
                                [-0.2158, -1.3749,  0.9036,  0.6724]]).float()
model.embedding.weight = nn.Parameter(custom_weights.clone())

fc_weights = torch.tensor( [[ 0.3748, -0.1062,  0.5347,  0.0573],
                            [-0.1189,  0.0972, -0.4471,  0.5312],
                            [ 0.2925,  0.6210, -0.6989, -0.0523],
                            [-0.0912,  0.1413,  0.3324,  0.3857]]).float()
model.fc.weight = nn.Parameter(fc_weights.clone())

fc_bias = torch.tensor([ 0.1987, -0.3745,  0.5632, -0.1256]).float()
model.fc.bias = nn.Parameter(fc_bias.clone())

In [None]:
# Fresh loss and optimizer for the re-initialised parameters.
# Targets equal to 3 (padding) are ignored; the learning rate here is 0.01.
criterion = nn.CrossEntropyLoss(ignore_index=3)
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
)

In [None]:
# Batch of two padded sequences (token ids) and their per-token targets.
# NOTE(review): these ids/targets differ from the sentence_vecs / label_vecs
# printed earlier ([2, 6, 5, 4], [8, 3, 7, 1] and [0, 1, 1, 2], [0, 1, 2, 3]) -
# confirm whether a hand-written batch is intended here.
input_data = torch.tensor( [[7, 8, 2, 5],
                            [4, 3, 6, 1]], dtype=torch.long)
label_data = torch.tensor([[0, 1, 2, 0],
                           [0, 1, 2, 3]], dtype=torch.long)

# 30 full-batch Adam steps; the loss is printed before each update is applied
for _ in range(30):
    optimizer.zero_grad()
    outputs = model(input_data)
    loss = criterion(outputs, label_data)
    print(loss.item())
    loss.backward()
    optimizer.step()

1.5842945575714111
1.5465704202651978
1.5097811222076416
1.4739363193511963
1.439034104347229
1.4050570726394653
1.3719760179519653
1.3397586345672607
1.3083724975585938
1.2777864933013916
1.2479702234268188
1.2188959121704102
1.1905386447906494
1.1628729104995728
1.1358730792999268
1.1095119714736938
1.0837604999542236
1.0585891008377075
1.0339672565460205
1.009865403175354
0.9862543940544128
0.9631068110466003
0.9403967261314392
0.9181000590324402
0.8961944580078125
0.8746592402458191
0.8534750938415527
0.8326242566108704
0.8120900988578796
0.7918574213981628


In [None]:
# Run the model on the training batch and turn the scores into class probabilities
outputs = model(input_data)
o_softmax = torch.softmax(outputs, dim=1)

# One probability vector per token position; a blank line separates the two sequences
for sample_idx in range(2):
    if sample_idx:
        print()
    for position in range(4):
        print(o_softmax[sample_idx, :, position])

tensor([0.3137, 0.0822, 0.4638, 0.1403], grad_fn=<SelectBackward0>)
tensor([0.4286, 0.4155, 0.0556, 0.1002], grad_fn=<SelectBackward0>)
tensor([0.0899, 0.0387, 0.8241, 0.0473], grad_fn=<SelectBackward0>)
tensor([0.3242, 0.3565, 0.1743, 0.1450], grad_fn=<SelectBackward0>)

tensor([0.6592, 0.1276, 0.0717, 0.1415], grad_fn=<SelectBackward0>)
tensor([0.2796, 0.3052, 0.2586, 0.1566], grad_fn=<SelectBackward0>)
tensor([0.2048, 0.0612, 0.6425, 0.0915], grad_fn=<SelectBackward0>)
tensor([0.3258, 0.1349, 0.3661, 0.1732], grad_fn=<SelectBackward0>)


In [None]:
# Target labels of the training batch (same values as label_data), shown for
# comparison with the probabilities printed above; 3 is the ignored label.
[[0, 1, 2, 0],
 [0, 1, 2, 3]]

[[0, 1, 2, 0], [0, 1, 2, 3]]

In [None]:
# Final parameters after the 30-step training loop, one heading per tensor
for heading, parameter in [
    ("\n embedding.weight \n", model.embedding.weight),
    ("\n fc.weight \n", model.fc.weight),
    ("\n fc.bias \n", model.fc.bias),
]:
    print(heading)
    print(parameter)


 embedding.weight 

Parameter containing:
tensor([[ 0.7321, -1.2503,  0.8154, -0.4987],
        [-0.2165,  1.7823, -0.6892,  0.9476],
        [ 1.4126,  0.5986, -1.8277,  0.3537],
        [-1.1242,  0.2551, -0.0507, -0.6943],
        [ 0.7029, -0.3955,  1.5791,  0.6880],
        [-1.0716,  0.4592, -0.6084,  1.0576],
        [ 1.3065, -0.2989,  0.1638, -1.5513],
        [ 0.7718,  0.8467, -0.0353, -0.1614],
        [-0.5230, -1.5754,  0.6373,  0.9254]], requires_grad=True)

 fc.weight 

Parameter containing:
tensor([[ 0.1816,  0.1791,  0.2891,  0.3403],
        [-0.4253, -0.2016, -0.1751,  0.2594],
        [ 0.5775,  0.3558, -0.8107, -0.3518],
        [ 0.0713,  0.3504,  0.0741,  0.1267]], requires_grad=True)

 fc.bias 

Parameter containing:
tensor([ 0.4903, -0.0872,  0.2756, -0.4163], requires_grad=True)
