Transformer model with pytorch

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np

In [6]:
all_slogans = pd.read_csv('all_slogans.csv', sep=';')
slogans = all_slogans['slogan']
slogans = slogans.str.lower()

# reducing invaluable tokens
to_remove = ['\n', '\r', '>', '\x80', '\x93', '\x94', '\x99', '\x9d', '\xa0',
             '¦', '®', '°', 'º', '¼', '½','×', 'â', 'ã', 'è', 'é', 'ï', 'ñ', 'ú', 'ü',
             '⁄', '（', '）', '，', '·']

dict_to_remove = {"’" : "'", "‘" : "'", "“" : '"', "”" : '"',
                  "…" : '...', '—': '-', '–': '-'}


# normalizing the tokens
for char in to_remove:
    slogans = slogans.str.replace(char, ' ')


for key, value in dict_to_remove.items():
    slogans = slogans.str.replace(key, value)


# getting the character set

characters = [char for slogan in slogans for char in slogan]
characters = sorted((set(characters)))
print(characters)
len(characters)

[' ', '!', '"', '#', '$', '%', '&', "'", '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '£']


57

In [7]:
# encoding string to integers sequence
# decoding integers to string sequence
to_int = {char: idx for idx, char in enumerate(characters)}
to_str = {idx: char for idx, char in enumerate(characters)}

encode = lambda sentence: [to_int[char] for char in sentence]
decode = lambda sentence: [to_str[char] for char in sentence]

encoded_slogans = [torch.tensor(encode(slogan)) for slogan in slogans]

tensor([49, 37, 34,  0, 38, 43, 49, 34, 47, 43, 30, 49, 38, 44, 43, 30, 41,  0,
        30, 38, 47, 41, 38, 43, 34,  0, 44, 35,  0, 34, 36, 54, 45, 49])

In [10]:
# define hyperparameters
vocab_size = len(characters)
d_model = 512 # dim of the embedding vector
nhead = 8
num_encoder_layers = 3
num_decoder_layers = 3
dim_feedforward = 2048
max_seq_length = 100

In [34]:
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model, nhead, num_encoder_layers,
                  num_decoder_layers, dim_feedforward, max_seq_length):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_length, d_model))
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward)
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src) + self.positional_encoding[:, :src.size(1), :]
        tgt = self.embedding(tgt) + self.positional_encoding[:, :tgt.size(1), :]
        output = self.transformer(src, tgt)
        output = self.fc_out(output)
        return output
    
model = TransformerModel(vocab_size, d_model, nhead, num_encoder_layers,
                          num_decoder_layers, dim_feedforward, max_seq_length)



criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Example training loop
num_epochs = 5
for epoch in range(num_epochs):
    for slogan in slogans[:20]:
        optimizer.zero_grad()
        input_sequence = torch.tensor(encode(slogan[:-1]), dtype=torch.long).unsqueeze(0)
        target_sequence = torch.tensor(encode(slogan[1:]), dtype=torch.long).unsqueeze(0)
        output = model(input_sequence, input_sequence)
        loss = criterion(output.view(-1, vocab_size), target_sequence.view(-1))
        loss.backward()
        optimizer.step()
        print(f'Epoch: {epoch}, Loss: {loss.item()}')

Epoch: 0, Loss: 4.302258491516113
Epoch: 0, Loss: 3.8550021648406982
Epoch: 0, Loss: 3.3839778900146484
Epoch: 0, Loss: 3.747529983520508
Epoch: 0, Loss: 3.6151905059814453
Epoch: 0, Loss: 3.605558395385742
Epoch: 0, Loss: 3.2459630966186523
Epoch: 0, Loss: 3.5550854206085205
Epoch: 0, Loss: 2.7653021812438965
Epoch: 0, Loss: 2.939154863357544
Epoch: 0, Loss: 3.381559133529663
Epoch: 0, Loss: 3.327558755874634
Epoch: 0, Loss: 3.8296079635620117
Epoch: 0, Loss: 3.983919620513916
Epoch: 0, Loss: 3.4186267852783203
Epoch: 0, Loss: 3.138753890991211
Epoch: 0, Loss: 3.4627184867858887
Epoch: 0, Loss: 3.1635944843292236
Epoch: 0, Loss: 3.2358314990997314
Epoch: 0, Loss: 3.0759270191192627
Epoch: 1, Loss: 3.041801691055298
Epoch: 1, Loss: 2.9392406940460205
Epoch: 1, Loss: 2.958998203277588
Epoch: 1, Loss: 3.1173813343048096
Epoch: 1, Loss: 2.901092290878296
Epoch: 1, Loss: 3.3675312995910645
Epoch: 1, Loss: 2.6880815029144287
Epoch: 1, Loss: 3.149712085723877
Epoch: 1, Loss: 2.73130679130554

In [57]:
def generate_slogan(model, start_sequence, max_lenght=100):
    model.eval()
    input_sequence = torch.tensor(encode(start_sequence), dtype=torch.long).unsqueeze(0)
    generated_sequence = input_sequence.tolist()[0]

    for _ in range(max_lenght - len(start_sequence)):
        input_tensor = torch.tensor(generated_sequence[-max_lenght:], dtype=torch.long).unsqueeze(0)
        output = model(input_tensor, input_tensor)
        next_token = torch.argmax(F.softmax(output[0, -1, :], dim=0)).item()
        generated_sequence.append(next_token)
        if to_str[next_token] == ' ':
            break
    
    return ''.join([to_str[idx] for idx in generated_sequence])

start_sequence = "who are yo"
generated_slogan = generate_slogan(model, start_sequence)
print(f"Generated slogan: {generated_slogan}")

Generated slogan: who are yor 
