In [1]:
from src.data_processing import load_processed_fairytales_dataset_for_lstm, save_experiment_input, shuffle
from src.my_lstm_v2 import MyLSTM, check_batch_len
import torch
from tqdm import tqdm
import plotly.express as px
import pandas as pd

In [2]:
# Load the pre-processed fairytale corpus as fixed-size integer windows together
# with the token <-> id mappings.
# NOTE(review): how_many=100 presumably limits loading to 100 tales and
# context_size=51 presumably sets the window width (the training loop uses every
# column but the last as input and the last one as the label) - confirm against
# src.data_processing.
splitted_dataset, vocab, reverse_vocab = load_processed_fairytales_dataset_for_lstm(
    how_many=100,
    context_size=51,
)
dataset = torch.tensor(splitted_dataset, dtype=torch.long)

# The experiment id is pinned by hand; it selects the ./models/<id>/ folder used
# for checkpoints and the loss log.
# Alternative left by the author: get_next_folder_number(Path('./models'))
experiment_number = 3

# The same tensor is passed for both of the first two arguments.
# NOTE(review): presumably these are separate train/validation slots - confirm.
save_experiment_input(dataset, dataset, vocab, reverse_vocab, experiment_number)

Reading files: 2124 files [00:00, 4100.39 files/s]
Preprocessing: 100%|██████████| 100/100 [00:00<00:00, 3030.44it/s]
Converting strings to integers: 100%|██████████| 100/100 [00:00<00:00, 16664.56it/s]
Generating encoded pairs: 100%|██████████| 100/100 [00:00<00:00, 900.89it/s]


Saved successfully


In [3]:
# Rebuilds `dataset` from `splitted_dataset` with exactly the same call as the
# loading cell above.
# NOTE(review): redundant on a straight top-to-bottom run; it only matters when
# re-run by hand to reset `dataset` after the training loop has replaced it with
# a shuffled copy - consider removing or labelling it as a reset cell.
dataset = torch.tensor(splitted_dataset, dtype=torch.long)

In [4]:
# --- Data / batching -------------------------------------------------------
# NOTE(review): sequence_length is not referenced by the cells shown here;
# presumably it mirrors context_size (51) minus the label token - confirm.
sequence_length = 50
batch_size = 128
num_epochs = 160

# --- Model size ------------------------------------------------------------
# The output layer predicts over the full vocabulary, so both sizes are equal.
vocab_size = output_size = len(vocab)
embedding_dim = 256
hidden_dim = 2048
n_layers = 2

# --- Optimisation ----------------------------------------------------------
lr = 1e-3

In [5]:
# Build the model, optimiser and loss, then train for `num_epochs` epochs.
# Every optimisation step appends its loss to `losses`; every 5th epoch the
# model is checkpointed under ./models/<experiment_number>/.
model = MyLSTM(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.25)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

optimizer = torch.optim.Adam(model.parameters(), lr=lr, eps=1e-9)
# ignore_index=0: targets equal to 0 contribute nothing to the loss.
# NOTE(review): presumably id 0 is the padding token - confirm against `vocab`.
criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

model.to(device)
model.train()

losses = []  # one float per optimisation step, accumulated across all epochs
for epoch in range(1, num_epochs + 1):

    hidden = model.init_hidden(batch_size)
    p_bar = tqdm(range(0, len(dataset), batch_size), desc=f"Learning epoch: {epoch}")
    # `shuffle` returns a pair; only the first element is kept and it replaces
    # the global `dataset` (presumably a row permutation - confirm in
    # src.data_processing).
    dataset, _ = shuffle(dataset, dataset)

    # Running SUM of the batch losses in this epoch; it is divided by the number
    # of batches seen so far only when the progress-bar text is formatted.
    average_loss_in_epoch = 0
    for batch in p_bar:
        # `batch` is the row offset of the mini-batch. In every row, all columns
        # but the last are the model input and the last column is the label.
        data_batch = dataset[batch:batch + batch_size, :-1].to(device)
        label_batch = dataset[batch:batch + batch_size, -1].to(device)

        # NOTE(review): presumably adjusts a short final batch so it matches the
        # hidden state created for `batch_size` - confirm in src.my_lstm_v2.
        data_batch, label_batch = check_batch_len(data_batch, label_batch, batch_size)

        # Cut the initial hidden state out of any autograd graph. `detach()` is
        # the recommended replacement for `.data`. The hidden state returned by
        # the model is discarded below, so every batch starts from `hidden`.
        h = tuple(each.detach() for each in hidden)
        model.zero_grad()

        output, _ = model(data_batch, h)

        loss = criterion(output, label_batch)
        loss.backward()
        # Clip the global gradient norm to 5 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()

        # Read the scalar once: each `.item()` call copies the value to the host.
        batch_loss = loss.item()
        losses.append(batch_loss)
        average_loss_in_epoch += batch_loss
        p_bar.set_description(
            desc=f"Learning epoch: {epoch}, average loss: {average_loss_in_epoch / ((batch // batch_size) + 1 ):.4f}")

    # Checkpoint every 5th epoch.
    # NOTE(review): torch.save(model) pickles the whole module object, so loading
    # it later needs the MyLSTM class importable under the same module path;
    # only load such files from trusted sources.
    if epoch % 5 == 0:
        torch.save(model, f'./models/{experiment_number}/model_after_{epoch}_epoch.pth')

Learning epoch: 1, average loss: 6.9166: 100%|██████████| 430/430 [06:26<00:00,  1.11it/s]
Learning epoch: 2, average loss: 6.2456: 100%|██████████| 430/430 [06:23<00:00,  1.12it/s]
Learning epoch: 3, average loss: 5.6953: 100%|██████████| 430/430 [06:22<00:00,  1.12it/s]
Learning epoch: 4, average loss: 4.9863: 100%|██████████| 430/430 [06:22<00:00,  1.12it/s]
Learning epoch: 5, average loss: 4.0446: 100%|██████████| 430/430 [06:22<00:00,  1.12it/s]
Learning epoch: 6, average loss: 2.8484: 100%|██████████| 430/430 [06:20<00:00,  1.13it/s]
Learning epoch: 7, average loss: 1.6583: 100%|██████████| 430/430 [06:20<00:00,  1.13it/s]
Learning epoch: 8, average loss: 0.8714: 100%|██████████| 430/430 [06:20<00:00,  1.13it/s]
Learning epoch: 9, average loss: 0.4607: 100%|██████████| 430/430 [06:20<00:00,  1.13it/s]
Learning epoch: 10, average loss: 0.2300: 100%|██████████| 430/430 [06:20<00:00,  1.13it/s]
Learning epoch: 11, average loss: 0.0999: 100%|██████████| 430/430 [06:20<00:00,  1.13it/

In [6]:
# Persist the per-step training losses as plain text, one value per line, in the
# experiment folder.
loss_path = f'./models/{experiment_number}/training_losses.txt'
with open(loss_path, 'w', encoding='utf-8') as file:
    # A generator expression keeps its loop variable local, so the global `loss`
    # left behind by the training loop is not overwritten with a float.
    file.writelines(str(value) + '\n' for value in losses)

In [7]:
# Plot the loss recorded after every optimisation step (one point per batch,
# concatenated over all epochs).
data = pd.DataFrame({'x': range(len(losses)),
                     'y': losses})

# Column names stay 'x' / 'y'; `labels` only changes what is shown on the axes
# and in the hover text, so the figure can be read on its own.
fig = px.line(
    data,
    x='x',
    y='y',
    title='Training loss per batch',
    labels={'x': 'Training step (batch)', 'y': 'Cross-entropy loss'},
)
fig.show()