In [1]:
from src.data_processing import load_processed_fairytales_dataset_for_lstm, save_experiment_input, shuffle
from src.my_lstm import Decoder, MyLSTM
import torch
from tqdm import tqdm
import plotly.express as px
import pandas as pd

In [2]:
# Load the preprocessed fairy-tale data for the LSTM. The loader hands back three
# values: the encoded samples plus `vocab` and `reverse_vocab` (presumably the
# token->id and id->token mappings -- confirm in src.data_processing).
splitted_dataset, vocab, reverse_vocab = load_processed_fairytales_dataset_for_lstm(
    how_many=10,
    context_size=50,
)

Reading files: 2124 files [00:00, 3933.35 files/s]
Preprocessing: 100%|██████████| 10/10 [00:00<00:00, 2500.63it/s]
Converting strings to integers: 100%|██████████| 10/10 [00:00<?, ?it/s]
Generating encoded pairs: 100%|██████████| 10/10 [00:00<00:00, 5000.96it/s]


In [3]:
# Copy the encoded samples into a 64-bit integer tensor (torch.int64 is the
# same dtype object as torch.long).
dataset = torch.tensor(splitted_dataset, dtype=torch.int64)

In [4]:
# Hand-picked experiment id; later cells build their paths under ./models/<id>/.
# Automatic alternative left by the author:
#   experiment_number = get_next_folder_number(Path('./models'))
experiment_number = 2

# `dataset` is passed in both of the first two positions (presumably as input and
# target -- confirm against save_experiment_input's signature).
save_experiment_input(
    dataset,
    dataset,
    vocab,
    reverse_vocab,
    experiment_number,
)

Saved successfully


In [None]:
# --- Hyperparameters --------------------------------------------------------
num_epochs = 200
lr = 0.001
batch_size = 128

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both the decoder's input size and its output size are the vocabulary size.
input_size_decoder = len(vocab)
output_size = len(vocab)

decoder_embedding_size = 300
hidden_size = 1024  # original note: must be the same for both RNNs (only a decoder is built in this cell)
num_layers = 2
dec_dropout = 0.1

# --- Model, optimiser and loss ----------------------------------------------
decoder_net = Decoder(
    input_size_decoder, decoder_embedding_size, hidden_size,
    output_size, num_layers, dec_dropout,
).to(device)

# Alternative kept from the original: train the bare decoder via `model = decoder_net`.
model = MyLSTM(decoder_net, vocab_size=len(vocab)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, eps=1e-9)
# Targets equal to 0 do not contribute to the loss (presumably 0 is the padding
# id -- TODO confirm against the vocabulary).
criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

# --- Training loop ----------------------------------------------------------
losses = []  # one loss value per optimisation step, accumulated over all epochs
for epoch in range(1, num_epochs + 1):
    model.train()

    progress = tqdm(range(0, len(dataset), batch_size), desc=f"Learning epoch: {epoch}")
    # Reshuffle every epoch; `shuffle` receives the dataset twice and only the
    # first returned value is kept.
    dataset, _ = shuffle(dataset, dataset)

    running_loss = 0
    # `step` counts batches from 1, so running_loss / step is the mean so far.
    for step, start in enumerate(progress, start=1):
        # Slice one batch, move it to the device and transpose it.
        tokens = dataset[start:start + batch_size].to(device).T

        logits = model(tokens)
        # Drop index 0 along the first axis of both predictions and targets,
        # then flatten so the criterion sees (N, classes) against (N,).
        vocab_dim = logits.shape[2]
        flat_logits = logits[1:].reshape(-1, vocab_dim)
        flat_targets = tokens[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(flat_logits, flat_targets)
        loss.backward()

        loss_value = loss.item()
        losses.append(loss_value)
        running_loss += loss_value
        progress.set_description(
            desc=f"Learning epoch: {epoch}, average loss: {running_loss / step:.4f}")
        # Clip the global gradient norm to 1 before applying the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

    # Checkpoint the whole model object every 50th epoch.
    if epoch % 50 == 0:
        torch.save(model, f'./models/{experiment_number}/model_after_{epoch}_epoch.pth')

Learning epoch: 1, average loss: 6.6287: 100%|██████████| 43/43 [00:10<00:00,  4.00it/s]
Learning epoch: 2, average loss: 6.2939: 100%|██████████| 43/43 [00:10<00:00,  4.15it/s]
Learning epoch: 3, average loss: 6.2423: 100%|██████████| 43/43 [00:10<00:00,  4.15it/s]
Learning epoch: 4, average loss: 6.1981: 100%|██████████| 43/43 [00:10<00:00,  4.15it/s]
Learning epoch: 5, average loss: 6.1603: 100%|██████████| 43/43 [00:10<00:00,  4.13it/s]
Learning epoch: 6, average loss: 6.0965: 100%|██████████| 43/43 [00:10<00:00,  4.13it/s]
Learning epoch: 7, average loss: 6.0457: 100%|██████████| 43/43 [00:10<00:00,  4.13it/s]
Learning epoch: 8, average loss: 5.9098: 100%|██████████| 43/43 [00:10<00:00,  4.13it/s]
Learning epoch: 9, average loss: 5.7047: 100%|██████████| 43/43 [00:10<00:00,  4.12it/s]
Learning epoch: 10, average loss: 5.7123: 100%|██████████| 43/43 [00:10<00:00,  4.12it/s]
Learning epoch: 11, average loss: 5.6160: 100%|██████████| 43/43 [00:10<00:00,  4.11it/s]
Learning epoch: 12,

In [None]:
# Dump every recorded training loss to a text file, one value per line.
loss_path = f'./models/{experiment_number}/training_losses.txt'
with open(loss_path, 'w') as file:
    file.writelines(str(value) + '\n' for value in losses)

In [None]:
# Plot the training-loss curve. `losses` holds one value per batch update (it is
# appended to once per inner-loop iteration of the training cell), so the x axis
# is the running batch index across all epochs.
data = pd.DataFrame({'x': range(len(losses)),
                     'y': losses})

# The column names stay 'x' / 'y'; `labels` only changes what the axes display.
# The title replaces the leftover placeholder so the figure stands on its own.
fig = px.line(
    data,
    x='x',
    y='y',
    title='Training loss per batch',
    labels={'x': 'Training step (batch)', 'y': 'Cross-entropy loss'},
)
fig.show()