In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

import numpy as np

import My_dataset as md
from preprocess import build_vocab, tokenize, numericalize, pad_sequences, TranslationDataset
import building_blocks as bb
import embeddings as emb
import tools

import os
from torch import save as torch_save
from torch import load as torch_load

print("Starting main.py file ...")

Starting main.py file ...


In [2]:
# Configuration
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
EMBED_DIM = 300
MAX_LENGTH = 20

print("I will be loading the embeddings ...")

# Pre-trained embeddings
en_emb = emb.load_embeddings('en')
de_emb = emb.load_embeddings('de')

# Tokenization and Vocabulary
print("will start tockenizing ...")

def build_vocab(sentences, min_freq=1):
    vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}
    word_counts = {}
    for sent in sentences:
        for word in sent.lower().split():
            word_counts[word] = word_counts.get(word, 0) + 1
    
    idx = 4
    for word, count in word_counts.items():
        if count >= min_freq:
            vocab[word] = idx
            idx += 1
    return vocab

I will be loading the embeddings ...
I will be importing the en embeddings now / -> from embeddings.py file
I will be importing the de embeddings now / -> from embeddings.py file
will start tockenizing ...


In [3]:

# Build vocabularies
src_sentences = [pair[0] for pair in md.sampled_pairs]
trg_sentences = [pair[1] for pair in md.sampled_pairs]

src_vocab = build_vocab(src_sentences)
trg_vocab = build_vocab(trg_sentences)

# Create embedding matrices
src_embed_matrix = emb.create_embedding_matrix(src_vocab, en_emb)
trg_embed_matrix = emb.create_embedding_matrix(trg_vocab, de_emb)


In [4]:
print("will initialize the model...")

# Initialize model
model = bb.Transformer(
    src_vocab_size=len(src_vocab),
    trg_vocab_size=len(trg_vocab),
    src_pad_idx=0,
    trg_pad_idx=0,
    embed_size=EMBED_DIM,
    num_layers=2,
    heads=4,
    forward_expansion=4,
    dropout=0.1,
    device=DEVICE,
    max_length=MAX_LENGTH
)

will initialize the model...


In [6]:
BATCH_SIZE = 12
NUM_EPOCHS = 400

In [7]:

# Replace embeddings with pre-trained
model.encoder.word_embedding.weight.data.copy_(src_embed_matrix)
model.decoder.word_embedding.weight.data.copy_(trg_embed_matrix)

# Freeze embeddings if needed
# model.encoder.word_embedding.weight.requires_grad = False
# model.decoder.word_embedding.weight.requires_grad = False

model = model.to(DEVICE)

# Dataset and DataLoader
dataset = TranslationDataset(md.sentence_pairs, src_vocab, trg_vocab, MAX_LENGTH)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


In [8]:

# Training setup
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [9]:


print("Starting the training loop ...")

# Training loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    progress_bar = tqdm(loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}")
    
    for src, trg in loader:
        src = src.to(DEVICE)
        trg = trg.to(DEVICE)
        
        # Forward pass
        optimizer.zero_grad()
        output = model(src, trg[:, :-1])
        output_dim = output.shape[-1]
        
        # Compute loss
        loss = criterion(output.reshape(-1, output_dim), trg[:, 1:].reshape(-1))
        
        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()
        
        total_loss += loss.item()
    
    avg_loss = total_loss / len(loader)
    print(f'Epoch {epoch+1:03d} | Loss: {avg_loss:.4f}')
    # Checkpointing
    # Save checkpoint every 50 epochs
    if (epoch+1) % 50 == 0:
        checkpoint_path = f"checkpoints/model_epoch_{epoch+1}.pth"
        torch_save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss,
            'src_vocab': src_vocab,
            'trg_vocab': trg_vocab,
            'max_length': MAX_LENGTH
        }, checkpoint_path)
        #torch_save(checkpoint, f"checkpoints/model_epoch_{epoch+1}.pth")
        print(f"Checkpoint saved for epoch {epoch+1} at {checkpoint_path}")


Starting the training loop ...


Epoch 1/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 001 | Loss: 4.6778


Epoch 1/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 002 | Loss: 4.3140


Epoch 2/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 003 | Loss: 4.0134


Epoch 3/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 004 | Loss: 3.9137


Epoch 4/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 005 | Loss: 3.7868


Epoch 5/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 006 | Loss: 3.6715


Epoch 6/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 007 | Loss: 3.6032


Epoch 7/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 008 | Loss: 3.6002


Epoch 8/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 009 | Loss: 3.5058


Epoch 9/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 010 | Loss: 3.4112


Epoch 10/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 011 | Loss: 3.3825


Epoch 11/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 012 | Loss: 3.3419


Epoch 12/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 013 | Loss: 3.2092


Epoch 13/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 014 | Loss: 3.2102


Epoch 14/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 015 | Loss: 3.1543


Epoch 15/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 016 | Loss: 3.0697


Epoch 16/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 017 | Loss: 3.0713


Epoch 17/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 018 | Loss: 3.0106


Epoch 18/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 019 | Loss: 2.9243


Epoch 19/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 020 | Loss: 2.9394


Epoch 20/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 021 | Loss: 2.9627


Epoch 21/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 022 | Loss: 2.8784


Epoch 22/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 023 | Loss: 2.7766


Epoch 23/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 024 | Loss: 2.7470


Epoch 24/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 025 | Loss: 2.7589


Epoch 25/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 026 | Loss: 2.7084


Epoch 26/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 027 | Loss: 2.6864


Epoch 27/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 028 | Loss: 2.6513


Epoch 28/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 029 | Loss: 2.6582


Epoch 29/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 030 | Loss: 2.5688


Epoch 30/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 031 | Loss: 2.6037


Epoch 31/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 032 | Loss: 2.5317


Epoch 32/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 033 | Loss: 2.5482


Epoch 33/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 034 | Loss: 2.4601


Epoch 34/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 035 | Loss: 2.4620


Epoch 35/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 036 | Loss: 2.4257


Epoch 36/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 037 | Loss: 2.4215


Epoch 37/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 038 | Loss: 2.3837


Epoch 38/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 039 | Loss: 2.3313


Epoch 39/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 040 | Loss: 2.3043


Epoch 40/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 041 | Loss: 2.3546


Epoch 41/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 042 | Loss: 2.2982


Epoch 42/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 043 | Loss: 2.2426


Epoch 43/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 044 | Loss: 2.2964


Epoch 44/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 045 | Loss: 2.2610


Epoch 45/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 046 | Loss: 2.2912


Epoch 46/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 047 | Loss: 2.2675


Epoch 47/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 048 | Loss: 2.2778







Epoch 48/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 049 | Loss: 2.1415


Epoch 49/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 050 | Loss: 2.1786
Checkpoint saved for epoch 50 at checkpoints/model_epoch_50.pth


Epoch 50/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 051 | Loss: 2.1398


Epoch 51/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 052 | Loss: 2.1842


Epoch 52/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 053 | Loss: 2.1468


Epoch 53/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 054 | Loss: 2.1550


Epoch 54/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 055 | Loss: 2.0812


Epoch 55/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 056 | Loss: 2.0871







Epoch 56/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 057 | Loss: 2.0210


Epoch 57/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 058 | Loss: 2.0724


Epoch 58/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 059 | Loss: 2.1487


Epoch 59/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 060 | Loss: 2.0270


Epoch 60/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 061 | Loss: 2.0613


Epoch 61/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 062 | Loss: 1.9854


Epoch 62/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 063 | Loss: 2.0130


Epoch 63/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 064 | Loss: 2.0203


Epoch 64/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 065 | Loss: 1.9779


Epoch 65/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 066 | Loss: 2.0083


Epoch 66/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 067 | Loss: 1.9453


Epoch 67/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 068 | Loss: 1.9217


Epoch 68/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 069 | Loss: 1.9699


Epoch 69/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 070 | Loss: 1.9134


Epoch 70/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 071 | Loss: 1.9294


Epoch 71/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 072 | Loss: 1.8463


Epoch 72/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 073 | Loss: 1.9137


Epoch 73/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 074 | Loss: 1.9030


Epoch 74/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 075 | Loss: 1.9162


Epoch 75/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 076 | Loss: 1.9158


Epoch 76/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 077 | Loss: 1.8078


Epoch 77/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 078 | Loss: 1.8435


Epoch 78/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 079 | Loss: 1.8386


Epoch 79/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 080 | Loss: 1.8717


Epoch 80/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 081 | Loss: 1.8399







Epoch 81/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 082 | Loss: 1.8896


Epoch 82/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 083 | Loss: 1.8364







Epoch 83/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 084 | Loss: 1.7135


Epoch 84/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 085 | Loss: 1.7837


Epoch 85/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 086 | Loss: 1.7763


Epoch 86/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 087 | Loss: 1.7899


Epoch 87/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 088 | Loss: 1.7990


Epoch 88/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 089 | Loss: 1.7370


Epoch 89/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 090 | Loss: 1.7801







Epoch 90/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 091 | Loss: 1.6965


Epoch 91/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 092 | Loss: 1.6949







Epoch 92/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 093 | Loss: 1.6782


Epoch 93/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 094 | Loss: 1.7024







Epoch 94/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 095 | Loss: 1.6768







Epoch 95/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 096 | Loss: 1.5822







Epoch 96/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 097 | Loss: 1.6453


Epoch 97/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 098 | Loss: 1.6212


Epoch 98/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 099 | Loss: 1.7044



Epoch 99/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 100 | Loss: 1.6140





Checkpoint saved for epoch 100 at checkpoints/model_epoch_100.pth


Epoch 100/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 101 | Loss: 1.5961


Epoch 101/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 102 | Loss: 1.6962



Epoch 102/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 103 | Loss: 1.6269


Epoch 103/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 104 | Loss: 1.6036


Epoch 104/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 105 | Loss: 1.6140


Epoch 105/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 106 | Loss: 1.5812


Epoch 106/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 107 | Loss: 1.4862


Epoch 107/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 108 | Loss: 1.4966


Epoch 108/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 109 | Loss: 1.5599







Epoch 109/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 110 | Loss: 1.4895


Epoch 110/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 111 | Loss: 1.5427







Epoch 111/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 112 | Loss: 1.5169


Epoch 112/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 113 | Loss: 1.5126







Epoch 113/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 114 | Loss: 1.4672


Epoch 114/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 115 | Loss: 1.4666







Epoch 115/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 116 | Loss: 1.4492


Epoch 116/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 117 | Loss: 1.4838


Epoch 117/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 118 | Loss: 1.4552


Epoch 118/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 119 | Loss: 1.4364


Epoch 119/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 120 | Loss: 1.3420


Epoch 120/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 121 | Loss: 1.4372







Epoch 121/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 122 | Loss: 1.3978


Epoch 122/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 123 | Loss: 1.4126


Epoch 123/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 124 | Loss: 1.3478



Epoch 124/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 125 | Loss: 1.3548


Epoch 125/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 126 | Loss: 1.3650


Epoch 126/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 127 | Loss: 1.3740


Epoch 127/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 128 | Loss: 1.3893


Epoch 128/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 129 | Loss: 1.3017


Epoch 129/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 130 | Loss: 1.3256


Epoch 130/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 131 | Loss: 1.2754







Epoch 131/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 132 | Loss: 1.2757







Epoch 132/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 133 | Loss: 1.2594


Epoch 133/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 134 | Loss: 1.2830


Epoch 134/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 135 | Loss: 1.2546


Epoch 135/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 136 | Loss: 1.2699


Epoch 136/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 137 | Loss: 1.3069


Epoch 137/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 138 | Loss: 1.2082







Epoch 138/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 139 | Loss: 1.2002


Epoch 139/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 140 | Loss: 1.2557


Epoch 140/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 141 | Loss: 1.2461


Epoch 141/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 142 | Loss: 1.2116







Epoch 142/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 143 | Loss: 1.1907


Epoch 143/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 144 | Loss: 1.1733


Epoch 144/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 145 | Loss: 1.1364


Epoch 145/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 146 | Loss: 1.1257


Epoch 146/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 147 | Loss: 1.1917


Epoch 147/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 148 | Loss: 1.2092


Epoch 148/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 149 | Loss: 1.1566


Epoch 149/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 150 | Loss: 1.1623
Checkpoint saved for epoch 150 at checkpoints/model_epoch_150.pth


Epoch 150/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 151 | Loss: 1.1009


Epoch 151/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 152 | Loss: 1.1672


Epoch 152/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 153 | Loss: 1.1574


Epoch 153/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 154 | Loss: 1.0457


Epoch 154/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 155 | Loss: 1.0578


Epoch 155/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 156 | Loss: 1.0818


Epoch 156/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 157 | Loss: 1.0841


Epoch 157/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 158 | Loss: 0.9852







Epoch 158/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 159 | Loss: 1.0346


Epoch 159/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 160 | Loss: 1.0273







Epoch 160/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 161 | Loss: 1.0225


Epoch 161/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 162 | Loss: 1.0649


Epoch 162/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 163 | Loss: 1.0208


Epoch 163/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 164 | Loss: 1.0093


Epoch 164/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 165 | Loss: 1.0410


Epoch 165/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 166 | Loss: 0.9513


Epoch 166/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 167 | Loss: 1.0217


Epoch 167/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 168 | Loss: 0.9992







Epoch 168/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 169 | Loss: 0.8809


Epoch 169/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 170 | Loss: 0.9384


Epoch 170/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 171 | Loss: 0.9679



Epoch 171/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 172 | Loss: 0.8961


Epoch 172/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 173 | Loss: 0.8808


Epoch 173/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 174 | Loss: 0.9040


Epoch 174/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 175 | Loss: 0.8946


Epoch 175/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 176 | Loss: 0.8898


Epoch 176/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 177 | Loss: 0.9448


Epoch 177/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 178 | Loss: 0.8940


Epoch 178/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 179 | Loss: 0.8843


Epoch 179/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 180 | Loss: 0.8189


Epoch 180/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 181 | Loss: 0.8723



Epoch 181/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 182 | Loss: 0.8539


Epoch 182/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 183 | Loss: 0.8961


Epoch 183/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 184 | Loss: 0.8226



Epoch 184/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 185 | Loss: 0.8027


Epoch 185/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 186 | Loss: 0.8242


Epoch 186/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 187 | Loss: 0.9029


Epoch 187/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 188 | Loss: 0.8154


Epoch 188/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 189 | Loss: 0.7615


Epoch 189/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 190 | Loss: 0.8199


Epoch 190/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 191 | Loss: 0.7957


Epoch 191/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 192 | Loss: 0.8022


Epoch 192/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 193 | Loss: 0.7799


Epoch 193/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 194 | Loss: 0.7346


Epoch 194/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 195 | Loss: 0.7513


Epoch 195/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 196 | Loss: 0.7145


Epoch 196/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 197 | Loss: 0.8079


Epoch 197/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 198 | Loss: 0.7086


Epoch 198/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 199 | Loss: 0.7316


Epoch 199/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 200 | Loss: 0.6975
Checkpoint saved for epoch 200 at checkpoints/model_epoch_200.pth


Epoch 200/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 201 | Loss: 0.6766


Epoch 201/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 202 | Loss: 0.7062


Epoch 202/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 203 | Loss: 0.7047


Epoch 203/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 204 | Loss: 0.7174


Epoch 204/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 205 | Loss: 0.6336


Epoch 205/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 206 | Loss: 0.6967


Epoch 206/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 207 | Loss: 0.6893


Epoch 207/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 208 | Loss: 0.6731


Epoch 208/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 209 | Loss: 0.6305


Epoch 209/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 210 | Loss: 0.6473


Epoch 210/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 211 | Loss: 0.7014


Epoch 211/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 212 | Loss: 0.6537


Epoch 212/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 213 | Loss: 0.6520


Epoch 213/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 214 | Loss: 0.6621


Epoch 214/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 215 | Loss: 0.6064


Epoch 215/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 216 | Loss: 0.6182


Epoch 216/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 217 | Loss: 0.6485


Epoch 217/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 218 | Loss: 0.6472


Epoch 218/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 219 | Loss: 0.6170


Epoch 219/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 220 | Loss: 0.5956


Epoch 220/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 221 | Loss: 0.5970


Epoch 221/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 222 | Loss: 0.6285


Epoch 222/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 223 | Loss: 0.6126


Epoch 223/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 224 | Loss: 0.5631


Epoch 224/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 225 | Loss: 0.5755


Epoch 225/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 226 | Loss: 0.5150


Epoch 226/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 227 | Loss: 0.5837


Epoch 227/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 228 | Loss: 0.5106


Epoch 228/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 229 | Loss: 0.5450


Epoch 229/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 230 | Loss: 0.4890


Epoch 230/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 231 | Loss: 0.5531


Epoch 231/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 232 | Loss: 0.5333


Epoch 232/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 233 | Loss: 0.5064


Epoch 233/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 234 | Loss: 0.4985


Epoch 234/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 235 | Loss: 0.5172


Epoch 235/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 236 | Loss: 0.4470


Epoch 236/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 237 | Loss: 0.5311


Epoch 237/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 238 | Loss: 0.4396


Epoch 238/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 239 | Loss: 0.5052


Epoch 239/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 240 | Loss: 0.4769


Epoch 240/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 241 | Loss: 0.4676


Epoch 241/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 242 | Loss: 0.4406


Epoch 242/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 243 | Loss: 0.4572


Epoch 243/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 244 | Loss: 0.4384


Epoch 244/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 245 | Loss: 0.4407


Epoch 245/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 246 | Loss: 0.4400


Epoch 246/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 247 | Loss: 0.4848


Epoch 247/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 248 | Loss: 0.4461


Epoch 248/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 249 | Loss: 0.4407


Epoch 249/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 250 | Loss: 0.4164
Checkpoint saved for epoch 250 at checkpoints/model_epoch_250.pth


Epoch 250/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 251 | Loss: 0.4574


Epoch 251/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 252 | Loss: 0.4276







Epoch 252/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 253 | Loss: 0.4226


Epoch 253/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 254 | Loss: 0.3958


Epoch 254/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 255 | Loss: 0.4440


Epoch 255/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 256 | Loss: 0.4181


Epoch 256/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 257 | Loss: 0.3855


Epoch 257/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 258 | Loss: 0.3834


Epoch 258/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 259 | Loss: 0.3499


Epoch 259/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 260 | Loss: 0.3924


Epoch 260/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 261 | Loss: 0.3844


Epoch 261/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 262 | Loss: 0.4264


Epoch 262/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 263 | Loss: 0.3161


Epoch 263/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 264 | Loss: 0.3433


Epoch 264/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 265 | Loss: 0.3397


Epoch 265/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 266 | Loss: 0.3388


Epoch 266/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 267 | Loss: 0.3837


Epoch 267/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 268 | Loss: 0.2937


Epoch 268/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 269 | Loss: 0.3034


Epoch 269/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 270 | Loss: 0.2954


Epoch 270/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 271 | Loss: 0.3455


Epoch 271/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 272 | Loss: 0.3328


Epoch 272/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 273 | Loss: 0.3194


Epoch 273/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 274 | Loss: 0.2935


Epoch 274/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 275 | Loss: 0.3216


Epoch 275/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 276 | Loss: 0.3038


Epoch 276/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 277 | Loss: 0.2967


Epoch 277/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 278 | Loss: 0.3004



Epoch 278/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 279 | Loss: 0.2940


Epoch 279/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 280 | Loss: 0.3172


Epoch 280/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 281 | Loss: 0.3156


Epoch 281/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 282 | Loss: 0.2857


Epoch 282/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 283 | Loss: 0.3193


Epoch 283/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 284 | Loss: 0.3036


Epoch 284/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 285 | Loss: 0.2333


Epoch 285/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 286 | Loss: 0.2313


Epoch 286/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 287 | Loss: 0.3374


Epoch 287/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 288 | Loss: 0.2508


Epoch 288/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 289 | Loss: 0.2816


Epoch 289/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 290 | Loss: 0.2210


Epoch 290/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 291 | Loss: 0.2762







Epoch 291/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 292 | Loss: 0.2388


Epoch 292/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 293 | Loss: 0.2105


Epoch 293/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 294 | Loss: 0.2906


Epoch 294/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 295 | Loss: 0.2396


Epoch 295/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 296 | Loss: 0.2220


Epoch 296/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 297 | Loss: 0.2322


Epoch 297/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 298 | Loss: 0.2596


Epoch 298/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 299 | Loss: 0.2175


Epoch 299/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 300 | Loss: 0.2202
Checkpoint saved for epoch 300 at checkpoints/model_epoch_300.pth


Epoch 300/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 301 | Loss: 0.2130


Epoch 301/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 302 | Loss: 0.2081


Epoch 302/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 303 | Loss: 0.1813


Epoch 303/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 304 | Loss: 0.2301


Epoch 304/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 305 | Loss: 0.2050


Epoch 305/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 306 | Loss: 0.2339


Epoch 306/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 307 | Loss: 0.1993


Epoch 307/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 308 | Loss: 0.2110


Epoch 308/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 309 | Loss: 0.1885


Epoch 309/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 310 | Loss: 0.1758


Epoch 310/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 311 | Loss: 0.1873


Epoch 311/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 312 | Loss: 0.2096


Epoch 312/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 313 | Loss: 0.1922


Epoch 313/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 314 | Loss: 0.1837


Epoch 314/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 315 | Loss: 0.1921


Epoch 315/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 316 | Loss: 0.1826


Epoch 316/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 317 | Loss: 0.2055


Epoch 317/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 318 | Loss: 0.1639


Epoch 318/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 319 | Loss: 0.1632


Epoch 319/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 320 | Loss: 0.1393


Epoch 320/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 321 | Loss: 0.1740


Epoch 321/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 322 | Loss: 0.1413


Epoch 322/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 323 | Loss: 0.1440


Epoch 323/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 324 | Loss: 0.1323







Epoch 324/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 325 | Loss: 0.1898


Epoch 325/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 326 | Loss: 0.1500


Epoch 326/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 327 | Loss: 0.1429


Epoch 327/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 328 | Loss: 0.1288


Epoch 328/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 329 | Loss: 0.1188


Epoch 329/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 330 | Loss: 0.1119


Epoch 330/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 331 | Loss: 0.1256







Epoch 331/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 332 | Loss: 0.1387



Epoch 332/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 333 | Loss: 0.1038


Epoch 333/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 334 | Loss: 0.1150


Epoch 334/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 335 | Loss: 0.1054


Epoch 335/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 336 | Loss: 0.1301


Epoch 336/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 337 | Loss: 0.1272


Epoch 337/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 338 | Loss: 0.1414


Epoch 338/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 339 | Loss: 0.1030


Epoch 339/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 340 | Loss: 0.1666


Epoch 340/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 341 | Loss: 0.1225


Epoch 341/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 342 | Loss: 0.1371


Epoch 342/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 343 | Loss: 0.1253


Epoch 343/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 344 | Loss: 0.1095


Epoch 344/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 345 | Loss: 0.0894


Epoch 345/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 346 | Loss: 0.1024


Epoch 346/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 347 | Loss: 0.1064


Epoch 347/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 348 | Loss: 0.0917


Epoch 348/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 349 | Loss: 0.0765


Epoch 349/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 350 | Loss: 0.1003
Checkpoint saved for epoch 350 at checkpoints/model_epoch_350.pth


Epoch 350/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 351 | Loss: 0.0976


Epoch 351/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 352 | Loss: 0.1038


Epoch 352/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 353 | Loss: 0.1048


Epoch 353/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 354 | Loss: 0.0928


Epoch 354/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 355 | Loss: 0.0699


Epoch 355/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 356 | Loss: 0.1052


Epoch 356/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 357 | Loss: 0.1020


Epoch 357/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 358 | Loss: 0.0663


Epoch 358/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 359 | Loss: 0.0960


Epoch 359/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 360 | Loss: 0.0652


Epoch 360/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 361 | Loss: 0.0781


Epoch 361/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 362 | Loss: 0.0842


Epoch 362/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 363 | Loss: 0.0838


Epoch 363/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 364 | Loss: 0.0902


Epoch 364/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 365 | Loss: 0.0919


Epoch 365/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 366 | Loss: 0.0809







Epoch 366/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 367 | Loss: 0.0822


Epoch 367/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 368 | Loss: 0.0839


Epoch 368/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 369 | Loss: 0.0619







Epoch 369/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 370 | Loss: 0.0833







Epoch 370/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 371 | Loss: 0.0746


Epoch 371/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 372 | Loss: 0.1123



Epoch 372/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 373 | Loss: 0.0659


Epoch 373/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 374 | Loss: 0.0680


Epoch 374/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 375 | Loss: 0.0724







Epoch 375/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 376 | Loss: 0.0683







Epoch 376/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 377 | Loss: 0.1194


Epoch 377/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 378 | Loss: 0.0569



Epoch 378/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 379 | Loss: 0.0553


Epoch 379/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 380 | Loss: 0.0516


Epoch 380/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 381 | Loss: 0.0444







Epoch 381/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 382 | Loss: 0.0630







Epoch 382/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 383 | Loss: 0.0427


Epoch 383/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 384 | Loss: 0.0760



Epoch 384/400:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 385 | Loss: 0.0578







Epoch 385/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 386 | Loss: 0.0719


Epoch 386/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 387 | Loss: 0.0518


Epoch 387/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 388 | Loss: 0.0418


Epoch 388/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 389 | Loss: 0.0410


Epoch 389/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 390 | Loss: 0.0500


Epoch 390/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 391 | Loss: 0.0499


Epoch 391/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 392 | Loss: 0.0474


Epoch 392/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 393 | Loss: 0.0458


Epoch 393/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 394 | Loss: 0.0757


Epoch 394/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 395 | Loss: 0.0384


Epoch 395/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 396 | Loss: 0.0394


Epoch 396/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 397 | Loss: 0.0353


Epoch 397/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 398 | Loss: 0.0373


Epoch 398/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 399 | Loss: 0.0324


Epoch 399/400:   0%|          | 0/3 [00:00<?, ?it/s]


Epoch 400 | Loss: 0.0331
Checkpoint saved for epoch 400 at checkpoints/model_epoch_400.pth


In [None]:
# # Create checkpoints directory if it doesn't exist
# os.makedirs("checkpoints", exist_ok=True)

In [12]:
test_model, src_vocab, trg_vocab = tools.load_test_model(
    checkpoint_path,
    TransformerClass=bb.Transformer,
    embed_size=EMBED_DIM,
    device=DEVICE,
    max_length=MAX_LENGTH
)

✅ Model loaded successfully from checkpoints/model_epoch_400.pth


  checkpoint = torch.load(checkpoint_path, map_location=device)


In [13]:

print("testing translation ...")

# Test translation
test_sentence = "good morning"
print(f'Input: {test_sentence}')
print(f'Output: {tools.translate(test_sentence, test_model, src_vocab, trg_vocab, DEVICE)}')

testing translation ...
Input: good morning
Output: guten morgen.
