In [2]:
import sys
from pathlib import Path

# Make the project root importable so that `src.*` can be imported from this notebook.
# The root is taken to be the parent of the current working directory, so this only
# works when the kernel's cwd is a direct sub-directory of the project root.
PROJECT_ROOT = Path.cwd().parent
# Insert at position 0 so this path takes precedence over the other sys.path entries.
sys.path.insert(0, str(PROJECT_ROOT))

### Loss analysis

In [3]:
from torch.utils.data import DataLoader
from src.mintrans import FibonacciModDataset, MinimalTransformer, evaluate_model, train_model
import torch


# Smoke check: build a tiny dataset and print its first sample.
# The recorded output below shows the sample as a pair of tensors.
data = FibonacciModDataset(num_samples=10)
# Index the dataset with the subscript operator instead of calling the dunder directly.
print(data[0])

(tensor([1, 9, 0, 9, 9, 8, 7, 5, 2]), tensor([9, 0, 9, 9, 8, 7, 5, 2, 7]))


### With the default `10` epochs

In [4]:
# Baseline run: batch size 32, no explicit `epochs` argument.
# `vocab_size` is passed both as the dataset's `mod` and as the model's `vocab_size`.
vocab_size = 10
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)



# Fresh model instance for this experiment.
model = MinimalTransformer(vocab_size=vocab_size)
# No `epochs` given, so the imported train_model uses its own default
# (the recorded output shows 10 epochs).
train_model(model, train_loader)
# NOTE: evaluated on the same loader that was used for training, so the reported
# number is accuracy on the training data, not on held-out data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.1441
Epoch 2, Loss: 1.2618
Epoch 3, Loss: 0.9267
Epoch 4, Loss: 0.7759
Epoch 5, Loss: 0.6811
Epoch 6, Loss: 0.6167
Epoch 7, Loss: 0.5732
Epoch 8, Loss: 0.5448
Epoch 9, Loss: 0.5210
Epoch 10, Loss: 0.4979
Accuracy: 83.90%


### Training the same model for `40` more epochs (`50` in total)

In [5]:
# Continue training the `model` from the previous cell for 40 more epochs.
# No new model is created here, so this builds on the weights already trained above.
train_model(model, train_loader, epochs=40)
# NOTE: still evaluated on the training loader, so this is accuracy on training data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 0.4846
Epoch 2, Loss: 0.4690
Epoch 3, Loss: 0.4615
Epoch 4, Loss: 0.4463
Epoch 5, Loss: 0.4471
Epoch 6, Loss: 0.4266
Epoch 7, Loss: 0.4218
Epoch 8, Loss: 0.4189
Epoch 9, Loss: 0.4160
Epoch 10, Loss: 0.4088
Epoch 11, Loss: 0.3962
Epoch 12, Loss: 0.3897
Epoch 13, Loss: 0.3866
Epoch 14, Loss: 0.3922
Epoch 15, Loss: 0.4037
Epoch 16, Loss: 0.3823
Epoch 17, Loss: 0.3750
Epoch 18, Loss: 0.3741
Epoch 19, Loss: 0.3739
Epoch 20, Loss: 0.3692
Epoch 21, Loss: 0.3742
Epoch 22, Loss: 0.3838
Epoch 23, Loss: 0.3679
Epoch 24, Loss: 0.3633
Epoch 25, Loss: 0.3623
Epoch 26, Loss: 0.3766
Epoch 27, Loss: 0.3807
Epoch 28, Loss: 0.3620
Epoch 29, Loss: 0.3564
Epoch 30, Loss: 0.3661
Epoch 31, Loss: 0.3566
Epoch 32, Loss: 0.3691
Epoch 33, Loss: 0.3785
Epoch 34, Loss: 0.3568
Epoch 35, Loss: 0.3536
Epoch 36, Loss: 0.3703
Epoch 37, Loss: 0.3568
Epoch 38, Loss: 0.3509
Epoch 39, Loss: 0.3510
Epoch 40, Loss: 0.3501
Accuracy: 88.28%


We get a `~4.38` percentage-point accuracy increase (83.90% → 88.28%) after `40` additional epochs (`50` in total, i.e. `5` times more epochs)

### Increasing the batch size from `32` to `64`, keeping `10` epochs

Accuracy ends up in the ~75–80% range (80.22% in the run recorded below).

In [6]:
# Batch-size experiment: same setup as the batch-size-32 cells, but with batch_size=64.
vocab_size = 10 # that is mod in our case
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)



# A new model is created here, so this run does not reuse weights from earlier cells.
model = MinimalTransformer(vocab_size=vocab_size)
# No `epochs` given: relies on the imported train_model's default
# (the recorded output shows 10 epochs).
train_model(model, train_loader)
# NOTE: evaluated on the training loader, so this is accuracy on training data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.3181
Epoch 2, Loss: 1.7512
Epoch 3, Loss: 1.1647
Epoch 4, Loss: 0.9790
Epoch 5, Loss: 0.8682
Epoch 6, Loss: 0.7867
Epoch 7, Loss: 0.7257
Epoch 8, Loss: 0.6788
Epoch 9, Loss: 0.6413
Epoch 10, Loss: 0.6126
Accuracy: 80.22%


### Switching back to default `batch_size` of `32`

In [7]:
# Re-run with batch_size=32 on a newly generated dataset and a new model.
vocab_size = 10 # that is mod in our case
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)



# A new model is created here, so this run does not reuse weights from earlier cells.
model = MinimalTransformer(vocab_size=vocab_size)
# No `epochs` given: relies on the imported train_model's default
# (the recorded output shows 10 epochs).
train_model(model, train_loader)
# NOTE: evaluated on the training loader, so this is accuracy on training data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.1483
Epoch 2, Loss: 1.4015
Epoch 3, Loss: 1.0054
Epoch 4, Loss: 0.8531
Epoch 5, Loss: 0.7542
Epoch 6, Loss: 0.6858
Epoch 7, Loss: 0.6266
Epoch 8, Loss: 0.5769
Epoch 9, Loss: 0.5439
Epoch 10, Loss: 0.5197
Accuracy: 82.99%


## Split dataset (train / test)

In [8]:
from torch.utils.data import random_split

# Hold-out experiment: train on 80% of the generated samples, evaluate on the other 20%.
vocab_size = 10
generated_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_size = int(0.8 * len(generated_ds))  # 80% of the samples go to training
test_size = len(generated_ds) - train_size  # the remainder is held out for testing

# Seed the split so the same samples land in the held-out set on every run;
# an unseeded random_split draws from the global RNG and changes between runs.
split_generator = torch.Generator().manual_seed(0)
train_ds, test_ds = random_split(
    generated_ds, [train_size, test_size], generator=split_generator
)
# NOTE(review): the split is by sample index. If FibonacciModDataset can generate the
# same sequence more than once, identical sequences may appear in both subsets;
# confirm against the dataset implementation before treating this as unseen data.

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
# No shuffling for evaluation.
test_loader = DataLoader(test_ds, batch_size=32)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)

# Unlike the earlier cells, accuracy is measured on the held-out subset.
evaluate_model(model, test_loader)

Epoch 1, Loss: 2.2322
Epoch 2, Loss: 1.6406
Epoch 3, Loss: 1.1809
Epoch 4, Loss: 0.9556
Epoch 5, Loss: 0.8453
Epoch 6, Loss: 0.7810
Epoch 7, Loss: 0.7390
Epoch 8, Loss: 0.7071
Epoch 9, Loss: 0.6866
Epoch 10, Loss: 0.6683
Accuracy: 76.17%


## Split dataset with an increased number of epochs

In [None]:
import torch.nn as nn 

def train_model(model, dataloader, epochs=10, lr=1e-3):
    """Train ``model`` with Adam and cross-entropy loss, printing the mean loss per epoch.

    This definition shadows the ``train_model`` imported from ``src.mintrans``
    earlier in the notebook and additionally tracks accuracy on the training
    batches.

    Args:
        model: Module whose ``model(x)`` returns logits with the classes on the
            last dimension.
        dataloader: Iterable of ``(x, y)`` batches, iterated once per epoch.
            It does not need to support ``len()``.
        epochs: Number of passes over ``dataloader``.
        lr: Learning rate passed to ``torch.optim.Adam``.

    Returns:
        None. Progress is reported via ``print``: one line per epoch, followed by
        the mean of the per-epoch training accuracies (skipped when ``epochs`` is
        0, because there is nothing to average).
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    model.train()

    accuracy_per_e = []

    for epoch in range(epochs):
        total_loss = 0.0
        correct, total = 0, 0
        # Count batches while iterating instead of calling len(dataloader), so
        # loaders without a length work and an empty loader cannot divide by zero.
        num_batches = 0

        for x, y in dataloader:
            optimizer.zero_grad()
            logits = model(x)
            # Flatten every leading dimension so each position is one classification.
            loss = loss_fn(logits.view(-1, logits.size(-1)), y.view(-1))
            loss.backward()
            optimizer.step()

            # Accuracy uses the logits computed before this optimizer step, with the
            # model in train mode, so it is a running training accuracy.
            pred = logits.argmax(dim=-1)
            correct += (pred == y).sum().item()
            total += y.numel()
            total_loss += loss.item()
            num_batches += 1

        # With no batches there is nothing to average: report NaN instead of raising.
        acc = correct / total if total else float("nan")
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        accuracy_per_e.append(acc)

        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

    # epochs == 0 leaves the list empty; skip the summary rather than divide by zero.
    if accuracy_per_e:
        print(f"Mean accuracy across epochs: {sum(accuracy_per_e) / len(accuracy_per_e):.4%}")

# Uses the train_model defined in this cell, which shadows the one imported from src.mintrans.
# `model` is not re-created here, so these 100 epochs continue from whatever training
# the existing `model` object has already received.
train_model(model, train_loader, epochs=100)
# Accuracy on the held-out loader.
evaluate_model(model, test_loader)

Epoch 1, Loss: 0.3936
Epoch 2, Loss: 0.3860
Epoch 3, Loss: 0.3886
Epoch 4, Loss: 0.3804
Epoch 5, Loss: 0.3822
Epoch 6, Loss: 0.3952
Epoch 7, Loss: 0.3831
Epoch 8, Loss: 0.3873
Epoch 9, Loss: 0.3879
Epoch 10, Loss: 0.3789
Epoch 11, Loss: 0.3812
Epoch 12, Loss: 0.3798
Epoch 13, Loss: 0.3795
Epoch 14, Loss: 0.3873
Epoch 15, Loss: 0.3951
Epoch 16, Loss: 0.3798
Epoch 17, Loss: 0.3832
Epoch 18, Loss: 0.3793
Epoch 19, Loss: 0.3802
Epoch 20, Loss: 0.3908
Epoch 21, Loss: 0.3797
Epoch 22, Loss: 0.3764
Epoch 23, Loss: 0.3776
Epoch 24, Loss: 0.3834
Epoch 25, Loss: 0.3955
Epoch 26, Loss: 0.3815
Epoch 27, Loss: 0.3784
Epoch 28, Loss: 0.3784
Epoch 29, Loss: 0.3780
Epoch 30, Loss: 0.3761
Epoch 31, Loss: 0.3864
Epoch 32, Loss: 0.3801
Epoch 33, Loss: 0.3730
Epoch 34, Loss: 0.3889
Epoch 35, Loss: 0.3854
Epoch 36, Loss: 0.3793
Epoch 37, Loss: 0.3768
Epoch 38, Loss: 0.3894
Epoch 39, Loss: 0.3755
Epoch 40, Loss: 0.3741
Epoch 41, Loss: 0.3828
Epoch 42, Loss: 0.3785
Epoch 43, Loss: 0.3845
Epoch 44, Loss: 0.38