In [42]:
import sys
from pathlib import Path

# Put the project root at the front of the import path so project-local
# packages can be imported from this notebook.
# NOTE: this relies on the kernel's working directory being the project root.
PROJECT_ROOT = Path.cwd()
_project_root_str = str(PROJECT_ROOT)
# Insert only when the root is not already first, so re-running this cell does
# not stack duplicate entries on `sys.path`.
if sys.path[:1] != [_project_root_str]:
    sys.path.insert(0, _project_root_str)

### Loss analysis

In [43]:
from torch.utils.data import DataLoader
from src.mintrans import FibonacciModDataset, MinimalTransformer, evaluate_model, train_model
import torch

# Build a tiny dataset and print its first item to inspect the sample format
# (the printed item is a pair of tensors).
data = FibonacciModDataset(num_samples=10)
print(data[0])

(tensor([3, 4, 7, 1, 8, 9, 7, 6, 3]), tensor([4, 7, 1, 8, 9, 7, 6, 3, 9]))


### With the default `10` epochs

In [44]:
# Baseline experiment: batch size 32, `train_model` called with its default epoch count.
# Seed torch's global RNG so DataLoader shuffling (and any torch-based
# initialisation/sampling) repeats when the notebook is re-run.
# NOTE(review): if FibonacciModDataset or the model draw from `random`/NumPy,
# those generators need seeding as well — confirm against src.mintrans.
torch.manual_seed(0)

vocab_size = 10  # used both as the dataset modulus and the model vocabulary size
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE(review): evaluation runs on the training loader, not on held-out data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.2003
Epoch 2, Loss: 1.3577
Epoch 3, Loss: 0.9477
Epoch 4, Loss: 0.7880
Epoch 5, Loss: 0.6930
Epoch 6, Loss: 0.6351
Epoch 7, Loss: 0.5943
Epoch 8, Loss: 0.5662
Epoch 9, Loss: 0.5380
Epoch 10, Loss: 0.5173
Accuracy: 83.13%


### Training for `40` more epochs (`50` in total)

In [45]:
# Keep training the existing `model` (no new model is created in this cell)
# for a further 40 epochs, then evaluate it again on the same loader.
additional_epochs = 40
train_model(model, train_loader, epochs=additional_epochs)
evaluate_model(model, train_loader)

Epoch 1, Loss: 0.4978
Epoch 2, Loss: 0.4833
Epoch 3, Loss: 0.4747
Epoch 4, Loss: 0.4611
Epoch 5, Loss: 0.4505
Epoch 6, Loss: 0.4420
Epoch 7, Loss: 0.4373
Epoch 8, Loss: 0.4289
Epoch 9, Loss: 0.4236
Epoch 10, Loss: 0.4191
Epoch 11, Loss: 0.4260
Epoch 12, Loss: 0.4086
Epoch 13, Loss: 0.4096
Epoch 14, Loss: 0.4064
Epoch 15, Loss: 0.3944
Epoch 16, Loss: 0.3892
Epoch 17, Loss: 0.3890
Epoch 18, Loss: 0.3952
Epoch 19, Loss: 0.3931
Epoch 20, Loss: 0.3843
Epoch 21, Loss: 0.3811
Epoch 22, Loss: 0.3832
Epoch 23, Loss: 0.3804
Epoch 24, Loss: 0.3780
Epoch 25, Loss: 0.3722
Epoch 26, Loss: 0.3765
Epoch 27, Loss: 0.3940
Epoch 28, Loss: 0.3672
Epoch 29, Loss: 0.3674
Epoch 30, Loss: 0.3660
Epoch 31, Loss: 0.3612
Epoch 32, Loss: 0.3615
Epoch 33, Loss: 0.3608
Epoch 34, Loss: 0.3605
Epoch 35, Loss: 0.3629
Epoch 36, Loss: 0.3578
Epoch 37, Loss: 0.3761
Epoch 38, Loss: 0.3697
Epoch 39, Loss: 0.3575
Epoch 40, Loss: 0.3507
Accuracy: 88.55%


We get a `~5.42` percentage-point accuracy increase (83.13% → 88.55%) after `40` additional epochs, i.e. `5` times as many epochs in total (`50` vs. `10`).

### Increasing the batch size from `32` to `64` with `10` epochs

Accuracy lands in the ~75–80% range (78.43% in the run below), down from 83.13% with batch size `32`.

In [53]:
# Batch-size experiment: same setup as the baseline but with batch_size=64.
# Seed torch's global RNG so DataLoader shuffling (and any torch-based
# initialisation/sampling) repeats on re-run; otherwise the batch-size
# comparison is confounded by run-to-run noise.
# NOTE(review): if FibonacciModDataset or the model draw from `random`/NumPy,
# those generators need seeding as well — confirm against src.mintrans.
torch.manual_seed(0)

vocab_size = 10  # that is mod in our case
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE(review): evaluation runs on the training loader, not on held-out data.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.3177
Epoch 2, Loss: 1.7166
Epoch 3, Loss: 1.1767
Epoch 4, Loss: 0.9945
Epoch 5, Loss: 0.8724
Epoch 6, Loss: 0.7809
Epoch 7, Loss: 0.7200
Epoch 8, Loss: 0.6767
Epoch 9, Loss: 0.6392
Epoch 10, Loss: 0.6128
Accuracy: 78.43%


### Switching back to the baseline `batch_size` of `32`

In [None]:
# Re-create the batch_size=32 setup with a fresh dataset, loader and model.
# Seed torch's global RNG so DataLoader shuffling (and any torch-based
# initialisation/sampling) repeats when the notebook is re-run.
# NOTE(review): if FibonacciModDataset or the model draw from `random`/NumPy,
# those generators need seeding as well — confirm against src.mintrans.
torch.manual_seed(0)

vocab_size = 10  # that is mod in our case
train_ds = FibonacciModDataset(num_samples=5000, mod=vocab_size)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE(review): evaluation runs on the training loader, not on held-out data.
evaluate_model(model, train_loader)