In [4]:
import sys
from pathlib import Path

# Put the parent of the notebook's working directory on the import path;
# the next code cell imports `src.mintrans`, presumably from that directory.
PROJECT_ROOT = Path.cwd().parent
# Guard the insert so that re-running this cell does not stack duplicate
# entries on sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

### Loss analysis

In [5]:
from torch.utils.data import DataLoader
from src.mintrans import FibonacciModDataset, MinimalTransformer, evaluate_model, train_model
import torch


# Build a tiny dataset and show its first sample; the printed value is an
# (input, target) pair of tensors.
data = FibonacciModDataset(num_samples=10)
# Index with [] rather than calling the __getitem__ dunder directly.
print(data[0])

(tensor([8, 3, 1, 4, 5, 9, 4, 3, 7]), tensor([3, 1, 4, 5, 9, 4, 3, 7, 0]))


### With the default `10` epochs

In [6]:
# Baseline run: 5000 samples, batch size 32, train_model left at its defaults.
vocab_size = 10  # used both as the dataset modulus and as the model vocabulary size
train_ds = FibonacciModDataset(mod=vocab_size, num_samples=5000)
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=32)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE: accuracy is measured on the same loader the model was trained on.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.1572
Epoch 2, Loss: 1.2737
Epoch 3, Loss: 0.9176
Epoch 4, Loss: 0.7823
Epoch 5, Loss: 0.7055
Epoch 6, Loss: 0.6553
Epoch 7, Loss: 0.6161
Epoch 8, Loss: 0.5871
Epoch 9, Loss: 0.5622
Epoch 10, Loss: 0.5381
Accuracy: 83.25%


### Training for `40` more epochs (`50` in total)

In [7]:
# Keeps training the `model` object created in the previous code cell
# (In [6]); that cell already called train_model once without an `epochs`
# argument and logged 10 epochs, so this adds 40 more on top of them.
# NOTE(review): whether train_model builds a fresh optimizer on each call is
# not visible from this notebook - confirm in src.mintrans.
train_model(model, train_loader, epochs=40)
# Accuracy is again measured on the training loader itself.
evaluate_model(model, train_loader)

Epoch 1, Loss: 0.5172
Epoch 2, Loss: 0.4973
Epoch 3, Loss: 0.4833
Epoch 4, Loss: 0.4684
Epoch 5, Loss: 0.4593
Epoch 6, Loss: 0.4512
Epoch 7, Loss: 0.4438
Epoch 8, Loss: 0.4385
Epoch 9, Loss: 0.4289
Epoch 10, Loss: 0.4227
Epoch 11, Loss: 0.4184
Epoch 12, Loss: 0.4129
Epoch 13, Loss: 0.4031
Epoch 14, Loss: 0.4011
Epoch 15, Loss: 0.3965
Epoch 16, Loss: 0.3895
Epoch 17, Loss: 0.3928
Epoch 18, Loss: 0.3939
Epoch 19, Loss: 0.3969
Epoch 20, Loss: 0.3798
Epoch 21, Loss: 0.3781
Epoch 22, Loss: 0.3747
Epoch 23, Loss: 0.3812
Epoch 24, Loss: 0.3729
Epoch 25, Loss: 0.3738
Epoch 26, Loss: 0.3698
Epoch 27, Loss: 0.3710
Epoch 28, Loss: 0.3625
Epoch 29, Loss: 0.3649
Epoch 30, Loss: 0.3810
Epoch 31, Loss: 0.3658
Epoch 32, Loss: 0.3620
Epoch 33, Loss: 0.3588
Epoch 34, Loss: 0.3580
Epoch 35, Loss: 0.3600
Epoch 36, Loss: 0.3653
Epoch 37, Loss: 0.3887
Epoch 38, Loss: 0.3596
Epoch 39, Loss: 0.3556
Epoch 40, Loss: 0.3542
Accuracy: 87.58%


Accuracy improves by `~4.33` percentage points (`83.25%` → `87.58%`) with `5` times more epochs (`50` in total instead of `10`).

### Increasing the batch size from `32` to `64`, keeping `10` epochs

Accuracy drops to roughly 75-80% (from ~83% with a batch size of `32`).

In [8]:
# Same experiment as the baseline, but with a fresh model and batch size 64.
vocab_size = 10  # doubles as the `mod` argument of the dataset
train_ds = FibonacciModDataset(mod=vocab_size, num_samples=5000)
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=64)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE: accuracy is measured on the same loader the model was trained on.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.3423
Epoch 2, Loss: 1.9069
Epoch 3, Loss: 1.7228
Epoch 4, Loss: 1.5168
Epoch 5, Loss: 1.3277
Epoch 6, Loss: 1.1855
Epoch 7, Loss: 1.0686
Epoch 8, Loss: 0.9578
Epoch 9, Loss: 0.8666
Epoch 10, Loss: 0.8015
Accuracy: 74.08%


### Switching back to the default `batch_size` of `32`

In [9]:
# Control run: fresh dataset and fresh model, batch size back at 32.
vocab_size = 10  # doubles as the `mod` argument of the dataset
train_ds = FibonacciModDataset(mod=vocab_size, num_samples=5000)
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=32)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# NOTE: accuracy is measured on the same loader the model was trained on.
evaluate_model(model, train_loader)

Epoch 1, Loss: 2.1651
Epoch 2, Loss: 1.5080
Epoch 3, Loss: 1.0658
Epoch 4, Loss: 0.8772
Epoch 5, Loss: 0.7604
Epoch 6, Loss: 0.6915
Epoch 7, Loss: 0.6346
Epoch 8, Loss: 0.5920
Epoch 9, Loss: 0.5545
Epoch 10, Loss: 0.5319
Accuracy: 83.35%


## Split dataset (train/test)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import random
from src.mintrans import *

vocab_size = 10
generated_ds = FibonacciModDataset(mod=vocab_size, num_samples=5000)

# 80/20 hold-out: the training length is rounded down and the test split takes
# the remainder, so the two lengths always add up to the full dataset.
train_size = int(len(generated_ds) * 0.8)
test_size = len(generated_ds) - train_size

# Random partition into two non-overlapping index subsets.
# NOTE(review): the sample printed near the top of the notebook looks fully
# determined by its first two tokens; if so, mod=10 allows at most 100 distinct
# sequences and the two splits would share content - confirm against
# FibonacciModDataset before reading the score below as held-out accuracy.
train_ds, test_ds = random_split(generated_ds, lengths=[train_size, test_size])

train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_ds, batch_size=32)

model = MinimalTransformer(vocab_size=vocab_size)
train_model(model, train_loader)
# Unlike the earlier cells, accuracy is measured on the test split.
evaluate_model(model, test_loader)

Epoch 1, Loss: 2.2020
Epoch 2, Loss: 1.4992
Epoch 3, Loss: 1.0956
Epoch 4, Loss: 0.9304
Epoch 5, Loss: 0.8221
Epoch 6, Loss: 0.7399
Epoch 7, Loss: 0.6786
Epoch 8, Loss: 0.6304
Epoch 9, Loss: 0.5904
Epoch 10, Loss: 0.5636
Accuracy: 82.68%
