In [11]:
import torch
import torchvision

from torch.utils.data import Dataset, DataLoader

from torchvision import transforms

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import random
import math

#The dataset is in PIL, needs to be Tensors.
transform = transforms.Compose([
    transforms.ToTensor()
])

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

torch.cuda.is_available()
torch.cuda.device_count()
torch.cuda.current_device()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1660 SUPER'

In [13]:
def format_integer(number):
    str_len = 10
    number_str = str(number)
    if len(number_str) > str_len:
        number_str = number_str[:str_len]
    elif len(number_str) < str_len:
        number_str = number_str.zfill(str_len)
    return number_str

def generate_sample():
    start, end = 0, 9999999999
    a = random.randint(start, end)
    b = random.randint(start, end)
    return format_integer(a) + format_integer(b), format_integer(a + b) # both should be pytorch tensors. floats are too small to hold the number.
    # actually, label being a torch.double is fine, as long as it's < 9007199254740992! (double)

#generate the dataset
dataset_size = 10000
train_percent = 0.8
train_size = math.floor(dataset_size * train_percent)
test_size = math.floor(dataset_size * (1 - train_percent))

random.seed(42)

# I'll start with random, then try other things. some ideas:
# - keep the format, but only generate training data with 5-digit ints. see if the model can generalize.
# - try generating a training set with only 5-digit ints, but most of the training data is for below 3digits. Does this help or hurt generalization?

train_data = []
test_data = []

for i in range(train_size):
    train_data.append(generate_sample())

for i in range(test_size):
    test_data.append(generate_sample())


('27463172138697354961', '1144367217')
('11812419430958682846', '2139924789')
('31631197858963334018', '1212645380')


In [None]:
class CustomDataset(Dataset):
    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        s = self.samples[index]
        return s.input, s.label

In [9]:
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

99999999980000000001
20


In [None]:
class LinearNet(nn.Module):
    def __init__(self):
        super().__init__()
        #define the layers here
        self.fn1 = nn.Linear(20, 200)
        self.fn2 = nn.Linear(200, 50)
        self.fn3 = nn.Linear(50, 20)
    
    def forward(self, x):
        #defined the forward pass here
        x = F.relu(self.fn1(x))
        x = F.relu(self.fn2(x))
        x = self.fn3(x)
        return x

In [None]:
net = LinearNet()
net.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
total_parameters = count_parameters(net)
print(f"Total number of parameters: {total_parameters}")

In [None]:
for epoch in range(100):
    for i, data in enumerate(train_dataloader):
        cur_iter += 1
        x_batch.append(cur_iter)

        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss and accuracy for the current batch
        batch_loss = loss.item()
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {batch_loss :.3f}')
        training_loss_list.append(batch_loss)
        batch_acc = (torch.argmax(outputs, dim=1) == labels).sum().item() / 64
        train_acc_list.append(batch_acc)

        #validation loss and model accuracy for the entire test dataset - every 100 batches
        if (i % 500 == 499):
            x_batch_500.append(cur_iter)
            correct = 0
            total = 0
            running_loss = 0
            iterations = 0
            with torch.no_grad():
                for data in test_dataloader:
                    inputs, labels = data[0].to(device), data[1].to(device)
                    outputs = net(inputs)

                    running_loss += criterion(outputs, labels)
                    iterations += 1

                    total += labels.size(0)
                    correct += (torch.argmax(outputs, dim=1) == labels).sum().item()
            test_acc = correct/total
            test_acc_list.append(test_acc)
            test_loss = running_loss/iterations
            test_loss_list.append(test_loss.item())