In [1]:
import pickle, torch
from torch import nn
from torch.utils.data import DataLoader
from torchsummary import summary
import time

In [4]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [3]:
# load dataset from .pickle file generated by pickle_dataset
# NOTE: unpickling can execute arbitrary code, so only load a file that was
# produced locally by pickle_dataset, never one from an untrusted source.
DATASET_PATH = 'dataset.pickle'
with open(DATASET_PATH, 'rb') as file:
    dataset = pickle.load(file)

# Fail early, with a clear message, if a split that later cells index is missing.
missing_splits = [split for split in ('train', 'val', 'test') if split not in dataset]
if missing_splits:
    raise KeyError("{} is missing dataset split(s): {}".format(DATASET_PATH, missing_splits))

In [66]:
# Number of samples per mini-batch, shared by all three loaders.
BATCH_SIZE = 10

# Only the training split is shuffled. Aggregate evaluation metrics do not
# depend on sample order, so the validation/test loaders keep a fixed order,
# which makes their batches reproducible from run to run.
train_dataloader = DataLoader(dataset['train'], batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset['val'], batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(dataset['test'], batch_size=BATCH_SIZE, shuffle=False)

In [55]:
class MathClassifier(nn.Module):
    """Small convolutional classifier that maps an image batch to class logits.

    Layer stack: Conv2d(6x6) -> ReLU -> AvgPool2d(2) -> Conv2d(6x6) -> ReLU ->
    AvgPool2d(3) -> Flatten -> Linear. The final linear layer expects 500
    input features, i.e. 20 channels x 5 x 5, which is what the convolutional
    stack produces for e.g. a 45x45 input.

    Args:
        in_channels: Number of channels in the input images (default 1).
        num_classes: Number of output logits, one per class (default 82).
    """

    def __init__(self, in_channels=1, num_classes=82):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(in_channels, 10, 6),
            nn.ReLU(),
            nn.AvgPool2d(2),
            nn.Conv2d(10, 20, 6),
            nn.ReLU(),
            nn.AvgPool2d(3),
            nn.Flatten(),
            nn.Linear(500, num_classes),
        )

    def forward(self, x):
        """Return the raw (un-normalised) class logits for the batch ``x``."""
        # Call the container itself rather than its .forward(): going through
        # __call__ is what runs any hooks registered on ``self.layers``.
        return self.layers(x)

# Print a layer-by-layer summary (output shapes and parameter counts) for a
# single-channel 45x45 input, using a throwaway model instance.
summary(MathClassifier().to(device), (1, 45, 45))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 40, 40]             370
              ReLU-2           [-1, 10, 40, 40]               0
         AvgPool2d-3           [-1, 10, 20, 20]               0
            Conv2d-4           [-1, 20, 15, 15]           7,220
              ReLU-5           [-1, 20, 15, 15]               0
         AvgPool2d-6             [-1, 20, 5, 5]               0
           Flatten-7                  [-1, 500]               0
            Linear-8                   [-1, 82]          41,082
Total params: 48,672
Trainable params: 48,672
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.35
Params size (MB): 0.19
Estimated Total Size (MB): 0.54
----------------------------------------------------------------


In [67]:
model = MathClassifier().to(device)


def train_loop():
    """Train the global ``model`` for one pass over ``train_dataloader``.

    Each batch is moved to ``device``, scored with cross-entropy loss, and
    used for one SGD step. Running accuracy is measured on the predictions
    made before each step. Prints the accuracy over the whole pass and the
    elapsed wall-clock time; returns nothing.
    """
    # statistics
    start_time = time.time()
    num_correct = 0
    num_total = 0

    # loss function & optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters())

    model.train()
    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        # forward pass (call the module, not .forward(), so hooks run)
        logits = model(X)
        pred_y = torch.argmax(logits, 1)

        # update statistics: one vectorised comparison per batch instead of
        # a Python loop over individual tensor elements
        num_correct += (pred_y == y).sum().item()
        num_total += y.size(0)

        # calculate gradient
        loss = criterion(logits, y)
        loss.backward()

        # backward pass
        optimizer.step()
        optimizer.zero_grad()

    # An empty loader has no defined accuracy; report NaN instead of raising.
    accuracy = 100 * num_correct / num_total if num_total else float("nan")

    print("Training Loop:")
    print("Accuracy: {:.2f}%".format(accuracy))
    print("Elapsed Time: {:.2f} min".format((time.time() - start_time)/60))


train_loop()

Training Loop:
Accuracy: 78.12%
Elapsed Time: 2.20 min


In [68]:
def val_loop():
    """Measure the global ``model``'s accuracy over ``val_dataloader``.

    Runs in evaluation mode with gradient tracking disabled, so no autograd
    graph is built and the model's parameters are not modified. The model's
    previous train/eval mode is restored afterwards. Prints the accuracy and
    the elapsed wall-clock time; returns nothing.
    """
    # statistics
    start_time = time.time()
    num_correct = 0
    num_total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in val_dataloader:
                # forward pass (call the module, not .forward(), so hooks run)
                logits = model(X.to(device))
                pred_y = torch.argmax(logits, 1)

                # update statistics: one vectorised comparison per batch
                num_correct += (pred_y == y.to(device)).sum().item()
                num_total += y.size(0)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # An empty loader has no defined accuracy; report NaN instead of raising.
    accuracy = 100 * num_correct / num_total if num_total else float("nan")

    print("Validation Loop:")
    print("Accuracy: {:.2f}%".format(accuracy))
    print("Elapsed Time: {:.0f}s".format(time.time() - start_time))


val_loop()

Validation Loop:
Accuracy: 87.27%
Elapsed Time: 6s
