In [1]:
import torch
import numpy as np
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import PIL.Image as Image
import transforms as T
from engine import train_one_epoch, evaluate
import utils
from MathExpressionDataset import MEdataset
import os


In [2]:

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [3]:
# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 109 #108 LaTeX symbols + the background/nothing

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [4]:
trainDir = r'C:\Users\maxwe\Desktop\My Documents\MathExprSolverMx\MathExprSolverMx\AidaCalculusHandWrittenMathDataset\archive\train'
testDir = r'C:\Users\maxwe\Desktop\My Documents\MathExprSolverMx\MathExprSolverMx\AidaCalculusHandWrittenMathDataset\archive\test'


In [5]:
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# use our dataset and defined transformations
dataset = MEdataset(trainDir, get_transform(train=True))
dataset_test = MEdataset(testDir, get_transform(train=False))

# split the dataset in train and test set
indicesTrain = torch.randperm(len(dataset)).tolist()
indicesTest = torch.randperm(len(dataset_test)).tolist()
dataset = torch.utils.data.Subset(dataset, indicesTrain[:])
dataset_test = torch.utils.data.Subset(dataset_test, indicesTest[:])


In [6]:
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [7]:
# move model to the right device
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [8]:
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [9]:
# let's train it for 10 epochs
num_epochs = 2

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

savePath = os.path.join(os.getcwd(), 'mathRecognizerMx_2epochwBgrd.pt')
torch.save(model.state_dict(), savePath)

Epoch: [0]  [    0/54500]  eta: 2 days, 19:10:44  lr: 0.000010  loss: 8.9632 (8.9632)  loss_classifier: 4.8027 (4.8027)  loss_box_reg: 0.8858 (0.8858)  loss_objectness: 2.9559 (2.9559)  loss_rpn_box_reg: 0.3188 (0.3188)  time: 4.4375  data: 2.7510  max mem: 2191
Epoch: [0]  [   10/54500]  eta: 12:05:54  lr: 0.000060  loss: 8.9632 (8.7948)  loss_classifier: 4.6958 (4.7006)  loss_box_reg: 0.9224 (0.8675)  loss_objectness: 2.7816 (2.5493)  loss_rpn_box_reg: 0.5870 (0.6773)  time: 0.7993  data: 0.2518  max mem: 2833
Epoch: [0]  [   20/54500]  eta: 9:22:42  lr: 0.000110  loss: 6.2447 (7.3620)  loss_classifier: 4.3924 (4.3649)  loss_box_reg: 0.9190 (0.8487)  loss_objectness: 0.2765 (1.4981)  loss_rpn_box_reg: 0.5775 (0.6503)  time: 0.4288  data: 0.0019  max mem: 3203
Epoch: [0]  [   30/54500]  eta: 8:54:44  lr: 0.000160  loss: 4.8855 (6.3033)  loss_classifier: 3.1837 (3.7383)  loss_box_reg: 0.8848 (0.8148)  loss_objectness: 0.1026 (1.1449)  loss_rpn_box_reg: 0.4561 (0.6054)  time: 0.4734  da

RuntimeError: CUDA out of memory. Tried to allocate 200.00 MiB (GPU 0; 6.00 GiB total capacity; 3.26 GiB already allocated; 157.56 MiB free; 4.03 GiB reserved in total by PyTorch)