In [1]:
import torch
import numpy as np
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import PIL.Image as Image
import transforms as T
from engine import train_one_epoch, evaluate
import utils
from MathExpressionDataset import MEdataset
import os


In [2]:

def get_transform(train):
    """Build the transform pipeline applied to each dataset sample.

    Args:
        train: When truthy, a ``T.RandomHorizontalFlip(0.5)`` step is added
            after the tensor conversion; otherwise only ``T.ToTensor()`` is used.

    Returns:
        A ``T.Compose`` wrapping the selected steps, in order.

    NOTE(review): horizontal flipping mirrors the image; for handwritten math
    symbols such as "(" / ")" or "<" / ">" that may not preserve the label.
    Confirm this augmentation is intended for this dataset.
    """
    # Tensor conversion is always the first step.
    steps = [T.ToTensor()]
    if train:
        # Augmentation is only added for the training pipeline.
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

In [3]:
# Number of output classes for the new detection head:
# 108 LaTeX symbols + the background/nothing class.
num_classes = 109 #108 LaTeX symbols + the background/nothing

# Load a Faster R-CNN (ResNet-50 + FPN) model pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# The replacement head must accept the same number of input features
# that the existing classification layer receives.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap the pre-trained box predictor for a fresh one sized for num_classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [4]:
# Root folder of the dataset archive. Set the MATH_EXPR_DATA_ROOT environment
# variable to point at a different location; the default is the original
# author's local path, so existing setups keep working unchanged.
dataRoot = os.environ.get(
    'MATH_EXPR_DATA_ROOT',
    r'C:\Users\maxwe\Desktop\My Documents\MathExprSolverMx\MathExprSolverMx\AidaCalculusHandWrittenMathDataset\archive',
)

# Train / test folders live side by side under the dataset root.
trainDir = os.path.join(dataRoot, 'train')
testDir = os.path.join(dataRoot, 'test')


In [5]:
# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Training data gets the augmenting pipeline, test data the plain one.
dataset = MEdataset(trainDir, get_transform(train=True))
dataset_test = MEdataset(testDir, get_transform(train=False))

# Draw a random permutation of every index in each dataset. No samples are
# dropped here: each Subset below covers its whole dataset, just reordered.
indicesTrain = torch.randperm(len(dataset)).tolist()
indicesTest = torch.randperm(len(dataset_test)).tolist()

# Each Subset receives its own copy of the index list.
dataset = torch.utils.data.Subset(dataset, list(indicesTrain))
dataset_test = torch.utils.data.Subset(dataset_test, list(indicesTest))


In [6]:
# Options shared by the training and test loaders.
_loader_common = dict(
    batch_size=1,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Training loader: reshuffled by the loader itself.
data_loader = torch.utils.data.DataLoader(dataset, shuffle=True, **_loader_common)

# Test loader: iterates in the (already permuted) subset order.
data_loader_test = torch.utils.data.DataLoader(dataset_test, shuffle=False, **_loader_common)

In [7]:
# Move the model onto `device` (CUDA when available, otherwise CPU).
# Kept as a bare trailing expression so the notebook shows the returned
# module's structure as this cell's output.
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [8]:
# Only parameters that still require gradients are handed to the optimizer.
params = [weight for weight in model.parameters() if weight.requires_grad]

# SGD with momentum and weight decay.
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Step schedule: multiply the learning rate by gamma every step_size
# calls to lr_scheduler.step().
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [9]:
# Number of full passes over the training data.
num_epochs = 2

for epoch in range(num_epochs):
    # Train for one epoch, printing progress every 10 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    # Evaluate on the test dataset after each epoch.
    evaluate(model, data_loader_test, device=device)

# Save only the model weights (state_dict) into the current working directory.
# The epoch count in the file name is taken from num_epochs so the label stays
# correct if the number of epochs is changed; for num_epochs = 2 the name is
# the same as before ('mathRecognizerMx_2epochwBgrd.pt').
savePath = os.path.join(
    os.getcwd(),
    'mathRecognizerMx_{}epochwBgrd.pt'.format(num_epochs),
)
torch.save(model.state_dict(), savePath)

Epoch: [0]  [     0/109000]  eta: 5 days, 4:33:52  lr: 0.000010  loss: 6.9283 (6.9283)  loss_classifier: 4.6709 (4.6709)  loss_box_reg: 0.8901 (0.8901)  loss_objectness: 1.2621 (1.2621)  loss_rpn_box_reg: 0.1051 (0.1051)  time: 4.1141  data: 2.7203  max mem: 1123
Epoch: [0]  [    10/109000]  eta: 16:43:59  lr: 0.000060  loss: 9.4688 (8.5632)  loss_classifier: 4.5665 (4.5871)  loss_box_reg: 0.9019 (0.8940)  loss_objectness: 2.9505 (2.4840)  loss_rpn_box_reg: 0.6464 (0.5981)  time: 0.5527  data: 0.2487  max mem: 1346
Epoch: [0]  [    20/109000]  eta: 11:58:20  lr: 0.000110  loss: 6.1406 (7.1299)  loss_classifier: 4.3784 (4.2516)  loss_box_reg: 0.9019 (0.8176)  loss_objectness: 0.3495 (1.5073)  loss_rpn_box_reg: 0.6076 (0.5534)  time: 0.2096  data: 0.0012  max mem: 1561
Epoch: [0]  [    30/109000]  eta: 10:13:18  lr: 0.000160  loss: 4.5878 (6.1985)  loss_classifier: 3.1503 (3.6431)  loss_box_reg: 0.8960 (0.7888)  loss_objectness: 0.1409 (1.1727)  loss_rpn_box_reg: 0.5911 (0.5938)  time: 0