<a href="https://colab.research.google.com/github/SwayamParida/handwritten-math-exp-recognition/blob/master/Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab VM so the project files are reachable
# under /content/drive. force_remount=True asks Colab to redo the mount even
# if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Root folder of the project on the mounted Drive; dataset paths are built from it.
PROJECT_DIR = '/content/drive/My Drive/cs231n/project'

Mounted at /content/drive


In [0]:
import os
from collections import defaultdict
from skimage import io, feature
from sklearn import linear_model, model_selection
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn, optim, utils
from torchvision import models, transforms

# Draw image pixels as-is (nearest-neighbour) rather than smoothing them.
plt.rcParams['image.interpolation'] = 'nearest'
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Atomic Symbol Dataset

The atomic symbol dataset lives in the directory whose path is stored in `ATOMIC_SYMBOL_DATASET_DIR`. The dataset contains one subdirectory per math symbol, and each subdirectory is named after its symbol. Each subdirectory contains JPG images of the handwritten symbol, which serve as the training examples for that symbol class.

In [0]:
%cd drive/My\ Drive/cs231n/project/
!unzip -n data/data.zip -d data/handwritten-symbols

In [0]:
# Directory holding one subdirectory per symbol class. 'extracted_images' is
# presumably the top-level folder inside data.zip -- verify against the archive.
ATOMIC_SYMBOL_DATASET_DIR = os.path.join(PROJECT_DIR, 'data/handwritten-symbols/extracted_images')

In [0]:
def print_progress(i, num_items):
  """Print an in-place progress percentage for item `i` out of `num_items`.

  The message is written without a trailing newline and is preceded by a
  carriage return, so successive calls overwrite each other on one line.

  Args:
    i: Number of items processed so far.
    num_items: Total number of items. A value of 0 is reported as 100%.
  """
  # An empty workload is trivially complete; this also avoids dividing by zero.
  fraction = i / num_items if num_items else 1.0
  progress = '%.2f%% done.' % (fraction * 100)
  # Backspace characters are not honoured by the notebook output area (the
  # messages just pile up), whereas '\r' returns to the start of the line.
  print('\r' + progress, end='', flush=True)

In [0]:
class AtomicSymbolDataset(utils.data.Dataset):
  """Dataset of handwritten atomic math symbols stored one directory per class.

  `root_dir` is expected to contain one subdirectory per symbol, each holding
  the JPG examples of that symbol. Indexing yields `(image, symbol_name)`,
  where the image is read with `io.imread` and passed through `transform`
  when one is given.

  Args:
    root_dir: Directory containing the per-symbol subdirectories.
    max_examples_per_class: Maximum number of images indexed per symbol, or
      None to index every image.
    transform: Optional callable applied to each loaded image.

  Attributes:
    symbols: Maps symbol name -> list of image file paths.
    num_classes: Number of symbol subdirectories found under `root_dir`.
    size: Total number of indexed images.
  """

  def __init__(self, root_dir, max_examples_per_class=None, transform=None):
    self.root_dir = root_dir
    self.max_examples_per_class = max_examples_per_class
    self.transform = transform
    self.num_classes = 0
    self.size = 0
    self.build_dataset_info()

  def build_dataset_info(self):
    """Scan `root_dir` and (re)build the symbol -> image-paths index."""
    self.symbols = defaultdict(list)
    # Reset the counter so calling this method again does not double count.
    self.size = 0
    # Only subdirectories are classes; sorting makes the index order (and hence
    # what a given integer index refers to) independent of filesystem order.
    class_dirs = sorted(
        d for d in os.listdir(self.root_dir)
        if os.path.isdir(os.path.join(self.root_dir, d)))
    self.num_classes = len(class_dirs)
    limit = self.max_examples_per_class
    for i, d in enumerate(class_dirs):
      symbol_dir = os.path.join(self.root_dir, d)
      paths = self.symbols[d]
      for f in sorted(os.listdir(symbol_dir)):
        # The cap applies to accepted images, not to raw directory entries,
        # and is skipped entirely when no cap was requested.
        if limit is not None and len(paths) >= limit: break
        if not f.lower().endswith('.jpg'): continue
        paths.append(os.path.join(symbol_dir, f))
      self.size += len(paths)
      print_progress(i + 1, self.num_classes)

  def __getitem__(self, idx):
    """Return `(image, symbol_name)` for the example at position `idx`.

    Examples are ordered class by class. Negative indices count from the end.

    Raises:
      IndexError: If `idx` is outside `[-len(self), len(self))`.
    """
    if idx < 0:
      idx += self.size
    if not 0 <= idx < self.size:
      raise IndexError('Index out of bounds')
    # Walk the classes, consuming each class's length until idx falls inside one.
    for symbol, imgs in self.symbols.items():
      if idx < len(imgs):
        img = io.imread(imgs[idx])
        if self.transform is not None:
          img = self.transform(img)
        return (img, symbol)
      idx -= len(imgs)
    raise IndexError('Index out of bounds')

  def __len__(self):
    """Return the total number of indexed images."""
    return self.size

In [6]:
# Index the dataset without a transform, passing 100 as max_examples_per_class.
symbol_dataset = AtomicSymbolDataset(ATOMIC_SYMBOL_DATASET_DIR, 100)

0.00% done.1.22% done.2.44% done.3.66% done.4.88% done.6.10% done.7.32% done.8.54% done.9.76% done.10.98% done.12.20% done.13.41% done.14.63% done.15.85% done.17.07% done.18.29% done.19.51% done.20.73% done.21.95% done.23.17% done.24.39% done.25.61% done.26.83% done.28.05% done.29.27% done.30.49% done.31.71% done.32.93% done.34.15% done.35.37% done.36.59% done.37.80% done.39.02% done.40.24% done.41.46% done.42.68% done.43.90% done.45.12% done.46.34% done.47.56% done.48.78% done.50.00% done.

OSError: ignored

Getting a feel for the dataset

In [0]:
# Pick one example uniformly at random and show it together with its label.
random_index = np.random.choice(len(symbol_dataset))
sample_img, sample_img_label = symbol_dataset[random_index]
# Assigning to `_` keeps the AxesImage repr out of the cell output.
_ = plt.imshow(sample_img, cmap='Greys_r')
print(f'Symbol: {sample_img_label}')
print(f'Image dimensions: {sample_img.shape}')

In [0]:
# Summary statistics of the indexed dataset.
print(f'Number of examples: {len(symbol_dataset)}')
# The train/test counts stay disabled here: X_train and X_test are only
# created by the train_test_split call in a later cell.
# print(f'Number of training examples: {X_train.shape[0]}')
# print(f'Number of testing examples: {X_test.shape[0]}')
print(f'Number of classes: {symbol_dataset.num_classes}')

# Baseline 1 - Symbol Segmentation and Atomic Classification

To establish a baseline for the classification task, we develop a rudimentary model that segments each image of a mathematical expression into individual symbols and then feeds these symbols to a Softmax classifier trained on the atomic symbols dataset.

Segmenting expressions into individual symbols

In [0]:
def featurize(X, featurizers=None):
  """Turn every example in `X` into a flat feature vector.

  Args:
    X: Iterable of NumPy arrays, one per example.
    featurizers: Optional list of callables. Each is applied to an example
      and the flattened results are concatenated in list order. When None,
      the example itself is flattened.

  Returns:
    A NumPy array built from the per-example feature vectors.
  """
  def _to_vector(example):
    # No featurizers: the flattened raw values are the features.
    if featurizers is None:
      return np.ndarray.flatten(example)
    pieces = [np.ndarray.flatten(extract(example)) for extract in featurizers]
    return np.concatenate(pieces)

  return np.array([_to_vector(example) for example in X])

Converting PyTorch Dataset object into 2D Numpy array

In [0]:
# Materialise the dataset as NumPy arrays. The first example sizes the image
# array, so every image is assumed to share its shape.
num_examples = len(symbol_dataset)
X = np.empty(shape=(num_examples, *symbol_dataset[0][0].shape))
# Labels are collected in a list and converted afterwards: an array allocated
# with dtype=str is '<U1' and would truncate every symbol name to its first
# character (and the `np.str` alias no longer exists in recent NumPy releases).
labels = []

for i in range(num_examples):
  X[i], label = symbol_dataset[i]
  labels.append(label)
  if i % 20 == 0: print_progress(i, num_examples)

y = np.array(labels)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25)

Training a Softmax classifier on the raw pixels of the atomic symbols dataset

In [0]:
# Raw-pixel baseline: every image is flattened into one feature vector.
X_train_feat = featurize(X_train)
# fit() returns the estimator itself, so construction and fitting can be chained.
softmax_classifier = linear_model.LogisticRegression().fit(X_train_feat, y_train)

# Accuracy on the data the model was fitted on, then on the held-out split.
train_acc = softmax_classifier.score(X_train_feat, y_train)
test_acc = softmax_classifier.score(featurize(X_test), y_test)
print(f'Train accuracy: {train_acc}')
print(f'Test accuracy: {test_acc}')

Training the classifier with DAISY feature descriptors instead of raw pixels

In [0]:
# DAISY baseline: replace raw pixels with flattened DAISY descriptors.
daisy_featurizers = [feature.daisy]
X_train_feat = featurize(X_train, daisy_featurizers)
# The descriptors are already flat vectors, so they are passed to fit() directly.
# Note that this refits the same classifier object used for the raw-pixel run.
softmax_classifier.fit(X_train_feat, y_train)

train_acc = softmax_classifier.score(X_train_feat, y_train)
test_acc = softmax_classifier.score(featurize(X_test, daisy_featurizers), y_test)
print(f'Train accuracy: {train_acc}')
print(f'Test accuracy: {test_acc}')

Using AlexNet as a feature extractor

In [0]:
# Run on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Start from the pretrained AlexNet and freeze all of its weights so that it
# acts as a fixed feature extractor.
model_ft = models.alexnet(pretrained=True)
for weight in model_ft.parameters():
  weight.requires_grad = False

# Replace classifier[6] with a fresh linear layer that has one output per
# symbol class. The new layer is created after the freeze, so its parameters
# keep requires_grad=True.
num_features = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_features, symbol_dataset.num_classes)
model_ft.to(device)

print(model_ft)

Apply image transformations so that the dataset images match the input format AlexNet expects

In [0]:
# Import the torchvision module under a private alias. This cell rebinds the
# name `transforms` to the composed pipeline (a later cell passes it to the
# dataset), so calling `transforms.Compose` directly would fail with an
# AttributeError as soon as the cell is run a second time.
from torchvision import transforms as tv_transforms

transforms = tv_transforms.Compose([
    tv_transforms.ToPILImage(),
    # NOTE(review): the symbol images appear to be single-channel (they are
    # displayed with a grey colormap earlier) -- confirm. AlexNet's first
    # convolution expects 3 input channels, so the grey channel is replicated.
    tv_transforms.Grayscale(num_output_channels=3),
    tv_transforms.Resize(224),
    tv_transforms.ToTensor()
])

In [0]:
# Fractions of the dataset used for training and for testing, in that order.
test_train_split = [0.8, 0.2]
# Compare with a tolerance: fractions such as [0.6, 0.3, 0.1] do not sum to
# exactly 1.0 in floating point.
assert np.isclose(np.sum(test_train_split), 1.0)

symbol_dataset = AtomicSymbolDataset(ATOMIC_SYMBOL_DATASET_DIR, 100, transforms)

# Round only the training share and give the remainder to the test set, so the
# two lengths always add up to the dataset size (rounding each part separately
# can be off by one for some ratios). Plain Python ints are passed on.
num_train = int(round(test_train_split[0] * len(symbol_dataset)))
split_lengths = [num_train, len(symbol_dataset) - num_train]
assert sum(split_lengths) == len(symbol_dataset)

train_set, test_set = utils.data.random_split(symbol_dataset, split_lengths)
train_loader = utils.data.DataLoader(train_set, batch_size=4, shuffle=True, num_workers=2)
# Evaluation does not depend on example order, so the test loader is not shuffled.
test_loader = utils.data.DataLoader(test_set, batch_size=4, shuffle=False, num_workers=2)

In [0]:
# Training setup. The learning rate and momentum are starting values, not tuned.
loss_func = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer;
# the pretrained AlexNet weights were frozen when the model was built.
optimizer = optim.SGD(
    [p for p in model_ft.parameters() if p.requires_grad], lr=0.001, momentum=0.9)
# The dataset yields symbol names, but CrossEntropyLoss needs integer class
# indices; sorting the names makes the mapping reproducible.
class_to_idx = {symbol: index for index, symbol in enumerate(sorted(symbol_dataset.symbols))}
# Mini-batches between loss reports. The previous interval of 2000 is larger
# than the number of batches in an epoch for this dataset size, so nothing
# would ever be printed.
LOG_EVERY = 200

model_ft.train()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is [image batch, sequence of symbol names]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = torch.tensor([class_to_idx[symbol] for symbol in labels], device=device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model_ft(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # print the mean loss over the last LOG_EVERY mini-batches
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0

print('Finished Training')