# Maths Detection Installation

In [None]:
# Optional: fetch a sample image for showcasing (currently disabled).
# !wget https://sharelatex-wiki-cdn-671420.c.cdn77.org/learn-scripts/images/f/fe/Amsexample.png

# Install the old, pinned torch/torchvision versions (this might take a while).
# pip reports dependency conflicts with the preinstalled torchtext/torchaudio,
# which require torch==1.12.1.
!pip install torch==1.1.0 torchvision==0.3.0 -q

# Download the pre-trained model published by MaliParag, addressed by its Google Drive file id
# (presumably the AMATH512_e1GTDB.pth checkpoint loaded further down -- confirm).
!gdown 1bGNvg9uLCTbVE9hk1yWE-2tLgX1eg_me

# Clone the ScanSSD repository from GitHub
!git clone https://github.com/MaliParag/ScanSSD.git

--2022-10-25 02:39:07--  https://sharelatex-wiki-cdn-671420.c.cdn77.org/learn-scripts/images/f/fe/Amsexample.png
Resolving sharelatex-wiki-cdn-671420.c.cdn77.org (sharelatex-wiki-cdn-671420.c.cdn77.org)... 185.76.10.3, 185.76.10.12, 185.59.222.21, ...
Connecting to sharelatex-wiki-cdn-671420.c.cdn77.org (sharelatex-wiki-cdn-671420.c.cdn77.org)|185.76.10.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 88919 (87K) [image/png]
Saving to: ‘Amsexample.png’


2022-10-25 02:39:07 (5.29 MB/s) - ‘Amsexample.png’ saved [88919/88919]

[K     |████████████████████████████████| 676.9 MB 3.9 kB/s 
[K     |████████████████████████████████| 2.6 MB 52.1 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchtext 0.13.1 requires torch==1.12.1, but you have torch 1.1.0 which is incompatible.
torchaudio 0.12.1+cu113 requires torch==1.12.1, bu

In [None]:
# Brute-force restart of the kernel: os._exit terminates the Python process
# immediately, without running cleanup handlers.
# NOTE(review): presumably needed so the torch version installed above is the
# one that gets imported afterwards -- confirm. Cells below must be run again
# after this one.
import os
os._exit(00)

In [None]:
# Fix relative import issues: put the cloned ScanSSD checkout and two of its
# sub-packages at the front of sys.path. Each entry is inserted at position 0,
# so the last one listed here ends up first on the search path.
import sys, os
for parts in (('ScanSSD',), ('ScanSSD', 'layers'), ('ScanSSD', 'gtdb')):
    sys.path.insert(0, os.path.join('/content', *parts))


In [None]:
from collections import OrderedDict
import cv2
import math
import numpy as np
import argparse
import torch
import torch.nn as nn
from torchvision import transforms
from ScanSSD.ssd import build_ssd
from ScanSSD.data import config

In [None]:
# For viewing images in Colab (You must remove if running this locally)
from google.colab.patches import cv2_imshow

In [None]:
class ArgStub():
    """Plain attribute bag standing in for parsed command-line arguments.

    An instance is passed to ``MathDetector``, which forwards the whole object
    to ``build_ssd`` and reads ``cfg`` and ``model_type`` itself. All values
    are fixed; there are no constructor parameters.
    """

    def __init__ (self):
        # (attribute name, value) pairs, assigned in this order.
        fixed_options = (
            ('cuda', False),
            ('kernel', (1, 5)),
            ('padding', (0, 2)),
            ('phase', 'test'),
            ('visual_threshold', 0.6),
            ('verbose', False),
            ('exp_name', 'SSD'),
            ('model_type', 512),
            ('use_char_info', False),
            ('limit', -1),
            ('cfg', 'hboxes512'),
            ('batch_size', 4),
            ('num_workers', 2),
            ('neg_mining', True),
            ('log_dir', 'logs'),
            ('stride', 0.1),
            ('window', 1200),
        )
        for option_name, option_value in fixed_options:
            setattr(self, option_name, option_value)


def draw_box (image, boxes):
    """Draw every box onto ``image`` in place as a green, 2-pixel outline.

    Each box is indexed as ``box[0..3]``; the first pair and the second pair
    are handed to ``cv2.rectangle`` as the two opposite corner points.
    """
    outline_color = (0, 255, 0)
    outline_thickness = 2
    for box in boxes:
        first_corner = (box[0], box[1])
        opposite_corner = (box[2], box[3])
        cv2.rectangle(image, first_corner, opposite_corner,
                      outline_color, outline_thickness)


def _img_to_tensor (image):
    rimg = cv2.resize(image, (512, 512),
                      interpolation = cv2.INTER_AREA).astype(np.float32)
    rimg -= np.array((246, 246, 246), dtype=np.float32)
    rimg = rimg[:, :, (2, 1, 0)]
    return torch.from_numpy(rimg).permute(2, 0, 1)


def _scale_boxes_to_pixels (image, rel_boxes):
    """Scale relative ``[x1, y1, x2, y2]`` boxes to integer pixels of ``image``."""
    height, width = image.shape[0], image.shape[1]
    return [
        [int(box[0] * width), int(box[1] * height),
         int(box[2] * width), int(box[3] * height)]
        for box in rel_boxes
    ]


def FixImgCoordinates (images, boxes):
    """Convert relative box coordinates into pixel coordinates.

    Args:
        images: either a single image array or a list of image arrays. Only
            ``shape[0]`` (rows / height) and ``shape[1]`` (columns / width)
            are read.
        boxes: one sequence of boxes per image, each box indexable as
            ``box[0..3]`` in ``x1, y1, x2, y2`` order with values relative to
            the image size. For a single (non-list) image only ``boxes[0]``
            is used.

    Returns:
        A list with one entry per image; each entry is a list of
        ``[x1, y1, x2, y2]`` integer lists (truncated with ``int``).

    The x values are scaled by the image width and the y values by its height,
    matching how ``draw_box`` passes ``(b[0], b[1])`` to OpenCV as an (x, y)
    point. The previous version multiplied x by the height and y by the width,
    which is only correct for square images. The leftover debugging ``print``
    of each image shape was removed.
    """
    if isinstance(images, list):
        return [
            _scale_boxes_to_pixels(image, boxes[index])
            for index, image in enumerate(images)
        ]
    return [_scale_boxes_to_pixels(images, boxes[0])]


def DrawAllBoxes(images, boxes):
    """Draw ``boxes[i]`` onto ``images[i]`` for every image, in place.

    ``boxes`` needs at least as many entries as ``images``; extra entries are
    ignored.
    """
    for position, image in enumerate(images):
        draw_box(image, boxes[position])

class MathDetector():
    """Thin wrapper around a ScanSSD network for running detection on CPU.

    The network is built with ``build_ssd`` in ``'test'`` phase, the weights
    are loaded onto the CPU, and the model is switched to eval mode.
    """

    def __init__(self, weight_path, args):
        """Build the network and load its weights.

        Args:
            weight_path: path of the checkpoint, read with ``torch.load``.
            args: options object; forwarded to ``build_ssd`` as a whole, and
                ``args.cfg`` / ``args.model_type`` are read here.
        """
        net = build_ssd(args, 'test', config.exp_cfg[args.cfg], 0, args.model_type, 2)
        self._net = net # nn.DataParallel(net)
        weights = torch.load(weight_path, map_location = torch.device('cpu'))

        # Checkpoints saved from an nn.DataParallel model prefix every key with
        # `module.`. Strip that prefix only where it is present, so checkpoints
        # saved from a bare model load unchanged instead of losing the first
        # seven characters of each key.
        prefix = 'module.'
        new_weights = OrderedDict()
        for k, v in weights.items():
            name = k[len(prefix):] if k.startswith(prefix) else k
            new_weights[name] = v

        self._net.load_state_dict(new_weights)
        self._net.eval()

    def Detect (self, thres, images):
        """Run the network on a batch and keep detections scoring >= ``thres``.

        Args:
            thres: minimum score for a detection to be kept.
            images: batch passed directly to the network; ``len(images)`` is
                used as the number of images.

        Returns:
            ``(boxes, scores)``: two lists with one entry per image. Each box
            is a 4-tuple of tensor elements taken from positions 1..4 of a
            detection row; each score is the element at position 0.
        """
        cls = 1                 # math class
        boxes = []
        scores = []

        # Inference only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            y, debug_boxes, debug_scores = self._net(images)  # forward pass

        # Indexed below as [image, class, detection, (score, 4 coordinates)].
        detections = y.data

        for k in range(len(images)):

            img_boxes = []
            img_scores = []
            for j in range(detections.size(2)):

                if ( detections[k, cls, j, 0] < thres ):
                    continue

                pt = detections[k, cls, j, 1:]
                coords = (pt[0], pt[1], pt[2], pt[3])
                img_boxes.append(coords)
                img_scores.append(detections[k, cls, j, 0])

            boxes.append(img_boxes)
            scores.append(img_scores)

        return boxes, scores

    def ShowNetwork (self):
        """Print the wrapped network."""
        print(self._net)

    def DetectAny (self, thres, image):
        """Detect on one image array or a list of image arrays.

        Each image is converted with ``_img_to_tensor``; a list is stacked
        into one batch, a single image gets a batch dimension added.

        Returns:
            ``(boxes, scores)`` where ``boxes`` has been passed through
            ``FixImgCoordinates`` to map the network output onto the pixel
            grid of the input image(s).
        """
        if isinstance(image, list):
            t_list = [_img_to_tensor(img) for img in image]
            t = torch.stack(t_list, dim = 0)
        else:
            t = _img_to_tensor(image).unsqueeze(0)
        # fix box coordinates to image pixel coordinates
        boxes, scores = self.Detect(thres, t)
        return FixImgCoordinates(image, boxes), scores

# Build the detector from the checkpoint file; the relative path means the
# file is expected in the current working directory.
md = MathDetector('AMATH512_e1GTDB.pth', ArgStub())


In [None]:
# Load image for detection
img = cv2.imread('/content/maths_tets_page-0002.jpg', cv2.IMREAD_COLOR)
if img is None:
  # cv2.imread returns None instead of raising when the file cannot be read.
  raise FileNotFoundError('Could not read /content/maths_tets_page-0002.jpg')

# Side length (px) of the square windows fed to the detector
WINDOW_SIZE = 512

# Container for detection results
borders = []

image_height, image_width, channels = img.shape

# Compute the number of rolling windows needed to tile the whole page
nwindows_vertical = math.ceil(image_height / WINDOW_SIZE)
nwindows_horizontal = math.ceil(image_width / WINDOW_SIZE)

# Use a rolling window of 512px for detection
for i in range(nwindows_vertical):
  for j in range(nwindows_horizontal):

    # Window bounds in page coordinates; the last row/column is clipped to the page.
    # (The y start used to be multiplied by `i` a second time, which pushed every
    # row from the third onwards to a wrong or out-of-range offset.)
    window_x_start = WINDOW_SIZE * j
    window_x_end = min(window_x_start + WINDOW_SIZE, image_width)
    window_y_start = WINDOW_SIZE * i
    window_y_end = min(window_y_start + WINDOW_SIZE, image_height)
    window_width = window_x_end - window_x_start
    window_height = window_y_end - window_y_start

    rolling_window = img[window_y_start:window_y_end, window_x_start:window_x_end]

    # Create a white square of the full window size; clipped windows are
    # centred on it so the detector always sees a 512x512 input.
    new_image_width = WINDOW_SIZE
    new_image_height = WINDOW_SIZE
    color = (255,255,255)
    padded_window = np.full((new_image_height, new_image_width, channels), color, dtype=np.uint8)
    # compute center offset
    x_center = (new_image_width - window_width) // 2
    y_center = (new_image_height - window_height) // 2

    # Copy the window to the center of the white square
    padded_window[y_center:y_center+window_height, x_center:x_center+window_width] = rolling_window

    # Detect math elements
    window_borders, scores = md.DetectAny(0.33, padded_window)

    # Fix borders to the whole image, not just the window:
    # remove the centering offset, then add the window's position on the page.
    for box in window_borders[0]:
      box[0] = window_x_start + (box[0] - x_center)
      box[1] = window_y_start + (box[1] - y_center)
      box[2] = window_x_start + (box[2] - x_center)
      box[3] = window_y_start + (box[3] - y_center)
    borders.extend(window_borders[0])

# Do not delete this: DrawAllBoxes expects one list of boxes per image
borders = [borders]

In [None]:
# Show the raw box list, then draw the boxes on a copy so `img` stays untouched.
display(borders)
img_c = np.array(img, copy=True)
DrawAllBoxes([img_c], borders)

# Render the annotated copy in the notebook output
cv2_imshow(img_c)

# Extras

In [None]:
# Hard-coded boxes, each passed as the crop rectangle to PIL's Image.crop in
# the cropping cell below (i.e. left, upper, right, lower in pixels).
# NOTE(review): presumably copied by hand from a detector run on page 0007 -- confirm.
boxes = [
  [412, 454, 511, 510],
  [236, 261, 519, 322],
  [367, 187, 510, 235],
  [431, 333, 510, 387],
  [504, 178, 825, 244],
  [636, 447, 787, 507],
  [514, 450, 604, 507],
  [510, 266, 962, 318],
  [510, 332, 760, 396],
  [389, 695, 510, 730],
  [219, 834, 508, 895],
  [194, 968, 218, 996],
  [477, 625, 512, 663],
  [363, 531, 510, 557],
  [261, 529, 301, 551],
  [226, 538, 309, 554],
  [514, 834, 971, 897],
  [515, 681, 799, 743],
  [508, 627, 720, 661],
  [512, 913, 740, 949],
  [810, 763, 873, 791]
  ]

In [None]:
# Load page 0007. Note: this rebinds `img`, which the detection cell above
# also assigns (there from page 0002).
img = cv2.imread('/content/maths_tets_page-0007.jpg', cv2.IMREAD_COLOR)

In [None]:
from PIL import Image

def crop(image_path, coords, saved_location):
    """Crop a region out of an image file, save it, and show it.

    Args:
        image_path: path of the source image, opened with ``Image.open``.
        coords: crop rectangle passed unchanged to ``Image.crop``.
        saved_location: output path handed to ``save``. Its parent directory
            is created if it does not exist yet.
    """
    import os

    # Saving into a directory that does not exist fails, so create it first.
    target_dir = os.path.dirname(saved_location)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # The context manager closes the source file even if cropping/saving fails.
    with Image.open(image_path) as image_obj:
        cropped_image = image_obj.crop(coords)
        cropped_image.save(saved_location)
        cropped_image.show()

# Crop every hard-coded box out of page 0007; the running index becomes part
# of the output file name.
image = "/content/maths_tets_page-0007.jpg"
idx = 0
for box in map(tuple, boxes):
  crop(image, box, f'/content/cropped/cropped_{idx}_.jpg')
  idx += 1

In [None]:
# Bundle the cropped images into cropped.zip (recursively).
!zip -r cropped.zip /content/cropped/

# Maths Recognition 

In [None]:
# Install pix2tex, which provides the LatexOCR model imported below.
# NOTE(review): the version is not pinned -- consider pinning for reproducibility.
!pip install pix2tex -q

[K     |████████████████████████████████| 560 kB 30.5 MB/s 
[K     |████████████████████████████████| 431 kB 62.0 MB/s 
[K     |████████████████████████████████| 41 kB 540 kB/s 
[K     |████████████████████████████████| 3.2 MB 59.7 MB/s 
[K     |████████████████████████████████| 5.5 MB 58.9 MB/s 
[K     |████████████████████████████████| 6.6 MB 60.4 MB/s 
[K     |████████████████████████████████| 182 kB 72.2 MB/s 
[?25h  Building wheel for entmax (setup.py) ... [?25l[?25hdone


In [None]:
#if after restart kernal the bellow function doesnt work
!pip uninstall Pillow
!pip install Pillow

Found existing installation: Pillow 9.3.0
Uninstalling Pillow-9.3.0:
  Would remove:
    /usr/local/lib/python3.7/dist-packages/PIL/*
    /usr/local/lib/python3.7/dist-packages/Pillow-9.3.0.dist-info/*
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libbrotlicommon-cf2297e4.so.1.0.9
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libbrotlidec-97e69943.so.1.0.9
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libfreetype-7d9be1ab.so.6.18.3
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libharfbuzz-5e08a948.so.0.50301.0
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libjpeg-b1f3a3b7.so.62.3.0
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/liblcms2-1e643a89.so.2.0.13
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/liblzma-816f5b19.so.5.2.7
    /usr/local/lib/python3.7/dist-packages/Pillow.libs/libopenjp2-fca9bf24.so.2.5.0
    /usr/local/lib/python3.7/dist-pack

In [None]:
from pix2tex.cli import LatexOCR
from PIL import Image

# Shared OCR model instance, created once and reused by every call below.
# The recorded output of this cell shows model weights being downloaded.
LatexModel = LatexOCR()
def forumla_detection(image_path):
    """Run the shared ``LatexModel`` on an image file and return its result.

    The misspelled name ("forumla") is kept because other cells call it.

    Args:
        image_path: path of the image, opened with ``Image.open``.

    Returns:
        Whatever ``LatexModel`` returns for the image (the recorded outputs in
        this notebook show LaTeX source strings).
    """
    # The context manager closes the image file once the model has consumed it.
    with Image.open(image_path) as image:
        return LatexModel(image)

download weights v0.0.1 to path /usr/local/lib/python3.7/dist-packages/pix2tex/model/checkpoints


weights.pth: 100%|██████████| 97.4M/97.4M [00:12<00:00, 8.22Mb/s]
image_resizer.pth: 100%|██████████| 18.5M/18.5M [00:05<00:00, 3.88Mb/s]


In [None]:
# Try the recogniser on a single screenshot; the returned value is shown as the cell output.
forumla_detection("/content/Screenshot 2022-11-18 at 5.07.25 PM.png")

'a x^{a}+\\mathbf{bx}+c=0'

In [None]:
import os

# Run the recogniser on every .jpg directly inside /content/ and print
# "<file name>  -  <result>" for each (order is whatever os.listdir returns).
dir_list = os.listdir("/content/")

for i in dir_list:
  if not i.endswith(".jpg"):
    continue
  img_path = os.path.join("/content/", i)
  formula = forumla_detection(img_path)
  print(i, " - ", formula)

cropped_12_.jpg  -  \mathbf{M}{\big\{}
cropped_5_.jpg  -  \Omega_{q}={\frac{\pi^{2}}{6}}-\sum_{r=1}^{q}{\frac{1}{r^{2}}}.
cropped_10_.jpg  -  {\bf\tau}_{(001)T,t}^{(00i_{1})q}=\frac{(T-t)^{5/2}}{2}\biggl(\frac{1}{3}\zeta_{0}^{(i_{1})}+\frac{1}{\sqrt{2}\pi^{i}}
cropped_2_.jpg  -  J_{(0|)T,t}^{(0|\l_{i})q}=\frac{(T-t)}{2}
cropped_15_.jpg  -  {\mathfrak{I}}_{r}^{\gamma}\,,\;\;\{{\mathfrak{I}}_{r-1}^{\gamma},
cropped_4_.jpg  -  \stackrel{\bar{3}}{\longrightarrow}\left(\begin{array}{l l}{{\zeta^{\left(i_{1}\right)}}}&{{\sqrt{2}}}\\ {{\zeta^{\left(i_{0}\right)}}}&{{\strut}}\\ {{\strut\sim}}&{{\.T}}\end{array}\left(\stackrel{\left[i_{1}\right]}{\left.\sum_{T=1}^{\left(i_{1}\right)}+\sqrt{\Omega_{q}\right.}\zeta^{\left(i_{1}\right)}}\end{array}\right)\right)
cropped_13_.jpg  -  {\frac{z^{(l)}}{s q}}\ (r=1,\ldots,\,q;
cropped_7_.jpg  -  \begin{array}{l}{{\begin{array}{c}{{q}}\\ {{\sum_{-}\left(\langle\stackrel{\prime}{}_{1}\rangle\langle\langle i_{1}\rangle}{}(i_{1}\rangle}}\\ {{\sum_{-}\left(\

# Display LaTeX in a Jupyter Notebook

In [None]:
from IPython.display import Latex

# Raw string: in a normal string literal Python turns `\b` (as in `\begin`)
# into a backspace character and collapses each `\\` row break into a single
# backslash, so the LaTeX reaching the renderer would be corrupted.
Latex(r"{\begin{array}{l l}{{\mathrm{AABGCCSC}\mathrm{ABQR~f~AM~are~alindes~of~AABC~and~APQR~respecively~and~AB^{2}:}}}\\ {{\mathrm{PQ^{2}=4:9,\qquad(81~4)~16.81~}}}&{{(8)~4;9}}&{{(\mathrm{(1)~3.7~}}}\\ {{(\mathrm{(4)~16.51~}}}&{{(81~4)~16.51~}}}&{{(8)~4.9}}\end{array}}")

<IPython.core.display.Latex object>