In [None]:
import numpy as np
from os import listdir
from skimage import io
import matplotlib.pyplot as plt
from scipy import ndimage
from skimage.transform import resize, rotate
from skimage.util import random_noise, invert
from skimage.color import gray2rgb
import tensorflow as tf
import math
import os
import json
import cv2

In [None]:
# Root folder holding one sub-folder of symbol images per class.
read_path = "extracted_images"
# Switch between generating the training split (True) and the test split (False).
train = True
# Output locations for normalized single symbols and for composed formulas.
write_single_path, formula_path = (
    ("normalized/train", "formulas/train/fractions")
    if train
    else ("normalized/test", "formulas/test/fractions")
)

In [None]:
# Class names; a class's index in this list is its numeric label.
label_names = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '-', '+', '=', 'leq', 'neq', 'geq', 'alpha', 'beta', 'lt', 'gt',
    'x', 'y', 'd', 'int', '(', ')', '!', ',', 'sqrt',
]

# Lookup from class name to numeric label.
# Creating the per-label output folders (os.mkdir) is intentionally left disabled here.
label_names_dict = {label: no for no, label in enumerate(label_names)}
print(label_names_dict)

In [None]:
def crop(img):
    """Trim the (almost) white border around a symbol.

    A column or row counts as blank when the sum of its pixels, after scaling
    by 1/255, is at least 98% of its length. Blank lines are stripped from
    each side, but never past the middle of the image. One blank line is kept
    as a margin on the left and on the top when available.

    Parameters
    ----------
    img : 2-D array
        Grayscale image; the threshold assumes white is 255.

    Returns
    -------
    2-D float array
        The cropped image, on the same 0..255 scale as the input.
    """
    scaled = np.asarray(img) / 255
    h, w = scaled.shape

    def _is_blank(line):
        # Use the line's current length so the test stays valid for rows
        # after the columns have already been cropped.
        return np.sum(line) >= 0.98 * line.size

    # Horizontal extent of the content.
    left = 0
    while left < w // 2 and _is_blank(scaled[:, left]):
        left += 1
    right = w - 1
    while right > w // 2 and _is_blank(scaled[:, right]):
        right -= 1
    left = max(left - 1, 0)  # keep a one-column margin on the left
    # `right` is the last column to keep, so the exclusive slice end is right + 1.
    scaled = scaled[:, left:right + 1]

    # Vertical extent of the content, measured on the column-cropped image.
    top = 0
    while top < h // 2 and _is_blank(scaled[top, :]):
        top += 1
    bottom = h - 1
    while bottom > h // 2 and _is_blank(scaled[bottom, :]):
        bottom -= 1
    top = max(top - 1, 0)  # keep a one-row margin on the top
    scaled = scaled[top:bottom + 1, :]

    return scaled * 255

def add_symbol_to_image(img,folder,choices,padding,minsize,maxsize,bpower=False,bsmall=False,bnom=False,bden=False,
                        width=False, change_padding=True):
    """Paste one randomly chosen, randomly sized symbol onto the formula canvas.

    Parameters
    ----------
    img : 2-D array
        Canvas; it is modified in place and also returned.
    folder : str
        Class name. It is both the sub-folder of ``read_path`` to read from
        and the key looked up in ``label_names_dict``.
    choices : list of str
        File names inside that folder; one is picked at random.
    padding : int
        x coordinate at which the left edge of the symbol is placed.
    minsize, maxsize : int
        Inclusive bounds for the random width and height the symbol is
        resized to before cropping.
    bpower, bsmall, bnom, bden : bool
        Vertical placement modifiers; they move the symbol by -15, +10,
        -30 and +30 pixels respectively.
    width : int or False
        When not ``False``, the resize width to use instead of a random one.
    change_padding : bool
        When true, the returned padding is advanced past the symbol plus a
        random gap of 2..4 pixels; otherwise it is returned unchanged.

    Returns
    -------
    (img, padding, bounding_box)
        ``bounding_box`` holds 'xmin', 'xmax', 'ymin', 'ymax', 'class_text'
        and 'class'.

    Raises
    ------
    ValueError
        If the symbol would not lie completely inside the canvas.
    """
    choice = np.random.randint(len(choices))
    symbol_img = io.imread(read_path+"/"+folder+"/"+choices[choice])
    new_width = np.random.randint(minsize,maxsize+1)
    new_height = np.random.randint(minsize,maxsize+1)
    if width is not False:
        new_width = width
    # NOTE(review): the *255 presumably maps resize()'s float output back to 0..255 - confirm.
    symbol_img_res = resize(symbol_img, (new_height, new_width), cval=1)*255
    symbol_img_res = crop(symbol_img_res)
    new_height, new_width = symbol_img_res.shape

    # Random vertical jitter of up to 4 pixels around the centre of a 60 px band.
    jitter = np.random.randint(-4+(60-new_height)//2,4+(60-new_height)//2)
    vertical_offset = -15*bpower+10*bsmall-30*bnom+30*bden
    if folder == "y" or folder == "beta":
        # These two classes are placed 10 px lower than the others.
        vertical_offset += 10

    bounding_box = {
        'xmin': padding,
        'xmax': padding+new_width,
        'ymin': 65+jitter+vertical_offset,
        'ymax': 65+jitter+new_height+vertical_offset,
        'class_text': folder,
        'class': label_names_dict[folder]
    }
    xmin, xmax = bounding_box['xmin'],bounding_box['xmax']
    ymin, ymax = bounding_box['ymin'],bounding_box['ymax']

    # Fail with a clear message instead of numpy's broadcast error (or a
    # wrapped-around negative index) when the symbol leaves the canvas.
    canvas_height, canvas_width = img.shape[:2]
    if xmin < 0 or ymin < 0 or xmax > canvas_width or ymax > canvas_height:
        raise ValueError(
            "symbol '%s' at x=%d..%d, y=%d..%d does not fit a %dx%d canvas"
            % (folder, xmin, xmax, ymin, ymax, canvas_width, canvas_height))

    img[ymin:ymax,xmin:xmax] += invert(symbol_img_res)+254
    if change_padding:
        padding += new_width+np.random.randint(2,5)

    return img,padding,bounding_box

def add_rectangles(img, bounding_boxes):
    """Return an RGB copy of ``img`` with every bounding box outlined in red.

    Parameters
    ----------
    img : 2-D array
        Grayscale image; it is stacked into three channels and cast to uint8.
    bounding_boxes : list
        The first element is skipped (the generator stores the file name
        there); each remaining element is a dict with 'xmin', 'xmax',
        'ymin' and 'ymax'.

    Returns
    -------
    uint8 array of shape (height, width, 3)

    Box coordinates are clamped to the image so a box touching or exceeding
    the border is still drawn instead of raising an IndexError or wrapping.
    """
    img_color = np.asarray(np.dstack((img, img, img)), dtype=np.uint8)
    height, width = img_color.shape[:2]
    red = [255, 0, 0]
    for bounding_box in bounding_boxes[1:]:
        xmin = max(bounding_box['xmin'], 0)
        xmax = min(bounding_box['xmax'], width - 1)
        ymin = max(bounding_box['ymin'], 0)
        ymax = min(bounding_box['ymax'], height - 1)
        if xmin > xmax or ymin > ymax:
            continue  # box lies completely outside the image
        # The +1 makes each edge include both end points, so all four
        # corners of the outline are drawn.
        img_color[ymin, xmin:xmax + 1] = red
        img_color[ymax, xmin:xmax + 1] = red
        img_color[ymin:ymax + 1, xmin] = red
        img_color[ymin:ymax + 1, xmax] = red
    return img_color

In [None]:
def normalize_single(symbol):
    """Normalize one symbol image to a centred 48x48 float32 image.

    Steps: scale the values by the image maximum, resize so the longer side
    is 40 pixels (aspect ratio kept), pad with ones to 48x48, then translate
    the image so that the centre of mass of the inverted image sits in the
    middle.

    Parameters
    ----------
    symbol : 2-D array
        Grayscale symbol image.

    Returns
    -------
    2-D float32 array of shape (48, 48)
    """
    symbol = np.copy(symbol).astype(np.float32)
    peak = np.max(symbol)
    if peak > 0:
        # Guarded so an all-zero image does not turn into NaNs (0/0).
        symbol /= peak
    rows, cols = symbol.shape

    # scale to 40x40 (longer side), keeping the aspect ratio
    inner_size = 40
    factor = inner_size/max(rows, cols)
    if rows > cols:
        rows, cols = inner_size, int(round(cols*factor))
    else:
        rows, cols = int(round(rows*factor)), inner_size
    # A very thin symbol could round to 0 pixels, which cv2.resize rejects.
    rows, cols = max(rows, 1), max(cols, 1)
    inner = cv2.resize(symbol, (cols, rows))

    # pad to 48x48; when the gap is odd the extra pixel goes before the image
    outer_size = 48
    colsPadding = ((outer_size-cols+1)//2, (outer_size-cols)//2)
    rowsPadding = ((outer_size-rows+1)//2, (outer_size-rows)//2)
    outer = np.pad(inner,(rowsPadding,colsPadding),'constant', constant_values=(1,1))

    # center the mass
    shiftx,shifty = getBestShift(outer)
    shifted = shift(outer,shiftx,shifty)
    return shifted
    
def getBestShift(img):
    """Return the integer (shiftx, shifty) that moves the symbol's centre of
    mass to the centre of the image.

    The centre of mass is computed on ``invert(img)``.
    NOTE(review): this presumably makes dark strokes carry the weight on a
    bright background - confirm against the input convention.

    If the inverted image has no mass at all (the centroid is not finite),
    a zero shift is returned instead of casting NaN to int.
    """
    inv = invert(img)
    # ndimage.center_of_mass is the public name; the ndimage.measurements
    # namespace is deprecated.
    cy,cx = ndimage.center_of_mass(inv)

    rows,cols = img.shape
    if not (np.isfinite(cx) and np.isfinite(cy)):
        # Blank image: pretend the mass already sits in the centre.
        cx, cy = cols/2.0, rows/2.0
    shiftx = np.round(cols/2.0-cx).astype(int)
    shifty = np.round(rows/2.0-cy).astype(int)

    return shiftx,shifty

def shift(img,sx,sy):
    """Translate ``img`` by ``sx`` pixels in x and ``sy`` pixels in y.

    The output has the same size as the input; pixels uncovered by the
    translation are filled with the border value 1.
    """
    height, width = img.shape
    # 2x3 affine matrix of a pure translation.
    translation = np.array([[1, 0, sx],
                            [0, 1, sy]], dtype=np.float32)
    return cv2.warpAffine(img, translation, (width, height), borderValue=1)

In [None]:
path = "extracted_images"
label_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '+', '=', 'leq', 'neq', 'geq', 'alpha',
               'beta', 'lt', 'gt', 'x', 'y', 'd', 'int', '(', ')', '!', ',', 'sqrt']
#for data by index normalization
#label_names = [","]
nof_labels = len(label_names)
nof_images = 0

# Count the images of every class and give each class its numeric label.
# `files` is left holding the listing of the last class after the loop.
labels_dict = dict()
for i, label in enumerate(label_names):
    files = listdir(path+"/"+label)
    nof_images += len(files)
    labels_dict[label] = i
print("#nof_images: ", nof_images)
print(labels_dict)

# Pre-allocated storage: one 48x48 image and one integer label per file.
images = np.zeros((nof_images, 48, 48), dtype=np.float32)
# The `np.int` alias was removed in NumPy 1.24; the builtin `int` is the same dtype.
labels = np.zeros(nof_images, dtype=int)

In [None]:
# Normalizing the data of one label folder.
# NOTE(review): these are machine-specific absolute paths, and `path` set by
# an earlier cell is overwritten here - consider deriving both from
# read_path / write_single_path.
path = "C:/Users/Grzegorz/Desktop/inz/projekt/projekt_2/HE2LaTeX-master/extracted_images/"
path_2 = "C:/Users/Grzegorz/Desktop/inz/projekt/projekt_2/HE2LaTeX-master/normalized/train/"
label_name = ","
# List the folder explicitly instead of relying on whatever `files` an
# earlier cell happened to leave behind.
files = listdir(path+label_name)
for i, file_name in enumerate(files):
    if i % 100 == 0:
        print("At i=%d" % i)
    img = cv2.imread(path+label_name+"/"+file_name, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None for files it cannot decode.
        print("Skipping unreadable file: ", file_name)
        continue
    img_2 = normalize_single(img)
    io.imsave(path_2+label_name+"/"+file_name, img_2)

print("done")

In [None]:
#simple int equations
def get_random():
    """Draw a random positive integer, favouring short numbers.

    With probability 50% the result is in 1..9, with 40% in 1..99 and with
    10% in 1..999.
    """
    bucket = np.random.randint(100)
    if bucket < 50:
        upper_bound = 10
    elif bucket < 90:
        upper_bound = 100
    else:
        upper_bound = 1000
    return np.random.randint(1, upper_bound)
# Smoke-test call; the value is not used anywhere in the visible code.
x=get_random()


def _skip_first_eighth(files):
    """Return ``files`` without its first eighth.

    NOTE(review): presumably the first eighth is reserved for another data
    split - confirm. os.listdir order is arbitrary, so the split is only
    stable while the directory listing order is.
    """
    return files[len(files)//8:]


# One file list per digit class; each is trimmed exactly once.
list_digits = [_skip_first_eighth(listdir(read_path+"/"+str(digit))) for digit in range(10)]

# Operator / variable classes. Each list is read once and trimmed once;
# previously this happened inside a ten-iteration loop, which cut the
# lists down ten times over.
list_plus = _skip_first_eighth(listdir(read_path+"/+"))
list_minus = _skip_first_eighth(listdir(read_path+"/-"))
list_lt = _skip_first_eighth(listdir(read_path+"/lt"))
list_gt = _skip_first_eighth(listdir(read_path+"/gt"))
list_leq = _skip_first_eighth(listdir(read_path+"/leq"))
list_geq = _skip_first_eighth(listdir(read_path+"/geq"))
list_neq = _skip_first_eighth(listdir(read_path+"/neq"))
list_equal = _skip_first_eighth(listdir(read_path+"/="))
list_alpha = _skip_first_eighth(listdir(read_path+"/alpha"))
list_beta = _skip_first_eighth(listdir(read_path+"/beta"))
list_x = _skip_first_eighth(listdir(read_path+"/x"))
list_y = _skip_first_eighth(listdir(read_path+"/y"))
list_d = _skip_first_eighth(listdir(read_path+"/d"))

# These classes are used in full, exactly as before.
list_int = listdir(read_path+"/int")
list_openp = listdir(read_path+"/(")
list_closep = listdir(read_path+"/)")
list_exclamation = listdir(read_path+"/!")
list_coma = listdir(read_path+"/,")
list_sqrt = listdir(read_path+"/sqrt")


list_mid = [list_minus,list_plus]
list_end = [list_lt,list_gt,list_leq,list_geq,list_neq,list_equal]
list_variables = [list_alpha,list_beta,list_x,list_y]
var_names = ["#alpha", "#beta", "x", "y"]
#os.mkdir(write_path_add)


# Size (rows, columns) of the canvas every formula is drawn on.
FORMULA_CANVAS_SHAPE = (200, 60*(4+4+4))


def _random_polynomial_terms():
    """Randomly pick the terms of the integrand, from x^5 down to the constant.

    Every power is included with probability 1/2. A term with power >= 1
    gets a numeric coefficient with probability 1/2; the constant term
    always has one. Returns a list of (sign, coefficient, power) where
    ``coefficient`` is a digit string ("" for none) and ``sign`` is "+",
    "-" or "" (the x^5 term is never written with a plus sign).
    """
    terms = []
    for power in range(5, -1, -1):
        if not np.random.randint(2):
            continue
        if power == 0 or np.random.randint(2):
            coefficient = str(get_random())
        else:
            coefficient = ""
        positive = np.random.randint(2)
        if positive:
            sign = "" if power == 5 else "+"
        else:
            sign = "-"
        terms.append((sign, coefficient, power))
    return terms


def _term_label(sign, coefficient, power):
    """Return the text of one term as it appears in the file name."""
    if power == 0:
        return sign + coefficient
    if power == 1:
        return sign + coefficient + "x"
    return sign + coefficient + "x^" + str(power)


def _draw_symbol(img, padding, boxes, class_names, folder, choices, minsize, maxsize, **options):
    """Paste one symbol and record its bounding box and class name."""
    img, padding, new_bounding_box = add_symbol_to_image(img, folder, choices, padding,
                                                         minsize, maxsize, **options)
    boxes.append(new_bounding_box)
    class_names.append(folder)
    return img, padding


def _draw_integral(img, lower, upper, terms, boxes, class_names):
    """Draw the integral sign, its limits, all terms and the closing 'dx'."""
    padding = 5
    img, padding = _draw_symbol(img, padding, boxes, class_names, 'int', list_int, 65, 70)

    # Limits: the lower-limit digit is drawn without advancing the padding,
    # so the upper-limit digit that follows lands in the same column.
    lower_digits, upper_digits = str(lower), str(upper)
    for position, upper_digit in enumerate(upper_digits):
        if position < len(lower_digits):
            lower_digit = lower_digits[position]
            img, padding = _draw_symbol(img, padding, boxes, class_names, lower_digit,
                                        list_digits[int(lower_digit)], 34, 34,
                                        bden=True, change_padding=False)
        img, padding = _draw_symbol(img, padding, boxes, class_names, upper_digit,
                                    list_digits[int(upper_digit)], 34, 34, bnom=True)

    for sign, coefficient, power in terms:
        if sign == "-":
            img, padding = _draw_symbol(img, padding, boxes, class_names, '-', list_minus, 34, 34)
        elif sign == "+":
            img, padding = _draw_symbol(img, padding, boxes, class_names, '+', list_plus, 34, 34)
        for digit in coefficient:
            img, padding = _draw_symbol(img, padding, boxes, class_names, digit,
                                        list_digits[int(digit)], 35, 40)
        if power >= 1:
            img, padding = _draw_symbol(img, padding, boxes, class_names, 'x', list_x, 35, 40,
                                        bsmall=True)
        if power >= 2:
            # The exponent is drawn as a raised digit.
            img, padding = _draw_symbol(img, padding, boxes, class_names, str(power),
                                        list_digits[power], 34, 34, bpower=True)

    img, padding = _draw_symbol(img, padding, boxes, class_names, 'd', list_d, 35, 40)
    img, padding = _draw_symbol(img, padding, boxes, class_names, 'x', list_x, 35, 40)
    return img


# One entry per saved image: [{'filename': ...}, box, box, ...]
bounding_boxes = []
skipped = 0
for i in range(2000):
    random_name = str(np.random.randint(1,99999))
    img = np.zeros(FORMULA_CANVAS_SHAPE)
    upper = np.random.randint(2,1000)
    lower = np.random.randint(1,upper)
    terms = _random_polynomial_terms()

    # The file name doubles as the formula's label.
    filename = "#int #limits{"+str(lower)+"}{"+str(upper)+"} "
    filename = filename + "".join(_term_label(*term) for term in terms)
    filename = filename+"dx_"+random_name+".jpg"
    print("Filename: ", filename)

    bounding_box = [{'filename': filename}]
    class_names = []
    try:
        img = _draw_integral(img, lower, upper, terms, bounding_box, class_names)
    except ValueError as err:
        # A formula that is too long for the canvas makes the paste in
        # add_symbol_to_image fail; skip that sample instead of aborting
        # the whole run.
        skipped += 1
        print("Skipped (formula does not fit the canvas): ", err)
        continue

    bounding_boxes.append(bounding_box)
    img = invert(img)+254
    io.imsave(formula_path+"/"+filename, img/255)
    print("Finished: ", i)

print("Skipped formulas: ", skipped)