In [11]:
import os
import json
import cv2
import string
import random
import albumentations as A
import copy
import numpy as np
import matplotlib.pyplot as plt

In [2]:
template_directory = 'templates_img'

text_locations = json.load(open('text_locations.json', 'r'))

image_dir =  "data/"

backgrounds_dir = 'image_backgrounds'

In [3]:
# {'license_driver', 'dob', 'address_full', 'zip', 'name_last', 'email', 'name', 'name_first', 'business', 'state', 'initial_middle','address', 'city', 'relation'}

def generate_field(field):
    if field == 'ssn':
        res = ''.join(random.choices(string.digits, k=9))
        return res
    elif field == 'tele':
        res = ''.join(random.choices(string.digits, k=10))
        return res


def generate_random_field(length = 0):
    length = length if length else random.randint(2, 20)
    field = ''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=length))
    return field

generate_random_field()

"hY3Znj0_d'"

In [4]:
transform = A.Compose([
        # A.RandomRotate90(),
        # A.Flip(),
        # A.Transpose(),
        A.ImageCompression(quality_lower=10, p=0.1),
        A.OneOf([
            # A.IAAAdditiveGaussianNoise(),
            A.GaussNoise(p=0.8),
            A.ISONoise(p=0.2),
            A.MultiplicativeNoise(p=.05)
        ], p=0.1),
        A.OneOf([
            A.MotionBlur(p=.2),
            A.MedianBlur(blur_limit=3, p=0.1),
            A.Blur(blur_limit=3, p=0.1),
        ], p=0.05),
        A.ShiftScaleRotate(shift_limit=0.15, scale_limit=0.2, rotate_limit=60, p=0.5),
        A.OneOf([
            A.OpticalDistortion(p=0.5),
            A.GridDistortion(p=.5),
            A.PiecewiseAffine(p=0.5),
        ], p=0.5),
        A.OneOf([
            A.CLAHE(clip_limit=2),
            A.Sharpen(),
            A.Emboss(),
            A.RandomBrightnessContrast(),            
        ], p=0.05),
        A.OneOf([
            A.RandomFog(),
            A.RandomRain(),
            A.RandomSnow(),
            A.RandomSunFlare(),            
        ], p=0.1),
        A.HueSaturationValue(p=0.01),
        # A.Rotate(66,p=0.3)
        # A.ToGray(always_apply=True)
    ])
# random.seed(42) 

In [5]:
fonts = [
    cv2.FONT_HERSHEY_SIMPLEX,
    cv2.FONT_HERSHEY_COMPLEX,
    cv2.FONT_HERSHEY_PLAIN,
    cv2.FONT_HERSHEY_DUPLEX,
    cv2.FONT_HERSHEY_TRIPLEX,
    cv2.FONT_HERSHEY_COMPLEX_SMALL,
    cv2.FONT_HERSHEY_SCRIPT_COMPLEX,
    cv2.FONT_HERSHEY_SCRIPT_COMPLEX,
    cv2.FONT_ITALIC]

In [23]:

for filename in text_locations:
    if text_locations[filename] != {}:
        img = cv2.imread(template_directory+ '/'+filename, 1)
        img = cv2.resize(img, (850, 1100)) 

        doc_info = text_locations[filename]
        for loc in doc_info:
            font = random.choice(fonts)
            cv2.putText(img, generate_random_field(),
                        (int(loc['x']),int(loc['y'])), font,
					    0.75, (255, 0, 0), 2)

        transformed = transform(image=img)
        img = transformed['image']
        cv2.imshow('image', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


In [34]:
def generate_target_dictionary():
    files = os.listdir(image_dir)
    types = list(set([file[:4] for file in files]))
    categories = {}
    for i in range(len(types)):
        categories[types[i]] = i
    return categories
categories = generate_target_dictionary()


backgrounds = []
for filename in os.listdir(backgrounds_dir):
    img = cv2.imread(backgrounds_dir+ '/'+filename, 1)
    backgrounds.append(img)

out_raw = np.memmap('image_data_raw.npy', mode='w+', shape = (150000,275,210))
out_binary = np.memmap('image_data_binary.npy', mode='w+', shape = (150000,275,210))
target = np.memmap('image_target.npy', dtype=np.int8, mode='w+', shape = (150000))
index = 0
for filename in text_locations:
    if text_locations[filename] != {}:
        image = cv2.imread(template_directory+ '/'+filename, 1)
        image = cv2.resize(image, (850, 1100)) 

        doc_info = text_locations[filename]
        for i in range(30000):
            background_img = backgrounds[random.randint(0, len(backgrounds))-1]
            img = image
            for loc in doc_info:
                font = random.choice(fonts)
                cv2.putText(img, generate_random_field(),
                            (int(loc['x']),int(loc['y'])), font,
                            0.75, (255, 0, 0), 2)
            # resize(210,275)
            x_size = random.randint(-200,400)
            y_size = random.randint(-200,400)
            x_size = x_size if x_size > 0 else 0
            y_size = y_size if y_size > 0 else 0
            x_offset = int(x_size/2)
            y_offset = int(y_size/2)
            background_img = cv2.resize(background_img, (850+x_size, 1100+y_size)) 

            background_img[y_offset:y_offset+img.shape[0], x_offset:x_offset+img.shape[1]] = img
            transformed = transform(image=cv2.resize(background_img, (210,275)))
            img = transformed['image']
            
            img = cv2.cvtColor(transformed['image'], cv2.COLOR_BGR2GRAY)
            out_raw[index] = img
            # print(img.shape)
            img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
            out_binary[index] = img
            target[index] = categories[filename[:4]]
            index+=1
            # print(img.shape)
            # cv2.imshow('image', img)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            print(index,end='\r')


150000

In [35]:
out_raw.flush()
out_binary.flush()
target.flush()
out_binary = np.memmap('image_data_raw.npy', mode='r', shape = (100000,275,210))
# for x in out_binary[:5]:
    # img = cv2.imread(x)

    # cv2.imshow('image', x, cmap='gray')
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
# plt.imshow(out_binary[5], cmap='gray')
cv2.imshow('image', out_binary[5])
cv2.waitKey(0)
cv2.destroyAllWindows()

In [29]:
target.flush()
out_binary = np.memmap('image_target.npy', mode='r', shape = (100000))
print(out_binary[:30])

[1 1 1 1 1 4 4 4 4 4 2 2 2 2 2 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0]


In [85]:
backgrounds_dir = 'image_backgrounds'

for filename in os.listdir(backgrounds_dir):
    background_img = cv2.imread(backgrounds_dir+ '/'+filename, 1)
    x_size = random.randint(-200,200)
    y_size = random.randint(-200,200)
    x_size = x_size if x_size > 0 else 0
    y_size = y_size if y_size > 0 else 0
    x_offset = int(x_size/2)
    y_offset = int(y_size/2)
    background_img = cv2.resize(background_img, (850+x_size, 1100+y_size)) 

    out = background_img
    out[y_offset:y_offset+img.shape[0], x_offset:x_offset+img.shape[1]] = img
    cv2.imshow('image', background_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [126]:
backgrounds = []
for filename in os.listdir(backgrounds_dir):
    img = cv2.imread(backgrounds_dir+ '/'+filename, 1)
    backgrounds.append(img)

files = ['IncomeDrivenRepayment_0.jpg', "TeacherLoanForgiveness_0.jpg"]

for filename in text_locations:
    if text_locations[filename] != {}:
        image = cv2.imread(template_directory+ '/'+filename, 1)
        image = cv2.resize(image, (850, 1100)) 

        doc_info = text_locations[filename]

        file = filename.split('_')[0].strip()
        print(f'\n{file}')
        for i in range(7500,20000):
            background_img = backgrounds[random.randint(0, len(backgrounds)-1)]
            img = image
            for loc in doc_info:
                font = random.choice(fonts)
                cv2.putText(img, generate_random_field(),
                            (int(loc['x']),int(loc['y'])), font,
                            0.75, (255, 0, 0), 2)
            # resize(210,275)
            x_size = random.randint(-200,300)
            y_size = random.randint(-200,300)
            x_size = x_size if x_size > 0 else 0
            y_size = y_size if y_size > 0 else 0
            x_offset = int(x_size/2)
            y_offset = int(y_size/2)
            background_img = cv2.resize(background_img, (850+x_size, 1100+y_size)) 

            background_img[y_offset:y_offset+img.shape[0], x_offset:x_offset+img.shape[1]] = img
            transformed = transform(image=cv2.resize(background_img, (210,275)))
            img = transformed['image']
            # img = cv2.cvtColor(transformed['image'], cv2.COLOR_BGR2GRAY)
            # img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
            cv2.imwrite(f'data/{file[:5]}_{i}.jpg', img)
            print(f'{i}',end='\r')


TeacherLoanForgiveness
19999
PublicServiceLoanForgivenessPSLFTemporaryExpandedPSLFTEPSLFCertificationApplication
19999
IncomeDrivenRepayment
19999
EconomicHardshipDeferment
19999
Consolidation
19999

In [12]:
backgrounds = []
for filename in os.listdir(backgrounds_dir):
    img = cv2.imread(backgrounds_dir+ '/'+filename, 1)
    backgrounds.append(img)

image_dir = "data/"
files = os.listdir(image_dir)
types = list(set([file[:4] for file in files]))
categories = {}
for i in range(len(types)):
    categories[types[i]] = i

arr_x = []
arr_y = []

for filename in text_locations:
    if text_locations[filename] != {}:
        image = cv2.imread(template_directory+ '/'+filename, 1)
        image = cv2.resize(image, (850, 1100)) 

        doc_info = text_locations[filename]

        file = filename.split('_')[0].strip()
        print(f'\n{file}')
        for i in range(20000):
            background_img = backgrounds[random.randint(0, len(backgrounds)-1)]
            img = image
            for loc in doc_info:
                font = random.choice(fonts)
                cv2.putText(img, generate_random_field(),
                            (int(loc['x']),int(loc['y'])), font,
                            0.75, (255, 0, 0), 2)
            # resize(210,275)
            x_size = random.randint(-200,300)
            y_size = random.randint(-200,300)
            x_size = x_size if x_size > 0 else 0
            y_size = y_size if y_size > 0 else 0
            x_offset = int(x_size/2)
            y_offset = int(y_size/2)
            background_img = cv2.resize(background_img, (850+x_size, 1100+y_size)) 

            background_img[y_offset:y_offset+img.shape[0], x_offset:x_offset+img.shape[1]] = img
            transformed = transform(image=cv2.resize(background_img, (210,275)))
            img = transformed['image']
            img = cv2.cvtColor(transformed['image'], cv2.COLOR_BGR2GRAY)
            # img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
            # cv2.imwrite(f'data/{file[:5]}_{i}.jpg', img)
            arr_x.append(img)
            arr_y.append([categories[filename[:4]]])
            
            print(f'{i}',end='\r')

arr_x = np.array(arr_x)
arr_y = np.array(arr_y)
np.save(f'image_data', arr_x)
np.save(f'target_data', arr_y)
# print(arr_x.shape)


TeacherLoanForgiveness
19999
PublicServiceLoanForgivenessPSLFTemporaryExpandedPSLFTEPSLFCertificationApplication
19999
IncomeDrivenRepayment
19999
EconomicHardshipDeferment
19999
Consolidation
19999

In [20]:
print(arr_x.shape)
print(arr_y.shape)
print(type(arr_x[0][0][0]))


(100000, 275, 210)
(100000, 1)
<class 'numpy.uint8'>
