In [3]:
import torch
import torch.utils.data as data
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms
from functools import partial


from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np

import random
import os
import time
import json

import math

In [4]:
# Render matplotlib figures inline in the notebook output, beneath the cell that draws them.
%matplotlib inline

In [5]:
def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to mode ``'L'``.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The result of ``img.convert('L')``, or ``None`` when the file cannot be
        opened or decoded (any ``OSError``). In the failure case a
        ``RuntimeWarning`` naming the path is emitted so the problem is visible
        instead of being swallowed silently.
    """
    import warnings  # local import keeps this cell self-contained

    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    try:
        with open(path, 'rb') as f:
            with Image.open(f) as img:
                return img.convert('L')
    except OSError as err:
        # Best-effort loader: keep returning None, but tell the user which file failed.
        warnings.warn(
            "pil_loader: could not load {!r}: {}".format(path, err),
            RuntimeWarning,
        )
        return None

In [6]:
def show(img):
    """Draw ``img`` in a fresh matplotlib figure using the gray colormap.

    Args:
        img: Anything ``np.asarray`` can turn into an array that
            ``plt.imshow`` accepts.
    """
    pixels = np.asarray(img)
    plt.figure()  # new figure so earlier plots are not overwritten
    plt.imshow(pixels, cmap='gray', interpolation='nearest')

In [7]:
def tightest_image_crop(img, preserve_aspect_ratio=False):
    """Crop ``img`` to the bounding box of its non-zero entries.

    The bounding box is computed over the first two axes of ``img`` (rows and
    columns); any further axes are kept whole.

    Args:
        img: numpy array with at least two axes.
        preserve_aspect_ratio: When True, crop a square window instead of the
            tight box. The window is anchored at the top-left corner of the
            bounding box and its side equals the larger of the box's width and
            height. Slicing stops at the array border, so the result can be
            smaller than the requested square near an edge.

    Returns:
        The cropped slice of ``img``.

    Raises:
        ValueError: If ``img`` contains no non-zero entries, so there is
            nothing to crop to.
    """
    nonzero = img.nonzero()
    rows, cols = nonzero[0], nonzero[1]
    if rows.size == 0:
        raise ValueError("tightest_image_crop: image has no non-zero pixels")

    top_i, bottom_i = rows.min(), rows.max()
    left_i, right_i = cols.min(), cols.max()

    new_width = right_i - left_i + 1
    # bottom minus top: the reversed subtraction always produced a height <= 1
    new_height = bottom_i - top_i + 1

    if preserve_aspect_ratio:
        side = max(new_width, new_height)
        return img[top_i:top_i + side, left_i:left_i + side]

    return img[top_i:bottom_i + 1, left_i:right_i + 1]

In [8]:
def overlap_concat(img1, img2, overlap_amt):
    """Join two images side by side, summing their shared ``overlap_amt`` columns.

    The last ``overlap_amt`` columns of ``img1`` are added element-wise to the
    first ``overlap_amt`` columns of ``img2``; the untouched parts of both
    images are placed to the left and right of that summed strip.

    Args:
        img1: Left image (2-D numpy array).
        img2: Right image with the same number of rows as ``img1``.
        overlap_amt: Number of columns the two images share.

    Returns:
        A ``(combined, overlapping)`` tuple: the concatenated array and the
        number of entries in the summed strip whose value equals 254.
    """
    # NOTE(review): presumably the inputs are uint8, where 255 + 255 wraps to 254,
    # making 254 the marker for "both pixels fully on" - confirm with the callers.
    seam_start = len(img1[0]) - overlap_amt
    blended = img1[:, seam_start:len(img1[0])] + img2[:, :overlap_amt]

    distinct, occurrences = np.unique(blended, return_counts=True)
    matching = occurrences[distinct == 254]
    overlapping = matching[0] if matching.size else 0

    pieces = (img1[:, :seam_start], blended, img2[:, overlap_amt:])
    return np.concatenate(pieces, axis=1), overlapping

In [9]:
def get_concat(img1, img2, max_over):
    """Concatenate ``img2`` to the right of ``img1`` with a randomised overlap.

    Overlap widths are tried from 1 up to (but excluding) the narrower image's
    width. At the first width where ``overlap_concat`` reports at least
    ``max_over`` overlapping pixels, the width is reduced by a random 1-9
    columns (never below 1) and that concatenation is returned. A hit at
    width 1 is returned as is. If no width reaches the threshold, the images
    are joined with an overlap of 1.

    Args:
        img1: Left image array.
        img2: Right image array.
        max_over: Overlapping-pixel count that stops the search.

    Returns:
        The concatenated image array.
    """
    width_limit = min(len(img1[0]), len(img2[0]))
    for amount in range(1, width_limit):
        merged, touching = overlap_concat(img1, img2, amount)
        if touching < max_over:
            continue
        if amount == 1:
            # Cannot back off below one column; the current result is the answer.
            return merged
        backoff = random.randrange(1, 10)
        merged, _ = overlap_concat(img1, img2, max(1, amount - backoff))
        return merged

    # Threshold never reached: fall back to the minimal one-column overlap.
    merged, _ = overlap_concat(img1, img2, 1)
    return merged

In [10]:
def vertical_scale_preserve_aspect_ratio(img, height):
    """Resize ``img`` to ``height`` pixels tall, scaling the width by the same factor.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method (e.g. a PIL image).
        height: Target height in pixels.

    Returns:
        Whatever ``img.resize`` returns for the new ``(width, height)``.
    """
    w, h = img.size
    # Truncation can reach 0 for very tall, thin inputs; keep at least one column
    # so the resize request is never zero-width.
    new_width = max(1, int(height * (w / h)))
    return img.resize((new_width, height))
    

In [13]:
def _load_padded_digit(path, zero_padding, canvas_height=32):
    """Load one digit image, crop it tightly, rescale it and add a zero border."""
    loaded = pil_loader(path)
    if loaded is None:
        # pil_loader returns None for unreadable files; fail here with the
        # offending path instead of deep inside the cropping code.
        raise OSError("could not load digit image: {}".format(path))
    digit = tightest_image_crop(np.asarray(loaded))
    digit = vertical_scale_preserve_aspect_ratio(
        Image.fromarray(digit), canvas_height - 2 * zero_padding
    )
    # Padding every side by zero_padding brings the height back to canvas_height.
    return np.pad(np.asarray(digit), zero_padding, 'constant', constant_values=0)


def _random_number(min_digits, max_digits, num_digit_dist):
    """Pick a digit count from the weighted distribution, then a number of that length."""
    num_length = random.choices(range(min_digits, max_digits + 1), num_digit_dist)[0]
    # randrange excludes its upper bound, so 10**n keeps 9, 99, 999... reachable.
    # NOTE(review): the lower bound means a lone "0" is never generated - confirm intended.
    return random.randrange(10 ** (num_length - 1), 10 ** num_length)


def _generate_split(split_name, digit_paths, count, min_digits, max_digits,
                    num_digit_dist, output_dir, start):
    """Render ``count`` number images for one split and write them with their labels JSON."""
    split_dir = os.path.join(output_dir, split_name)
    os.makedirs(split_dir, exist_ok=True)

    labels = {}
    for i in range(count):
        zero_padding = random.randrange(1, 3)  # border of 1 or 2 pixels
        if i % 1000 == 0:
            print("{} iteration: {}, time so far: {}s".format(split_name, i, time.time() - start))

        number = _random_number(min_digits, max_digits, num_digit_dist)
        labels[i] = number

        img_paths = []
        for digit in str(number):
            pool = digit_paths[int(digit)]
            if not pool:
                raise ValueError("no {} images available for digit {}".format(split_name, digit))
            img_paths.append(random.choice(pool))

        img = _load_padded_digit(img_paths[0], zero_padding)
        rand_over = random.randrange(1, 3)  # overlap threshold handed to get_concat
        for path in img_paths[1:]:
            img = get_concat(img, _load_padded_digit(path, zero_padding), rand_over)

        Image.fromarray(img).save(os.path.join(split_dir, str(i) + '.png'))

    with open(os.path.join(split_dir, "labels_{}.json".format(split_name)), 'w') as outfile:
        json.dump(labels, outfile)


def make_numbers(min_digits, max_digits, num_digit_dist, num_train, num_val, output_dir, input_dir):
    """Generate multi-digit number images by stitching single-digit images together.

    ``input_dir`` must contain sub-directories ``0`` .. ``9`` holding images of
    the corresponding digit. Each digit's files are shuffled once and split
    into disjoint pools: the first 75% feed the training set and the rest feed
    the validation set, so no source image appears in both splits.

    For every generated sample a digit count is drawn from
    ``range(min_digits, max_digits + 1)`` using ``num_digit_dist`` as weights,
    a random number with that many digits is chosen, and one image per digit is
    cropped, scaled, padded and concatenated with a small random overlap.

    Output layout under ``output_dir``:
        train/<i>.png,       train/labels_train.json
        validation/<i>.png,  validation/labels_validation.json
    Each JSON file maps the sample index to its number.

    Args:
        min_digits: Smallest number of digits per generated number (>= 1).
        max_digits: Largest number of digits per generated number.
        num_digit_dist: Weights for each digit count, one per value in
            ``range(min_digits, max_digits + 1)``.
        num_train: Number of training images to generate.
        num_val: Number of validation images to generate.
        output_dir: Directory that receives the ``train`` and ``validation`` folders.
        input_dir: Directory holding the per-digit source folders.

    Raises:
        ValueError: If ``min_digits`` is below 1, or a needed digit has no
            source images in the relevant pool.
        OSError: If a selected source image cannot be loaded.
    """
    if min_digits < 1:
        raise ValueError("min_digits must be at least 1, got {}".format(min_digits))

    start = time.time()
    os.makedirs(output_dir, exist_ok=True)

    train_image_paths = []
    validation_image_paths = []
    for digit in range(10):
        digit_dir = os.path.join(input_dir, str(digit))
        paths = [os.path.join(digit_dir, name) for name in sorted(os.listdir(digit_dir))]
        # Shuffle once and cut, so the two pools are disjoint (no train/validation leakage).
        random.shuffle(paths)
        cut = int(0.75 * len(paths))
        train_image_paths.append(paths[:cut])
        validation_image_paths.append(paths[cut:])

    _generate_split("train", train_image_paths, num_train,
                    min_digits, max_digits, num_digit_dist, output_dir, start)
    print("time after training dataset: {}s".format(time.time() - start))

    _generate_split("validation", validation_image_paths, num_val,
                    min_digits, max_digits, num_digit_dist, output_dir, start)
    print("final time: {}s".format(time.time() - start))

In [14]:
# Generate a small sample set: 500 training and 100 validation images of
# 1- to 6-digit numbers, with one weight per digit count.
make_numbers(
    min_digits=1,
    max_digits=6,
    num_digit_dist=[0.1, 0.2, 0.25, 0.25, 0.1, 0.1],
    num_train=500,
    num_val=100,
    output_dir="./sanbornTight",
    input_dir="./CharLabelOutput/",
)

train iteration: 0, time so far: 16.43820095062256s
time after training dataset: 82.9247670173645s
validation iteration: 0, time so far: 82.92492294311523s
final time: 97.11299800872803s
