In [0]:
from PIL import *
from PIL import Image
import os
from numpy import linalg as LA
import numpy as np
import shutil
import operator

In [0]:
def parse_image(source, square_size, count, captures, offset=0, offset_type=0, print_coords=False):
    """
    Tile an image into square windows and save every new window as a JPEG.

    The scan starts at the top-left corner, walks left to right along a row
    in steps of ``square_size`` and, once the row is exhausted, moves down by
    ``square_size`` to the next row. The whole grid can be shifted by
    ``offset`` pixels along one axis so that repeated calls with different
    offsets produce overlapping windows.

    Each saved window is written as
    ``<stem>_sample_<count>_x<left>_y<top>.jpg``, where ``<stem>`` is
    ``source`` without its file extension.

    Parameters
    ----------
    source : str
        Path of the image to tile.
    square_size : int
        Side length, in pixels, of each square window.
    count : int
        Index given to the first window saved by this call.
    captures : list of (int, int)
        Top-left corners that were already saved. Corners found here are
        skipped; corners saved by this call are appended in place.
    offset : int, optional
        Shift, in pixels, applied to the grid origin along one axis.
    offset_type : int, optional
        ``1`` shifts the grid along x; any other value shifts it along y.
    print_coords : bool, optional
        When truthy, print the corners of every saved window.

    Returns
    -------
    tuple
        ``(next_count, image_width)``: the index following the last saved
        window, and the width of the source image in pixels.
    """
    # Only one axis is shifted per call.
    if offset_type == 1:
        shift_x, shift_y = offset, 0
    else:
        shift_x, shift_y = 0, offset

    # Strip the real extension, whatever its length ('.jpg', '.jpeg', ...).
    stem = os.path.splitext(source)[0]

    # Set copy of the known corners for fast membership tests; `captures`
    # itself is still appended to because the caller relies on that.
    seen = set(captures)

    # The context manager closes the underlying file once tiling is done.
    with Image.open(source) as src:
        width = src.size[0]
        max_down = int(src.height / square_size) * square_size + square_size
        max_right = int(src.width / square_size) * square_size + square_size

        # Number of windows per axis, using the same range bounds as before.
        # NOTE(review): with a non-zero offset the last window on the shifted
        # axis can reach past the image edge; confirm how PIL fills that area.
        n_rows = len(range(square_size + shift_y, max_down - shift_y, square_size))
        n_cols = len(range(square_size + shift_x, max_right - shift_x, square_size))

        for row in range(n_rows):
            # Every row sits exactly `square_size` below the previous one,
            # starting at the shifted origin.
            tl_y = shift_y + row * square_size
            br_y = tl_y + square_size
            for col in range(n_cols):
                tl_x = shift_x + col * square_size
                br_x = tl_x + square_size

                # Skip only this window; later windows in the row are still
                # visited.
                if (tl_x, tl_y) in seen:
                    continue

                sample = src.crop((tl_x, tl_y, br_x, br_y))
                sample.save(f"{stem}_sample_{count}_x{tl_x}_y{tl_y}.jpg")
                captures.append((tl_x, tl_y))
                seen.add((tl_x, tl_y))

                if print_coords:
                    print(f"image {count}: top-left (x,y): {(tl_x,tl_y)}, bottom-right (x,y): {(br_x,br_y)}")
                count += 1

    return count, width

In [0]:
def create_images(source, square_size, captures, offset=0):
    """
    Save the full set of window samples for one image.

    The image is first tiled on the unshifted grid. If ``offset`` is non-zero
    it is then tiled again on grids shifted by ``offset``, ``2 * offset``, ...
    (``int(square_size / offset)`` shifts in total), once along y and once
    along x for each shift. Sample numbering continues across all passes.

    Parameters
    ----------
    source : str
        Path of the image to tile.
    square_size : int
        Side length, in pixels, of each square window.
    captures : list of (int, int)
        Top-left corners already saved; passed through to ``parse_image``,
        which skips them and appends the corners it saves.
    offset : int, optional
        Shift step in pixels; ``0`` disables the shifted passes.

    Returns
    -------
    int
        The running sample count returned by the last ``parse_image`` call.
    """
    # Pass 0: the unshifted grid. The image itself is opened inside
    # parse_image, so no handle is opened (or leaked) here.
    count = parse_image(source, square_size=square_size, captures=captures, count=0,
                        offset=0, offset_type=0, print_coords=False)[0]

    if offset != 0:
        runs_per_axis = int(square_size / offset)
        for run in range(1, runs_per_axis + 1):
            shift = run * offset
            # Same shift along y (offset_type 0) first, then along x (1).
            for axis in (0, 1):
                count = parse_image(source, square_size=square_size, captures=captures, count=count,
                                    offset=shift, offset_type=axis, print_coords=False)[0]

    return count

In [0]:
def sort_images_by_pixels(source,image_folder,coordinates,square_size,min_distance, delete_distance,
                         pos_folder='pos',neg_folder='neg',del_folder='del'):
    """
    Sort the window samples in the current directory into positives/negatives.

    For every sample file the centre of its window is computed from the
    top-left corner encoded in the file name (``..._x<left>_y<top>.jpg``) plus
    half of ``square_size``. The distance from that centre to the nearest
    entry of ``coordinates`` decides where the file goes:

    * ``>= delete_distance`` -> ``neg_folder``
    * ``<= min_distance``    -> ``pos_folder``
    * anything in between    -> ``del_folder``, which is deleted at the end

    The three folders are created inside ``image_folder``; any existing
    folder with the same name is removed first.

    Parameters
    ----------
    source : str
        File name that must never be treated as a sample.
    image_folder : str
        Directory in which the pos/neg/del folders are created.
    coordinates : iterable of (x, y)
        Pixel locations of the annotated objects. When empty, every sample is
        treated as infinitely far away and therefore lands in ``neg_folder``.
    square_size : int
        Side length, in pixels, of the windows.
    min_distance : float
        Largest centre-to-object distance that still counts as a positive.
    delete_distance : float
        Smallest centre-to-object distance that counts as a negative.
    pos_folder, neg_folder, del_folder : str, optional
        Names of the three destination folders.
    """
    pos_path = os.path.join(image_folder, pos_folder)
    neg_path = os.path.join(image_folder, neg_folder)
    del_path = os.path.join(image_folder, del_folder)

    # Start from empty destination folders on every run.
    for folder in (pos_path, neg_path, del_path):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)

    locations = [np.array([pixel_x, pixel_y]) for (pixel_x, pixel_y) in coordinates]
    endings = ('.jpg', '.jpeg')
    half = square_size / 2

    # Samples are looked up in the current working directory.
    for f in os.listdir('.'):
        if f == source or os.path.splitext(f)[1] not in endings:
            continue

        corner = _sample_corner(f)
        if corner is None:
            # Not a window sample (e.g. the source image itself): leave it.
            continue

        centre = np.array([corner[0] + half, corner[1] + half])

        # `default` covers the case of an image without any annotated object.
        nearest = min((np.linalg.norm(centre - location) for location in locations),
                      default=float('inf'))

        if nearest >= delete_distance:
            shutil.move(f, neg_path)
        elif nearest <= min_distance:
            shutil.move(f, pos_path)
        else:
            shutil.move(f, del_path)

    # Ambiguous samples are discarded together with their folder.
    shutil.rmtree(del_path)


def _sample_corner(filename):
    """Return the ``(left, top)`` corner encoded in a sample file name, or ``None``."""
    stem = os.path.splitext(filename)[0]
    # Parse from the right so a '_x' or '_y' earlier in the name cannot interfere.
    head, y_marker, top = stem.rpartition('_y')
    head, x_marker, left = head.rpartition('_x')
    if not (x_marker and y_marker):
        return None
    try:
        return int(left), int(top)
    except ValueError:
        return None

In [0]:
import json
def parse_json(root,json_obj,square_size,min_distance, delete_distance,offset):
    """
    Build sorted window samples for every image listed in an annotation JSON.

    For each key of the JSON file the image file name is taken as the part of
    the key up to and including its ``.jpeg``/``.jpg`` extension. The image is
    moved into a fresh folder named after it (an existing folder of that name
    is deleted), tiled with ``create_images``, its samples are sorted with
    ``sort_images_by_pixels`` using the ``cx``/``cy`` values of the image's
    regions, and the image is finally moved back to ``root``.

    The working directory is changed to ``root`` and is ``root`` again when
    the function returns or raises.

    Parameters
    ----------
    root : str
        Directory that holds the images and the JSON file.
    json_obj : str
        Name of the JSON file, resolved relative to ``root``.
    square_size : int
        Side length, in pixels, of the windows.
    min_distance : float
        Passed to ``sort_images_by_pixels``.
    delete_distance : float
        Passed to ``sort_images_by_pixels``.
    offset : int
        Grid shift step, passed to ``create_images``.

    Raises
    ------
    IndexError
        If a key of the JSON file contains neither ``.jpeg`` nor ``.jpg``.
    """
    # Resolve once, before any chdir, so a relative root keeps working.
    root = os.path.abspath(root)
    os.chdir(root)
    with open(json_obj, 'r') as f:
        json_data = json.load(f)

    endings = ['.jpeg', '.jpg']
    for name in json_data:
        # The key may carry extra characters after the extension, so the file
        # name is rebuilt from the part in front of it.
        ending = [end for end in endings if end in name][0]
        f_loc = name.split(ending)[0]
        f_name = f_loc + ending

        new_path = os.path.join(root, f_loc)
        if os.path.exists(new_path):
            shutil.rmtree(new_path)
        os.mkdir(new_path)
        shutil.move(f_name, new_path)
        os.chdir(new_path)

        try:
            regions = json_data[name]['regions']
            # Accept regions stored either as a mapping or as a plain list.
            shapes = regions.values() if isinstance(regions, dict) else regions
            attrs = [shape['shape_attributes'] for shape in shapes]
            coords = [(attr['cx'], attr['cy']) for attr in attrs]

            # Corners are only comparable within one image, so each image
            # starts with its own empty capture list.
            captures_taken = []
            create_images(source=f_name, offset=offset, square_size=square_size,
                          captures=captures_taken)
            # Pass the real file name (not the JSON key) so the source image
            # is recognised and never handled as a sample.
            sort_images_by_pixels(source=f_name, image_folder=new_path, coordinates=coords,
                                  delete_distance=delete_distance,
                                  square_size=square_size, min_distance=min_distance)
        finally:
            # Put the image back and restore the working directory even when
            # tiling or sorting fails.
            shutil.move(f_name, root)
            os.chdir(root)
        
        
            

In [0]:
def get_square_size(json_obj, root, windows_across):
    """
    Derive the window side length from the width of an annotated image.

    The image file name is taken from the last key of the JSON file (the part
    of the key up to and including its ``.jpeg``/``.jpg`` extension). The
    width of that image divided by ``windows_across`` is returned.

    The working directory is changed to ``root`` as a side effect.

    Parameters
    ----------
    json_obj : str
        Name of the JSON file, resolved relative to ``root``.
    root : str
        Directory that holds the images and the JSON file; a trailing path
        separator is optional.
    windows_across : int or float
        Number of windows that should fit across the image width.

    Returns
    -------
    float
        ``image_width / windows_across`` (not rounded).

    Raises
    ------
    ValueError
        If the JSON file lists no images.
    IndexError
        If the selected key contains neither ``.jpeg`` nor ``.jpg``.
    """
    # Resolve before chdir so a relative root still points at the same place.
    root = os.path.abspath(root)
    os.chdir(root)
    with open(json_obj, 'r') as f:
        json_data = json.load(f)

    image_names = list(json_data.keys())
    if not image_names:
        raise ValueError(f"No images listed in {json_obj!r}; cannot derive a window size.")

    # Only the last listed image determines the size.
    name = image_names[-1]
    endings = ['.jpeg', '.jpg']
    ending = [end for end in endings if end in name][0]
    f_name = name.split(ending)[0] + ending
    file_path = os.path.join(root, f_name)

    print('File_path is:', file_path)
    # Only the width is needed; close the file right after reading it.
    with Image.open(file_path) as img:
        width = img.size[0]
    print('Image width is:', width)
    return width/windows_across


In [0]:
# Google Colab only: mount Google Drive so the photo directory can be reached.
# When running on Colab, also change the directories in the next block to
# point inside the mounted drive.
try:
    from google.colab import drive
except ImportError:
    # The google.colab package is absent outside Colab; nothing to mount, and
    # the rest of the notebook can still run locally.
    print('google.colab not available - skipping Google Drive mount.')
else:
    drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# ---------------- user parameters: edit these three ----------------
# Number of windows across the image width; a larger value gives smaller
# squares (26 seems good).
windows_across = 26
# Directory where the photos are stored.
my_root_dir = '/Users/James/UIC/IDS 576 Adv Pred Models/TURTLES/photos/'
# Name of the JSON file from VGG.
my_json_file = 'via_region_data.json'
# -------------------------------------------------------------------

# Window side length in whole pixels, derived from the image width.
square_size = int(
    get_square_size(
        root=my_root_dir,
        json_obj=my_json_file,
        windows_across=windows_across,
    )
)

# Positive threshold: smaller values require more centred objects, but too
# small a value loses some positive examples.
min_distance = np.sqrt(2*(square_size/6)**2)
# Negative threshold, computed from half the window side.
delete_distance = np.sqrt(2*((square_size/2)**2))
# Step by which the sampling grid is shifted: a third of the window side.
offset = int(square_size/3)

print('Window size is:', square_size)

parse_json(
    root=my_root_dir,
    json_obj=my_json_file,
    square_size=square_size,
    offset=offset,
    min_distance=min_distance,
    delete_distance=delete_distance,
)




File_path is: /Users/James/UIC/IDS 576 Adv Pred Models/TURTLES/photos/DJI_0071.jpeg
Image width is: 4000
Window size is: 153
