In [1]:
import os
import glob
import cv2
import math
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import clear_output

%matplotlib inline

In [2]:
# set file paths
# path: folder that is searched for *.TIFF scans (passed to tile_image as source_path)
# tile_path: folder that receives the tile PNGs and tile_log.csv (passed as output_path)
path = r"E:/USGS/scans_SH/"
tile_path = r"E:/USGS/tiles/"

In [3]:
# make a giant gross function to do all the tiling
def tile_image(source_path, output_path, desired_samples_per_image, desired_tile_dimension):
    """Cut random, thresholded square tiles out of every ``*.TIFF`` scan in a folder.

    For each scan the image is divided into a grid of
    ``desired_tile_dimension`` x ``desired_tile_dimension`` cells. Random cells
    (away from the scan borders) are cropped, thresholded at 127 with OpenCV and
    kept only when between 70% and 95% of the thresholded values are white.
    Accepted tiles are written as ``<scan>_<row>_<col>.png`` and recorded in
    ``tile_log.csv`` inside ``output_path``.

    Parameters
    ----------
    source_path : str
        Folder containing the ``*.TIFF`` scans (with or without a trailing
        path separator).
    output_path : str
        Existing folder that receives the PNG tiles and ``tile_log.csv``
        (with or without a trailing path separator).
    desired_samples_per_image : int
        Number of new tiles to try to produce per scan. Sampling for a scan
        stops early after ``100 * desired_samples_per_image`` attempts.
    desired_tile_dimension : int
        Edge length of a tile in pixels.

    Returns
    -------
    None
        Results are written to disk; progress is printed to the cell output.

    Notes
    -----
    * Scans whose grid is too small to leave the border margin free are
      skipped instead of raising ``ValueError`` from the random sampler.
    * A randomly chosen tile that already exists on disk counts as an attempt,
      so the sampling loop always terminates.
    * NOTE(review): the white-fraction test divides by
      ``desired_tile_dimension ** 2``, which presumably assumes single-channel
      scans -- confirm before using it on RGB input.
    """
    # grid cells kept free at the scan borders (x: left/right, y: top/bottom)
    margin_x = 4
    margin_y = 6

    # maximum number of sampling attempts per requested tile
    attempts_per_sample = 100

    tile_area = desired_tile_dimension * desired_tile_dimension
    log_path = os.path.join(output_path, 'tile_log.csv')

    # loop through all the images
    for image in glob.glob(os.path.join(source_path, '*.TIFF')):

        # scan name = file name without folder and extension
        image_name = os.path.splitext(os.path.basename(image))[0]

        # the context manager closes the scan's file handle when we are done with it
        with Image.open(image) as img:

            # number of grid cells in x and y
            imageWidth, imageHeight = img.size
            rangex = math.ceil(imageWidth / desired_tile_dimension)
            rangey = math.ceil(imageHeight / desired_tile_dimension)

            # np.random.randint needs low < high; skip scans that are too small
            if rangex - margin_x <= margin_x or rangey - margin_y <= margin_y:
                print('{}: skipped, scan too small for {} px tiles'.format(
                    image_name, desired_tile_dimension))
                continue

            # zero padding for the grid indices in the file name
            padding_x_zeros = len(str(rangex))
            padding_y_zeros = len(str(rangey))

            number_of_samples = 0
            samples_attempted = 0
            max_attempts = attempts_per_sample * desired_samples_per_image

            # hard limit on the number of attempts, sometimes images just
            # don't have enough usable samples
            while number_of_samples < desired_samples_per_image and samples_attempted < max_attempts:

                # every pass counts as an attempt, whatever its outcome
                samples_attempted += 1

                # random grid cell, staying away from the edges of the scan
                rand_x = np.random.randint(margin_x, rangex - margin_x)
                rand_y = np.random.randint(margin_y, rangey - margin_y)

                filename = '{}_{}_{}.png'.format(image_name,
                                                 str(rand_y).zfill(padding_y_zeros),
                                                 str(rand_x).zfill(padding_x_zeros))
                save_path = os.path.join(output_path, filename)

                # only proceed if it's a new tile
                if os.path.exists(save_path):
                    continue

                # crop box = (<start x>, <start y>, <end x>, <end y>)
                left = rand_x * desired_tile_dimension
                top = rand_y * desired_tile_dimension
                box = (left, top, left + desired_tile_dimension, top + desired_tile_dimension)
                tile_array = np.asarray(img.crop(box))

                # convert to binary
                ret, binary_tile = cv2.threshold(tile_array, 127, 255, cv2.THRESH_BINARY)

                # ignore mostly white (> 95%) and mostly black (< 70% white) tiles
                white_pixels = np.sum(binary_tile) / 255
                if white_pixels > 0.95 * tile_area or white_pixels < 0.7 * tile_area:
                    continue

                # save the tile to a log
                with open(log_path, 'a') as outfile:
                    outfile.write('{}, {}, {}\n'.format(image_name, rand_x, rand_y))

                # save the file
                Image.fromarray(binary_tile).save(save_path, optimize=False)

                # update counter and progress display
                number_of_samples += 1
                print('{}: {}%'.format(
                    image_name, round(100 * number_of_samples / desired_samples_per_image, 2)))
                clear_output(wait=True)

    print('Completed')

In [4]:
# tile every scan found in `path`: aim for 50 tiles per scan, each 200 x 200 pixels,
# written to `tile_path`
tile_image(path, tile_path, 50, 200)

Completed
