# Resize image for training

In [1]:
from PIL import Image
import os
import re
import sys
import concurrent
sys.path.insert(0, '../../')
from data_loader import resolve_env_variable, load_image_file_raw, get_image_files

Use pillow to resize images to 128x128 for training.
(images larger than 128x128 will be resized to 128x128, images smaller than will be ignored)

In [2]:
# risize an image using pillow
def resize_image(image, size, image_path):
    try:
      return image.resize(size)
    except OSError as e:
      print(e.strerror)
      if 'image file is truncated' in e.__repr__() or (e.strerror is not None and 'image file is truncated' in e.strerror):
        print("Image file is truncated. Skipping file: " + os.path.basename(image_path))
        return None
      raise e

# save an image using pillow
def save_image(image, image_path):
    image.save(image_path)

# convert an image to grayscale using pillow
def convert_to_grayscale(image):
    return image.convert('L')

In [3]:
allow_env = True
path_to_images = resolve_env_variable('../../1_data_collection/.data', 'FILE_LOCATION', allow_env, 'IMAGE_FILE_LOCATION')
path_to_processed_images = resolve_env_variable('../../1_data_collection/.data', 'IMAGE_FILE_LOCATION', allow_env, 'FILE_LOCATION')
num_workers = 16

In [4]:
image_paths = get_image_files(path_to_images)

print(f"{len(image_paths)} files")

# do not scale images smaller than this threshold
size_threshold = 500

already_small = 0

def do_resize(image_path):
  global already_small
  with load_image_file_raw(image_path) as image:
    if image.size[0] < size_threshold or image.size[1] < size_threshold:
      already_small += 1
      return
    resized_image = resize_image(image, (image.size[0]//4, image.size[1]//4), image_path)
    if resized_image is None:
      return
    # Save the resized image
    save_image(resized_image, os.path.join(path_to_processed_images, os.path.basename(image_path)))
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    # Download and log every 100 files using a generator
    # First initialize the generator
    current_file = 0
    for _ in executor.map(do_resize, image_paths):
      current_file += 1
      if (current_file and current_file % 1000 == 0) or current_file == len(image_paths):
        print('Resized ' + str(current_file - already_small) + ' files')

print('Skipped ' + str(already_small) + ' already resized files')

279272 files
None
Image file is truncated. Skipping file: geoguessr_location_singleplayer_03MrvgM0hjFtwQVB_3.png
Resized -7 files
Resized -1 files
Resized 0 files
Resized -7 files
Resized -5 files
Resized 1 files
Resized -11 files
Resized -2 files
Resized -3 files
Resized 0 files
Resized 1 files
Resized -8 files
Resized 1 files
Resized -2 files
Resized 0 files
Resized -12 files
Resized -4 files
Resized 1 files
Resized -11 files
Resized 1 files
Resized 1 files
Resized 1 files
Resized -2 files
Resized 0 files
Resized -12 files
Resized -4 files
Resized 1 files
Resized -9 files
Resized -12 files
Resized 1 files
Resized 1 files
Resized -6 files
Resized 0 files
Resized -7 files
Resized -9 files
Resized -6 files
Resized 1 files
Resized -7 files
Resized 1 files
Resized -4 files
Resized -6 files
Resized -2 files
Resized 1 files
Resized 1 files
Resized -2 files
Resized 1 files
Resized -1 files
Resized -6 files
Resized -7 files
Resized -3 files
Resized -1 files
Resized -8 files
Resized 1 files
Re

KeyboardInterrupt: 