In [1]:
from PIL import Image
import pandas as pd
import numpy as np
import os, sys
from concurrent.futures import ThreadPoolExecutor, as_completed, Future
from threading import Lock

In [2]:
def full_image_path(author_id, painting_id):
    """Return the relative path of a painting's full-size JPEG.

    The result has the form
    ``data/images_athenaeum/full/<author_id>/<painting_id>.jpg``; both ids are
    rendered with ``%d``.
    """
    author_dir = 'data/images_athenaeum/full/%d' % author_id
    file_name = '%d.jpg' % painting_id
    return author_dir + '/' + file_name
def half_image_path(author_id, painting_id):
    """Return ``(directory, file_name)`` for a painting's half-size JPEG.

    Unlike ``full_image_path`` this does not join the two parts: the directory
    (``data/images_athenaeum/half/<author_id>``) and the file name
    (``<painting_id>.jpg``) are returned separately as a 2-tuple.
    """
    directory = 'data/images_athenaeum/half/%d' % author_id
    file_name = '%d.jpg' % painting_id
    return (directory, file_name)

In [3]:
# Load the paintings table; later cells read its `author_id` and `painting_id`
# columns to locate each image on disk.
paintings = pd.read_csv('data/athenaeum_paintings_sizes.csv')

In [4]:
# Display the table's dimensions as (rows, columns).
paintings.shape

(207245, 12)

In [9]:
# Kept so that any code referring to `lock` keeps working. convert_image no
# longer needs it: os.makedirs(..., exist_ok=True) tolerates several threads
# creating the same directory at once.
lock = Lock()

def convert_image(author_painting):
    """Write the half-size copy of one painting unless it already exists.

    Args:
        author_painting: pair ``(author_id, painting_id)``.

    Returns:
        True if a new half-size image was written, False if the target file
        was already present and nothing was done.

    Raises:
        Any exception raised while opening, resizing or saving the image.
        A partially written target file is removed before re-raising.
    """
    author_id, painting_id = author_painting
    full_path = full_image_path(author_id, painting_id)
    target_dir, target_fn = half_image_path(author_id, painting_id)
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, target_fn)
    if os.path.exists(target_path):
        return False
    try:
        # The context manager closes the source file handle even on failure.
        with Image.open(full_path) as im:
            # Clamp to 1 so a 1-pixel-wide/high source does not yield a
            # zero-sized target.
            half_size = (max(1, im.width // 2), max(1, im.height // 2))
            im.resize(half_size, Image.LANCZOS).save(target_path)
    except BaseException:
        # Do not leave a truncated file behind: the existence check above
        # would treat it as already converted on the next run.
        if os.path.exists(target_path):
            os.remove(target_path)
        raise
    return True

def check_image(author_painting):
    """Ensure a readable half-size image exists for one painting.

    Tries to open the half-size file. If that fails for any reason (missing
    file, unreadable image, ...), any existing file at that path is deleted
    and the image is regenerated with ``convert_image``.

    Args:
        author_painting: pair ``(author_id, painting_id)``.

    Returns:
        The result of ``convert_image`` when the image had to be regenerated,
        False when the existing file opened without error.
    """
    author_id, painting_id = author_painting
    target_dir, target_fn = half_image_path(author_id, painting_id)
    target_path = os.path.join(target_dir, target_fn)
    try:
        # NOTE(review): per the Pillow docs Image.open is lazy, so this
        # only validates that the file can be identified, not that all pixel
        # data decodes. The context manager closes the file handle.
        with Image.open(target_path):
            pass
    except Exception:
        # Exception rather than BaseException: a KeyboardInterrupt/SystemExit
        # must propagate instead of triggering delete-and-reconvert.
        if os.path.exists(target_path):
            os.remove(target_path)
        return convert_image(author_painting)
    return False

In [10]:
# Tunables for the batch conversion run.
MAX_WORKERS = 8        # threads in the pool
BATCH_SIZE = 1000      # tasks submitted per batch, bounding pending futures
PROGRESS_EVERY = 100   # refresh the progress line every N finished tasks

num_completed = 0
total_guys = len(paintings)
# itertuples avoids building a Series per row, which iterrows does.
to_execute = [[author_id, painting_id] for author_id, painting_id
              in paintings[['author_id', 'painting_id']].itertuples(index=False)]
converted = 0
failed = []  # (author_painting, exception) for tasks that raised
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    for start in range(0, len(to_execute), BATCH_SIZE):
        # Map each future back to its input so failures can be reported.
        executions = {executor.submit(check_image, author_painting): author_painting
                      for author_painting in to_execute[start:start + BATCH_SIZE]}
        for future in as_completed(executions):
            try:
                if future.result():
                    converted += 1
            except Exception as error:
                # One bad image must not abort the remaining conversions;
                # record it and keep going.
                failed.append((executions[future], error))
            num_completed += 1
            if num_completed % PROGRESS_EVERY == 0:
                sys.stdout.write('\r%d/%d' % (num_completed, total_guys))
                sys.stdout.flush()
sys.stdout.write('\r%d/%d, %d converted\n' % (num_completed, total_guys, converted))
if failed:
    sys.stdout.write('%d failed, first: %r -> %r\n'
                     % (len(failed), failed[0][0], failed[0][1]))

207245/207245, 207245 converted
