Task 1:



In [2]:
import os
import numpy as np
from PIL import Image
import time

In [8]:
def process_file(file_path, processed_file_path, target_size = 256, aspect_ratio_thres = 1.3):
    with Image.open(file_path) as image:
        # crop out the longer dimension so it's square
        width, height = image.size
        new_size = min(width, height)
        if new_size * aspect_ratio_thres < max(width, height):
            # print(f'skipped due to aspect ratio too high')
            return 1
        left = (width - new_size) / 2
        top = (height - new_size) / 2
        right = (width + new_size) / 2
        bottom = (height + new_size) / 2
        image = image.crop((left, top, right, bottom))
        
        # resize
        image = image.resize((target_size, target_size))

        # convert the image to RGB
        image = image.convert('RGB')
        
        # convert to numpy array
        img_array = np.array(image)

        # print(f'processed array shape: {img_array.shape}')
        if img_array.shape != (target_size, target_size, 3):
            print(f'error! wrong shape {img_array.shape}')
        
        # save to file
        np.save(processed_file_path, img_array)
        return 0

In [9]:
raw_data_dir = '../../Data/Art'
processed_data_dir = '../../Data/Art_Processed'

# Create the processed_data directory if it doesn't exist
if not os.path.exists(processed_data_dir):
    os.makedirs(processed_data_dir)

# List all subfolders in raw_data
for subdir in os.listdir(raw_data_dir):
    print(f'reading directory {subdir}')
    raw_subdir_path = os.path.join(raw_data_dir, subdir)
    
    # Check if it's a directory
    if not os.path.isdir(raw_subdir_path):
        continue
    processed_subdir_path = os.path.join(processed_data_dir, subdir)
    
    # Create the subfolder in processed_data if it doesn't exist
    if not os.path.exists(processed_subdir_path):
        os.makedirs(processed_subdir_path)
    
    count = 0
    processed_count = 0
    total = len(os.listdir(raw_subdir_path))
    # Process each file in the subdirectory
    for filename in os.listdir(raw_subdir_path):
        file_path = os.path.join(raw_subdir_path, filename)
        processed_file_path = os.path.join(processed_subdir_path, filename)
        
        # Process the file
        return_code = process_file(file_path, processed_file_path)
        count += 1
        if return_code == 0:
            processed_count += 1
        if count % 100 == 0:
            print(f'traversed {count} / {total}, processed {processed_count}')


reading directory Art_Nouveau
traversed 100 / 3035, processed 20
traversed 200 / 3035, processed 34
traversed 300 / 3035, processed 75
traversed 400 / 3035, processed 100
traversed 500 / 3035, processed 140
traversed 600 / 3035, processed 164
traversed 700 / 3035, processed 173
traversed 800 / 3035, processed 182
traversed 900 / 3035, processed 187
traversed 1000 / 3035, processed 198
traversed 1100 / 3035, processed 206
traversed 1200 / 3035, processed 251
traversed 1300 / 3035, processed 290
traversed 1400 / 3035, processed 306
traversed 1500 / 3035, processed 314
traversed 1600 / 3035, processed 322
traversed 1700 / 3035, processed 328
traversed 1800 / 3035, processed 335
traversed 1900 / 3035, processed 354
traversed 2000 / 3035, processed 394
traversed 2100 / 3035, processed 434
traversed 2200 / 3035, processed 447
traversed 2300 / 3035, processed 476
traversed 2400 / 3035, processed 510
traversed 2500 / 3035, processed 557
traversed 2600 / 3035, processed 585
traversed 2700 / 303



traversed 200 / 1158, processed 43
traversed 300 / 1158, processed 59
traversed 400 / 1158, processed 83
traversed 500 / 1158, processed 114
traversed 600 / 1158, processed 123
traversed 700 / 1158, processed 147
traversed 800 / 1158, processed 168
traversed 900 / 1158, processed 206
traversed 1000 / 1158, processed 234
traversed 1100 / 1158, processed 295
