Task 1:



In [28]:
import os
import numpy as np
from PIL import Image
import time

In [32]:
def process_file(file_path, processed_file_path, target_size = 256, aspect_ratio_thres = 1.333):
    """Center-crop an image to a square, resize it, and save it as a NumPy array.

    Parameters
    ----------
    file_path : str or path-like
        Image file to read with ``PIL.Image.open``.
    processed_file_path : str or path-like
        Destination handed to ``np.save``. NumPy appends ``.npy`` when the
        name does not already end with it (e.g. ``a.jpg`` -> ``a.jpg.npy``).
    target_size : int
        Side length, in pixels, of the square output image.
    aspect_ratio_thres : float
        Largest accepted ratio of the longer side to the shorter side.
        Images that are more elongated than this are skipped.

    Returns
    -------
    int
        0 if the image was processed and saved, 1 if it was skipped because
        its aspect ratio exceeds ``aspect_ratio_thres``.

    Raises
    ------
    OSError
        Propagated from ``Image.open`` / ``np.save`` when the file cannot be
        read as an image or the output cannot be written.
    """
    with Image.open(file_path) as img:
        # The size is unaffected by the colour-mode conversion, so decide
        # whether to skip before paying for the conversion.
        width, height = img.size
        new_size = min(width, height)
        if new_size * aspect_ratio_thres < max(width, height):
            return 1

        # convert the image to RGB to ensure consistency
        image = img.convert('RGB')

        # Crop the longer dimension around the centre so the image is square.
        # Integer offsets are used on purpose: fractional box coordinates get
        # rounded edge by edge, which can make the crop one pixel off square
        # when the width/height difference is odd.
        left = (width - new_size) // 2
        top = (height - new_size) // 2
        image = image.crop((left, top, left + new_size, top + new_size))

        # resize
        image = image.resize((target_size, target_size))

        # convert to numpy array: (target_size, target_size, 3) for RGB
        img_array = np.array(image)

        # save to file
        np.save(processed_file_path, img_array)
        return 0

In [33]:
raw_data_dir = '../../Data/Art'
processed_data_dir = '../../Data/Art_Processed'

# Create the processed_data directory if it doesn't exist
os.makedirs(processed_data_dir, exist_ok=True)

# Walk every subfolder of raw_data and mirror it under processed_data.
# Entries are sorted so the traversal order is the same on every run.
for subdir in sorted(os.listdir(raw_data_dir)):
    raw_subdir_path = os.path.join(raw_data_dir, subdir)

    # Only directories hold images; ignore stray files at the top level
    # (checked before logging so the message is only printed for real folders).
    if not os.path.isdir(raw_subdir_path):
        continue
    print(f'reading directory {subdir}')
    processed_subdir_path = os.path.join(processed_data_dir, subdir)

    # Create the subfolder in processed_data if it doesn't exist
    os.makedirs(processed_subdir_path, exist_ok=True)

    # List the folder once so `total` and the loop see the same entries.
    filenames = sorted(os.listdir(raw_subdir_path))
    total = len(filenames)
    count = 0
    processed_count = 0

    # Process each file in the subdirectory
    for filename in filenames:
        file_path = os.path.join(raw_subdir_path, filename)
        processed_file_path = os.path.join(processed_subdir_path, filename)
        count += 1

        # A single unreadable entry (non-image file, nested folder, corrupt
        # download) must not abort a run over thousands of files: report it
        # and move on. Pillow's "cannot identify image" error is an OSError.
        try:
            return_code = process_file(file_path, processed_file_path)
        except OSError as err:
            print(f'could not process {file_path}: {err}')
            return_code = None

        if return_code == 0:
            processed_count += 1
        if count % 100 == 0:
            print(f'traversed {count} / {total}, processed {processed_count}')

    # Final tally, since the periodic message only fires on multiples of 100.
    print(f'finished {subdir}: traversed {count} / {total}, processed {processed_count}')


reading directory Art_Nouveau
traversed 100 / 3035, processed 23
traversed 200 / 3035, processed 47
traversed 300 / 3035, processed 90
traversed 400 / 3035, processed 121
traversed 500 / 3035, processed 167
traversed 600 / 3035, processed 195
traversed 700 / 3035, processed 208
traversed 800 / 3035, processed 219
traversed 900 / 3035, processed 225
traversed 1000 / 3035, processed 241
traversed 1100 / 3035, processed 249


KeyboardInterrupt: 