In [1]:
import os
from PIL import Image
from tqdm import tqdm
import concurrent.futures

# Resize non-lifestyle images
Scale each image so that its longest side is 512 pixels (preserving the aspect ratio), then center it on a 512 x 512 canvas padded with white (pixel value 255).

In [2]:
def resize_and_pad_non_lifestyle(img_path):
    """
    Fit an image inside a 512 x 512 white square without distorting it.

    The image is scaled so that its longest side becomes 512 pixels (aspect
    ratio preserved) and is then centered on a white RGB canvas.

    Args:
    img_path: Path of the image file to load.

    Returns:
    A new 512 x 512 RGB image, or None if the file could not be read or
    processed (the error is printed instead of being raised).
    """
    canvas_side = 512
    try:
        # The context manager closes the underlying file handle; without it
        # a long batch run keeps descriptors open until garbage collection.
        with Image.open(img_path) as src:
            has_alpha = src.mode in ("RGBA", "LA") or (
                src.mode == "P" and "transparency" in src.info
            )
            # convert() loads the pixel data into a new image, so `img` stays
            # usable after the file is closed. Converting before resizing also
            # lets palette images be resampled with LANCZOS.
            img = src.convert("RGBA" if has_alpha else "RGB")

        scaling_factor = canvas_side / max(img.size)
        # int() truncates, so a very elongated image could end up with a
        # 0-pixel side; clamp each side to at least one pixel.
        new_size = tuple(max(1, int(side * scaling_factor)) for side in img.size)
        img_resized = img.resize(new_size, Image.LANCZOS)

        new_img = Image.new("RGB", (canvas_side, canvas_side), "white")
        offset = (
            (canvas_side - new_size[0]) // 2,
            (canvas_side - new_size[1]) // 2,
        )
        # Use the alpha band as the paste mask so transparent pixels reveal
        # the white canvas instead of whatever colour sits under the alpha.
        new_img.paste(img_resized, offset, img_resized if has_alpha else None)
        return new_img
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort a batch.
        print(f"Error processing {img_path}: {e}")
        return None

# Resize and crop lifestyle images
Resize the shortest side to at least 512 pixels while maintaining the aspect ratio, followed by a center crop to 512x512 pixels.

## Save the images

In [4]:
def process_image(file_info):
    """
    Resize one image and save it under the mirrored output directory.

    Args:
    file_info: Tuple of (root, file, parent_input_dir, parent_output_dir),
        where root is the directory containing file, and the two parent
        directories are the tops of the input and output trees.

    Returns:
    A status string: "Processed <file>" when the resized image was saved,
    "Skipped <file>" when the file does not have a .png/.jpg/.jpeg extension,
    or "Failed <file>" when the image could not be loaded or resized.
    """
    root, file, parent_input_dir, parent_output_dir = file_info

    # Compare the real extension (with its dot) so names that merely end in
    # the letters "png"/"jpg" are not mistaken for images.
    base_name, extension = os.path.splitext(file)
    if extension.lower() not in ('.png', '.jpg', '.jpeg'):
        return f"Skipped {file}"

    img_path = os.path.join(root, file)
    processed_img = resize_and_pad_non_lifestyle(img_path)
    if processed_img is None:
        return f"Failed {file}"

    # Only create the output directory once there is something to write, so
    # failed images do not leave empty folders behind.
    rel_dir = os.path.relpath(root, parent_input_dir)
    output_dir = os.path.join(parent_output_dir, rel_dir)
    os.makedirs(output_dir, exist_ok=True)

    new_filename = f"{base_name}_resized{extension}"
    processed_img.save(os.path.join(output_dir, new_filename))
    return f"Processed {file}"

def process_and_save_images_parallel(parent_input_dir, parent_output_dir):
    """
    Run process_image over every file below parent_input_dir in a process pool.

    Args:
    parent_input_dir: Top of the directory tree to walk for input files.
    parent_output_dir: Top of the directory tree passed on to process_image.
    """
    # One work item per file found; process_image decides what to do with it.
    tasks = [
        (current_dir, filename, parent_input_dir, parent_output_dir)
        for current_dir, _subdirs, filenames in os.walk(parent_input_dir)
        for filename in filenames
    ]

    with concurrent.futures.ProcessPoolExecutor() as pool:
        results = pool.map(process_image, tasks)
        # Drain the lazy result iterator so the progress bar advances and
        # any worker exception surfaces here.
        for _ in tqdm(results, total=len(tasks)):
            pass

In [5]:
# Resize every image in the raw dataset into the Kaggle working directory.
parent_input_dir = "/kaggle/input/furniture-images-raw"
parent_output_dir = "/kaggle/working/all"
process_and_save_images_parallel(
    parent_input_dir=parent_input_dir,
    parent_output_dir=parent_output_dir,
)

100%|██████████| 21298/21298 [15:49<00:00, 22.42it/s]


In [6]:
import subprocess

def zip_folder(folder_path, zip_path):
    """
    Archive a folder recursively by invoking the external 'zip' program.

    Args:
    folder_path: The folder whose contents are added to the archive.
    zip_path: The destination path (including filename) of the zip file.

    Raises:
    subprocess.CalledProcessError: If 'zip' exits with a non-zero status.
    """
    # An argument list (rather than a shell string) avoids any shell quoting.
    subprocess.run(["zip", "-r", zip_path, folder_path], check=True)


# Bundle the resized images into a single archive for download.
all_folder_path = "/kaggle/working/all"
all_zip_path = "/kaggle/working/all-new.zip"
zip_folder(folder_path=all_folder_path, zip_path=all_zip_path)

  adding: kaggle/working/all/ (stored 0%)
  adding: kaggle/working/all/sofas/ (stored 0%)
  adding: kaggle/working/all/sofas/2-seaters/ (stored 0%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/ (stored 0%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/91121_image_4_resized.jpg (deflated 3%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/79718_image_0_resized.jpg (deflated 11%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/53184_image_2_resized.jpg (deflated 24%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/42418_image_2_resized.jpg (deflated 9%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/91431_image_7_resized.jpg (deflated 21%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/64975_image_8_resized.jpg (deflated 2%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/85215_image_28_resized.jpg (deflated 15%)
  adding: kaggle/working/all/sofas/2-seaters/2-seaters/54312_image_5_resized.jpg (deflated 19%)
  adding: kaggle/w