In [8]:
from utils import download_images

In [9]:
import pandas as pd
from tqdm import tqdm
import multiprocessing
from functools import partial
from utils import download_image  # Import the function

def download_images(image_links, download_folder, allow_multiprocessing=True,
                    num_processes=64, retries=3, delay=3):
    """
    Download every entry of ``image_links`` into ``download_folder``.

    Note: this definition rebinds the name ``download_images`` that an earlier
    cell imports from ``utils``; after this cell runs, the notebook uses this
    version.

    Args:
        image_links (list): Sized collection of links. Each item is passed as
            the first argument to ``utils.download_image``.
        download_folder (str): Forwarded to ``download_image`` as ``save_folder``.
        allow_multiprocessing (bool): When True, fan the downloads out over a
            ``multiprocessing.Pool``; otherwise download sequentially.
        num_processes (int): Upper bound on the number of pool workers
            (defaults to 64, the value that used to be hard-coded).
        retries (int): Forwarded unchanged to ``download_image``.
        delay (int): Forwarded unchanged to ``download_image``.

    Returns:
        None
    """
    total = len(image_links)
    if total == 0:
        # Nothing to do: avoid spawning worker processes for an empty job list.
        return

    if allow_multiprocessing:
        download_image_partial = partial(
            download_image, save_folder=download_folder, retries=retries, delay=delay)

        # Never start more workers than there are links to fetch.
        worker_count = min(num_processes, total)
        with multiprocessing.Pool(worker_count) as pool:
            # list(...) drains the lazy imap iterator so every download actually
            # runs and tqdm can advance once per completed item.
            list(tqdm(pool.imap(download_image_partial, image_links), total=total))
            # Shut the workers down gracefully before the context manager's
            # exit terminates the pool.
            pool.close()
            pool.join()
    else:
        for image_link in tqdm(image_links, total=total):
            download_image(image_link, save_folder=download_folder, retries=retries, delay=delay)


In [10]:

if __name__ == "__main__":
    # Example usage: read the sample CSV, take its 'image_link' column as a
    # plain Python list, and download every link into the Images folder.
    # NOTE(review): both paths are absolute and specific to one machine;
    # adjust them before running this cell elsewhere.
    data = pd.read_csv('/Users/aryanahmadchaudhary/Amazon ML Hackathon/Download_Images/sample_test.csv')
    image_links = data['image_link'].tolist()
    download_images(image_links, '/Users/aryanahmadchaudhary/Amazon ML Hackathon/Download_Images/new/Images')


100%|██████████| 88/88 [00:08<00:00, 10.99it/s]


In [11]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

# Default target size used by preprocess_image / preprocess_images when no
# explicit target_size is given; it is passed straight to Image.resize.
# Adjust this to match your model's expected input size.
TARGET_SIZE = (224, 224)
# Read by preprocess_images: when True, every successfully preprocessed image
# is also written to disk via save_processed_image.
SAVE_PROCESSED_IMAGES = True  # Set to True to save processed images

def preprocess_image(image_path, target_size=TARGET_SIZE):
    """
    Load one image, convert it to RGB, resize it and scale its pixels to [0, 1].

    Every image is converted to 3-channel RGB before resizing so that
    grayscale, palette and RGBA files all produce arrays of the same shape
    and can be stacked into a single batch later.

    Args:
        image_path (str): Path to the image file.
        target_size (tuple): Desired image size (width, height), as expected
            by ``Image.resize``.

    Returns:
        np.array: Float array of shape (height, width, 3) with values in
        [0, 1], or None if the image could not be read or processed (the
        error is printed rather than raised).
    """
    try:
        # The context manager closes the underlying file as soon as the pixel
        # data has been read, instead of leaving it to garbage collection.
        with Image.open(image_path) as img:
            # convert() returns a new, fully loaded RGB image; resize() then
            # brings it to the requested (width, height).
            rgb_img = img.convert("RGB").resize(target_size)

        # Convert image to array
        img_array = np.array(rgb_img)

        # Normalize pixel values (0-255) to range [0, 1]
        return img_array / 255.0

    except Exception as e:
        # Best effort: report the failure and let the caller skip this image.
        print(f"Error processing image {image_path}: {e}")
        return None

def preprocess_images(image_folder, target_size=TARGET_SIZE):
    """
    Preprocess all images in a given folder.

    Files are selected by extension (.png, .jpg, .jpeg, case-insensitive) and
    processed in sorted path order so repeated runs give the same ordering.
    Images that fail to preprocess are skipped, so the result may contain
    fewer entries than the number of files found.

    Args:
        image_folder (str): Path to the folder containing the images.
        target_size (tuple): Desired image size (width, height).

    Returns:
        np.ndarray: Array built from the successfully preprocessed images,
        one entry per image, in processing order.
    """
    image_extensions = {'.png', '.jpg', '.jpeg'}

    # Match on the real file extension, ignoring case, so "photo.JPG" is
    # included and names that merely end in "png"/"jpg" are not.
    image_files = sorted(
        os.path.join(image_folder, name)
        for name in os.listdir(image_folder)
        if os.path.splitext(name)[1].lower() in image_extensions
    )
    total_images = len(image_files)

    print(f"Found {total_images} images to preprocess.")

    preprocessed_images = []
    for image_file in tqdm(image_files, desc="Preprocessing images"):
        preprocessed_image = preprocess_image(image_file, target_size=target_size)

        # preprocess_image returns None on failure; skip those files.
        if preprocessed_image is None:
            continue

        preprocessed_images.append(preprocessed_image)

        # Optionally save the preprocessed image
        if SAVE_PROCESSED_IMAGES:
            save_processed_image(preprocessed_image, image_file, target_size)

    return np.array(preprocessed_images)

def save_processed_image(image_array, original_image_path, target_size):
    """
    Save a preprocessed image into the "preprocessed_images" folder.

    The file is written to ``preprocessed_images/processed_<original name>``
    relative to the current working directory. The output keeps the original
    file name, so the image format follows the original extension. Errors are
    printed rather than raised.

    Args:
        image_array (np.array): The preprocessed image array with values
            scaled to [0, 1].
        original_image_path (str): Original image path to derive the name.
        target_size (tuple): Unused; kept so existing callers keep working.
    """
    try:
        processed_image_folder = "preprocessed_images"
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(processed_image_folder, exist_ok=True)

        # Scale back to 0-255. Round instead of truncating so floating-point
        # error cannot drop a pixel by one level, and clip so out-of-range
        # values cannot wrap around when cast to uint8.
        pixels = np.clip(np.rint(np.asarray(image_array) * 255), 0, 255).astype('uint8')
        img = Image.fromarray(pixels)

        # Generate save path
        filename = os.path.basename(original_image_path)
        save_path = os.path.join(processed_image_folder, f"processed_{filename}")

        # JPEG has no alpha channel; drop it so RGBA arrays can still be saved.
        if img.mode == "RGBA" and filename.lower().endswith(('.jpg', '.jpeg')):
            img = img.convert("RGB")

        # Save image
        img.save(save_path)
        print(f"Saved preprocessed image to {save_path}")
    except Exception as e:
        print(f"Error saving image {original_image_path}: {e}")


In [12]:

# Example usage: preprocess every image in the download folder using the
# default TARGET_SIZE.
# NOTE(review): the path below is absolute and machine-specific.
image_folder = "/Users/aryanahmadchaudhary/Amazon ML Hackathon/Download_Images/new/Images"  # Replace with the path to your images folder
preprocessed_images = preprocess_images(image_folder, target_size=TARGET_SIZE)

# Expected shape of preprocessed_images: (num_images, 224, 224, 3).
# This assumes every image decodes to three channels; TODO confirm for
# grayscale or RGBA inputs.
print(f"Preprocessed {len(preprocessed_images)} images.")


Found 54 images to preprocess.


Preprocessing images:   9%|▉         | 5/54 [00:00<00:01, 46.41it/s]

Saved preprocessed image to preprocessed_images/processed_71ta6wY3HtL.jpg
Saved preprocessed image to preprocessed_images/processed_51+oHGvSvuL.jpg
Saved preprocessed image to preprocessed_images/processed_41zgjN+zW3L.jpg
Saved preprocessed image to preprocessed_images/processed_71fWddA0+yL.jpg
Saved preprocessed image to preprocessed_images/processed_51cPZYLk2YL.jpg
Saved preprocessed image to preprocessed_images/processed_41-NCxNuBxL.jpg
Saved preprocessed image to preprocessed_images/processed_71eCfiIG-AL.jpg
Saved preprocessed image to preprocessed_images/processed_51KykmLgc0L.jpg
Saved preprocessed image to preprocessed_images/processed_51r7U52rh7L.jpg
Saved preprocessed image to preprocessed_images/processed_615Cjzm6pyL.jpg
Saved preprocessed image to preprocessed_images/processed_514pScQdlCL.jpg
Saved preprocessed image to preprocessed_images/processed_51jTe522S2L.jpg
Saved preprocessed image to preprocessed_images/processed_41ygXRvf8lL.jpg


Preprocessing images:  26%|██▌       | 14/54 [00:00<00:00, 66.94it/s]

Saved preprocessed image to preprocessed_images/processed_71UYDq4nfnL.jpg
Saved preprocessed image to preprocessed_images/processed_51oaOP8qJlL.jpg


Preprocessing images:  39%|███▉      | 21/54 [00:00<00:00, 63.44it/s]

Saved preprocessed image to preprocessed_images/processed_61E2XRNSdYL.jpg
Saved preprocessed image to preprocessed_images/processed_71WAjPMQDWL.jpg
Saved preprocessed image to preprocessed_images/processed_71UN1IxKp4L.jpg
Saved preprocessed image to preprocessed_images/processed_514bY8c4ZIL.jpg
Saved preprocessed image to preprocessed_images/processed_51EBBqNOJ1L.jpg
Saved preprocessed image to preprocessed_images/processed_51y79cwGJFL.jpg
Saved preprocessed image to preprocessed_images/processed_41uwo4PVnuL.jpg
Saved preprocessed image to preprocessed_images/processed_41nblnEkJ3L.jpg
Saved preprocessed image to preprocessed_images/processed_51P0IuT6RsL.jpg
Saved preprocessed image to preprocessed_images/processed_41o3iis9E7L.jpg
Saved preprocessed image to preprocessed_images/processed_41pvwR9GbaL.jpg
Saved preprocessed image to preprocessed_images/processed_51l6c6UcRZL.jpg
Saved preprocessed image to preprocessed_images/processed_51kdBAv6ImL.jpg
Saved preprocessed image to preprocess

Preprocessing images:  81%|████████▏ | 44/54 [00:00<00:00, 72.43it/s]

Saved preprocessed image to preprocessed_images/processed_81PG3ea0MOL.jpg
Saved preprocessed image to preprocessed_images/processed_51FSlaVlejL.jpg
Saved preprocessed image to preprocessed_images/processed_61lX6IP1SVL.jpg
Saved preprocessed image to preprocessed_images/processed_51Su6zXkAsL.jpg
Saved preprocessed image to preprocessed_images/processed_71afEPoRGsL.jpg
Saved preprocessed image to preprocessed_images/processed_81IYdOV0mVL.jpg
Saved preprocessed image to preprocessed_images/processed_613P5cxQH4L.jpg
Saved preprocessed image to preprocessed_images/processed_71v+pim0lfL.jpg
Saved preprocessed image to preprocessed_images/processed_51BEuVR4ZzL.jpg
Saved preprocessed image to preprocessed_images/processed_51bEy0J5wLL.jpg
Saved preprocessed image to preprocessed_images/processed_51tEop-EBJL.jpg
Saved preprocessed image to preprocessed_images/processed_61C+fwVD6dL.jpg
Saved preprocessed image to preprocessed_images/processed_51-WIOx5pxL.jpg


Preprocessing images: 100%|██████████| 54/54 [00:00<00:00, 67.42it/s]

Saved preprocessed image to preprocessed_images/processed_71Qk6hR9-WL.jpg
Saved preprocessed image to preprocessed_images/processed_417SThj+SrL.jpg
Saved preprocessed image to preprocessed_images/processed_81qUmRUUTTL.jpg
Saved preprocessed image to preprocessed_images/processed_61G8bvWOb-L.jpg
Saved preprocessed image to preprocessed_images/processed_81aZ2ozp1GL.jpg
Saved preprocessed image to preprocessed_images/processed_614hn5uX9MS.jpg
Saved preprocessed image to preprocessed_images/processed_61O+Yi09tyL.jpg
Preprocessed 54 images.



