In [15]:
import jsonlines
import aiohttp
import asyncio
from PIL import Image
import io
import os
import uuid
from typing import Optional

In [None]:
# Collect the URL of every labeled image across all scraped sites.
images_urls = []
with jsonlines.open("data/items_thundrbot_9.jsonl") as reader:
    for site in reader:
        labeled_images = site["labeled_images"]
        if labeled_images is None:
            # This record carries no labeled images; nothing to collect.
            continue
        images_urls.extend(entry["url"] for entry in labeled_images)

In [None]:
# Flatten the labeled text entries of every scraped site into one list.
text_pairs = []
with jsonlines.open("data/items_thundrbot_9.jsonl") as reader:
    for site in reader:
        labeled_text = site["labeled_text"]
        if labeled_text is None:
            # This record carries no labeled text; nothing to collect.
            continue
        text_pairs.extend(labeled_text)

In [None]:
# Persist the collected text entries, one JSON document per line.
with jsonlines.open("data/text_pairs_dataset.jsonl", mode="w") as pairs_writer:
    pairs_writer.write_all(text_pairs)

In [None]:
async def download_resize_image(image_url: str, target_directory: str, filename: str) -> None:
    """Download an image, fit it onto a 400x400 canvas, and save it to disk.

    The image is fetched with aiohttp, shrunk in place with ``thumbnail`` so it
    fits inside a 400x400 box (aspect ratio preserved), centred on a solid
    magenta background, and written to ``target_directory/filename``. Pillow
    picks the output format from the extension of ``filename``.

    The function is best-effort: a URL that cannot be fetched or decoded is
    reported with ``print`` and skipped, so one bad URL does not abort a batch.

    Args:
        image_url: URL of the image to fetch.
        target_directory: Directory to write into; created if it does not exist.
        filename: Name of the output file, including its extension.

    Returns:
        None. The result is the file written to disk.

    Raises:
        Errors from creating the directory or saving the file are not caught
        and propagate to the caller.
    """
    canvas_size = (400, 400)
    # Magenta fill; it shows wherever the source image is transparent or does
    # not cover the canvas.
    background_rgb = (255, 39, 255)

    # Fetch the raw bytes. `except Exception` (instead of a bare `except`)
    # keeps the best-effort behaviour while letting asyncio.CancelledError and
    # KeyboardInterrupt propagate, so task cancellation keeps working.
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(image_url) as response:
                response.raise_for_status()
                image_bytes = await response.read()
    except Exception as exc:
        print("Failed:", image_url, f"({exc!r})")
        return

    # Decode and compose. A payload that is not a readable image is treated
    # like a failed download rather than raised out of the task.
    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            # RGBA guarantees an alpha channel, used as the paste mask below.
            img = img.convert("RGBA")
            new_img = Image.new("RGB", canvas_size, background_rgb)

            img.thumbnail(canvas_size, Image.LANCZOS)
            # Offsets that centre the resized image on the canvas.
            left = (canvas_size[0] - img.width) // 2
            top = (canvas_size[1] - img.height) // 2
            # Paste using the image's own alpha channel as the mask so that
            # transparent pixels reveal the background colour.
            new_img.paste(img, (left, top), img)
    except Exception as exc:
        print("Failed:", image_url, f"({exc!r})")
        return

    # Ensure the target directory exists, then write the composed image.
    os.makedirs(target_directory, exist_ok=True)
    file_path = os.path.join(target_directory, filename)
    new_img.save(file_path)


In [None]:
# Example usage within an asyncio event loop:
async def main(max_concurrency: int = 16) -> None:
    """Download and resize every URL in ``images_urls`` into ``./images``.

    One task is created per URL inside an ``asyncio.TaskGroup``; a semaphore
    caps how many downloads run at the same time so that a long URL list does
    not open every connection at once.

    Args:
        max_concurrency: Maximum number of downloads in flight at any moment.
    """
    limiter = asyncio.Semaphore(max_concurrency)

    async def fetch_one(url: str) -> None:
        # The full 128-bit UUID is used for the file name: a truncated prefix
        # can collide across a large batch and silently overwrite an image.
        async with limiter:
            await download_resize_image(url, "./images", f"{uuid.uuid4().hex}.png")

    async with asyncio.TaskGroup() as tg:
        for url in images_urls:
            tg.create_task(fetch_one(url))

# Top-level `await` is used here because this cell is meant to run in a
# notebook (IPython) kernel, which already has an event loop running.
# When running this code as a plain Python script, use this entry point instead:
# if __name__ == "__main__":
#     asyncio.run(main())
await main()