## Functions to request Google images of waste

The free tier of the Google Custom Search JSON API is limited to 100 search queries per day, and each query returns at most 10 images.

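A Google API key and a Programmable Search Engine were created for this notebook; the code below reads them from the `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` environment variables.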

In [2]:
import requests
import os
from PIL import Image
from io import BytesIO
from dotenv import load_dotenv
from datetime import datetime

# Let python-dotenv populate the environment before the credentials are read.
load_dotenv()

# Credentials for the Custom Search request; either is None when the
# corresponding environment variable is not set.
API_KEY = os.environ.get("GOOGLE_API_KEY")
CSE_ID = os.environ.get("GOOGLE_CSE_ID")

# Default number of search results requested per query.
NUM_IMAGES = 25
# Every saved image is resized to this (width, height).
IMAGE_SIZE = (224, 224)
# Root output folder; one sub-folder per category is created beneath it.
SAVE_DIR = "./waste_dataset"

# Maps a category (used as the output sub-folder) to its search queries.
categories = {
    "compostable": [
        "leaves",
        "vegetablescraps",
        "cornhusks",
        "potatopeels",
    ]
}

def fetch_images(search_query, category, num_images=NUM_IMAGES, timeout=10):
    """Download image-search results for a query into ``SAVE_DIR/category``.

    Results are requested from the Google Custom Search endpoint in pages of
    at most 10. Each result's image is downloaded, converted to RGB, resized
    to ``IMAGE_SIZE`` and saved as a ``.jpg`` file whose name combines the
    query, a timestamp and the result's running index.

    Args:
        search_query: Text sent as the ``q`` parameter; also used as the
            file-name prefix.
        category: Name of the sub-folder of ``SAVE_DIR`` to save into.
        num_images: Number of search results to process. Results that fail
            to download or decode still count toward this total, so fewer
            files than ``num_images`` may be written.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    target_dir = f"{SAVE_DIR}/{category}"
    images_fetched = 0
    start_index = 1

    while images_fetched < num_images:
        batch_size = min(num_images - images_fetched, 10)
        params = {
            "q": search_query,
            "searchType": "image",
            "key": API_KEY,
            "cx": CSE_ID,
            "num": batch_size,
            "start": start_index,
        }

        # A timeout keeps a stalled connection from hanging the whole run, and
        # a connection failure is reported instead of aborting the collection.
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as e:
            print(f"Error fetching images: {e}")
            return

        if response.status_code != 200:
            # The error body is not guaranteed to be JSON (e.g. an HTML page
            # from a proxy), so fall back to the generic message.
            try:
                message = response.json().get("error", {}).get("message", "Unknown error")
            except (ValueError, AttributeError):
                message = "Unknown error"
            print(f"Error fetching images: {message}")
            return

        results = response.json().get("items", [])
        if not results:
            print(f"No more results for {search_query}.")
            break

        # Created lazily so that no empty folder is left for a query that
        # returned nothing.
        os.makedirs(target_dir, exist_ok=True)

        for idx, item in enumerate(results):
            # Best effort per image: any failure (missing link, HTTP error,
            # undecodable data, write error) is reported and skipped.
            try:
                img_url = item["link"]
                img_response = requests.get(img_url, timeout=timeout)
                # Reject error pages here rather than handing them to PIL.
                img_response.raise_for_status()
                img = Image.open(BytesIO(img_response.content)).convert("RGB")
                img = img.resize(IMAGE_SIZE)

                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"{search_query}_{timestamp}_{images_fetched + idx + 1}.jpg"
                img.save(f"{target_dir}/{filename}")
                print(f"Saved: {filename}")
            except Exception as e:
                print(f"Error with {search_query} image: {e}")

        # Advance by the number of results returned, not the number saved, so
        # the next page starts right after the last result seen.
        images_fetched += len(results)
        start_index += len(results)

# Flatten the category -> queries mapping into (query, category) pairs and
# fetch them one by one, in the mapping's own order.
pending = (
    (query, category)
    for category, query_list in categories.items()
    for query in query_list
)
for query, category in pending:
    fetch_images(query, category)

print("Image collection complete")


Saved: leaves_20250218_082248_1.jpg
Saved: leaves_20250218_082248_2.jpg
Saved: leaves_20250218_082248_3.jpg
Saved: leaves_20250218_082248_4.jpg
Saved: leaves_20250218_082249_5.jpg
Saved: leaves_20250218_082249_6.jpg
Saved: leaves_20250218_082250_7.jpg
Saved: leaves_20250218_082250_8.jpg
Saved: leaves_20250218_082250_9.jpg
Saved: leaves_20250218_082250_10.jpg
Saved: leaves_20250218_082250_11.jpg
Saved: leaves_20250218_082250_12.jpg
Error with leaves image: cannot identify image file <_io.BytesIO object at 0x1083aebb0>
Saved: leaves_20250218_082251_14.jpg
Saved: leaves_20250218_082251_15.jpg
Saved: vegetablescraps_20250218_082251_1.jpg
Saved: vegetablescraps_20250218_082252_2.jpg
Saved: vegetablescraps_20250218_082252_3.jpg
Error with vegetablescraps image: cannot identify image file <_io.BytesIO object at 0x11e0aeca0>
Saved: vegetablescraps_20250218_082253_5.jpg
Saved: vegetablescraps_20250218_082253_6.jpg
Saved: vegetablescraps_20250218_082253_7.jpg
Error with vegetablescraps image: ca

**Compostable:**

Orange peel
Apple core
Banana peel
Coffee grounds
Tea bag
Eggshell
Paper towel
Grass clippings
Leaves
Bread crust


**Non-Recyclable:**

Styrofoam plate
Plastic wrap
Pizza box with grease
Snack wrapper
Juice box
Toothpaste tube
Plastic bag
Broken glass
Disposable mask
Ceramic dish