In [3]:
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image

categories = [
    'Shoe', 'Sneaker', 'Bottle', 'Cup', 'Sandal', 'Perfume', 'Toy', 'Sunglasses', 'Car', 'Water Bottle',
    'Chair', 'Office Chair', 'Can', 'Cap', 'Hat', 'Couch', 'Wristwatch', 'Glass', 'Bag', 'Handbag', 'Baggage',
    'Suitcase', 'Headphones', 'Jar', 'Vase'
]

base_url = "https://in.images.search.yahoo.com/search/images;_ylt=AwrKAmbqHFRmcVERdEy7HAx.;_ylu=Y29sbwNzZzMEcG9zAzEEdnRpZAMEc2VjA3BpdnM-?p=={query}"  # Placeholder, replace with a legal image source

# Function to fetch image URLs from a search query
def fetch_image_urls(query, max_urls=200):
    url = base_url.format(query=query)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract image URLs based on your image source's HTML structure
    image_tags = soup.find_all('img', attrs={'src': True})  # Adjust selector as needed

    image_urls = []
    for image_tag in image_tags[:max_urls]:
        image_url = image_tag['src']
        image_urls.append(image_url)

    return image_urls

# Function to download and process images
def download_and_process_image(image_url, category_dir, filename):
    response = requests.get(image_url, stream=True)
    if response.status_code == 200:
        try:
            with open(os.path.join(category_dir, filename), 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

            # Resize and convert to RGB
            image_path = os.path.join(category_dir, filename)
            with Image.open(image_path) as img:
                img = img.resize((256, 256))
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img.save(image_path)
        except Exception as e:
            print(f"Error processing image {image_url}: {e}")

# Main function
def main():
    # Define the directory to save images
    save_dir = 'Custom Datatset'
    os.makedirs(save_dir, exist_ok=True)

    for category in categories:
        print(f"Fetching images for {category}...")

        # Create category directory
        category_dir = os.path.join(save_dir, category)
        os.makedirs(category_dir, exist_ok=True)

        # Fetch image URLs (respect search engine terms of service)
        image_urls = fetch_image_urls(category)

        # Download and process up to 200 images per category
        for i, image_url in enumerate(image_urls[:200]):
            filename = f"{category}_{i}.jpg"  # Use a common format like JPG for consistency
            download_and_process_image(image_url, category_dir, filename)

        print(f"{len(image_urls[:200])} images downloaded and saved for {category}.")

if __name__ == "__main__":
    main()


Fetching images for Shoe...
59 images downloaded and saved for Shoe.
Fetching images for Sneaker...
60 images downloaded and saved for Sneaker.
Fetching images for Bottle...
57 images downloaded and saved for Bottle.
Fetching images for Cup...
60 images downloaded and saved for Cup.
Fetching images for Sandal...
60 images downloaded and saved for Sandal.
Fetching images for Perfume...
60 images downloaded and saved for Perfume.
Fetching images for Toy...
57 images downloaded and saved for Toy.
Fetching images for Sunglasses...
60 images downloaded and saved for Sunglasses.
Fetching images for Car...
60 images downloaded and saved for Car.
Fetching images for Water Bottle...
60 images downloaded and saved for Water Bottle.
Fetching images for Chair...
60 images downloaded and saved for Chair.
Fetching images for Office Chair...
60 images downloaded and saved for Office Chair.
Fetching images for Can...
60 images downloaded and saved for Can.
Fetching images for Cap...
59 images download

In [None]:
categories = {
    "apparel": ["Shoe", "Sneaker", "Sandal", "Hat", "Cap" ],
    "electronics": ["Sunglasses", "Headphones"],
    "accessories": ["Perfume", "Wristwatch", "Glass", "Bag", "Handbag", "Suitcase"],
    "containers": ["Bottle", "Cup", "Water Bottle", "Can", "Jar", "Vase"],
    "furniture": ["Chair", "Office Chair","Couch"],
}
