In [13]:
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os
import re

def sanitize_filename(filename):
    """Remplace les caractères non valides pour les noms de fichiers."""
    return re.sub(r'[<>:"/\\|?*]', '', filename)

def download_images(keyword, num_images=500):
    # Crée un nom de dossier valide pour le mot-clé
    sanitized_keyword = sanitize_filename(keyword)
    folder_path = os.path.join(os.getcwd(), sanitized_keyword)
    
    # Crée le dossier s'il n'existe pas
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    search_url = f"https://www.bing.com/images/search?q={keyword}&form=HDRSC2&first=1&tsc=ImageBasicHover"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    response = requests.get(search_url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')

    # Trouve toutes les balises d'images
    img_tags = soup.find_all('img')

    image_count = 0
    for img_tag in img_tags:
        if image_count >= num_images:
            break

        img_url = img_tag.get('src')
        
        # Ignore les URL non valides ou les données
        if not img_url or img_url.startswith('data:'):
            continue
        
        # Gère les URL relatives
        if not img_url.startswith('http'):
            img_url = 'https://www.bing.com' + img_url
        
        try:
            img_response = requests.get(img_url)
            img = Image.open(BytesIO(img_response.content))
            
            if img.width >= 128 and img.height >= 128:
                img_path = os.path.join(folder_path, f'image_{image_count+1}.jpg')
                img.save(img_path)
                print(f'Downloaded {img_path}')
                image_count += 1
            else:
                print(f'Skipping image_{image_count+1}.jpg as it does not meet the size requirement')
        except Exception as e:
            print(f'Failed to download or process image from {img_url}: {e}')

if __name__ == '__main__':
    keyword = input("Enter keyword for image search: ")
    download_images(keyword, 500)


Enter keyword for image search:  Gengar


Failed to download or process image from https://r.bing.com/rp/f21jlSMmEDN43OaavcdaB-7Phq0.svg: cannot identify image file <_io.BytesIO object at 0x7e4c41dd5b70>
Failed to download or process image from https://r.bing.com/rp/fdVZU4ttbw8NDRm6H3I5BW3_vCo.svg: cannot identify image file <_io.BytesIO object at 0x7e4c236aec00>
Failed to download or process image from https://r.bing.com/rp/4L4QdyjTv0HYE2Ig2ol9eYoqxg8.svg: cannot identify image file <_io.BytesIO object at 0x7e4c23a35d50>
Failed to download or process image from https://r.bing.com/rp/Fsa_OI0AplCnVoXGca8ALOo0S0s.svg: cannot identify image file <_io.BytesIO object at 0x7e4c23a57a10>
Failed to download or process image from https://r.bing.com/rp/UYtUYDcn1oZlFG-YfBPz59zejYI.svg: cannot identify image file <_io.BytesIO object at 0x7e4c41dd5b70>
Failed to download or process image from https://r.bing.com/rp/KC_nX2_tPPyFvVw1RK20Yu1FyDk.svg: cannot identify image file <_io.BytesIO object at 0x7e4c23a57a10>
Failed to download or proces