In [5]:
import os
import requests
import json
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
from PIL import Image
from PIL.ExifTags import TAGS
from io import BytesIO

In [None]:
def create_images_folder():
    """Create the ``images`` directory in the current working directory.

    Does nothing when a filesystem entry named ``images`` already exists;
    otherwise creates the directory and prints a confirmation message.
    """
    target = "images"
    if os.path.exists(target):
        return
    os.makedirs(target)
    print("Dossier 'images' créé.")

def get_wikidata_images(query):
    """Run a SPARQL query against the Wikidata query service.

    Args:
        query: SPARQL query text to send.

    Returns:
        Whatever ``SPARQLWrapper`` yields after converting the response with
        the ``JSON`` return format.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        "https://query.wikidata.org/sparql",
        agent="WDQS-example Python/%s.%s" % (major, minor),
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

def download_images_from_wikidata(num_images=100):
    """Download images referenced by Wikidata items into the ``images`` folder.

    The SPARQL query selects items that are an instance (P31) of
    ``wd:Q3305213`` and carry an image (P18). Each image URL is fetched,
    opened with PIL to detect its format, and saved as
    ``images/image_<idx>.<format>``, where ``<idx>`` is the position of the URL
    in the query results. A failed download is reported and skipped, so the
    indices of the saved files may have gaps.

    Args:
        num_images: Maximum number of result rows to request and download.
            Negative values are treated as 0.

    Returns:
        List of the file paths that were written successfully.
    """
    # Request exactly as many rows as will be consumed; the previous
    # hard-coded LIMIT silently capped larger values of num_images.
    limit = max(int(num_images), 0)
    query = """
    SELECT ?item ?itemLabel ?pic WHERE {
  ?item wdt:P31 wd:Q3305213;
    wdt:P18 ?pic.
}
LIMIT %d
    """ % limit

    results = get_wikidata_images(query)
    image_urls = [result["pic"]["value"] for result in results["results"]["bindings"]]
    downloaded_images = []

    # Send a browser-like (Mozilla) User-Agent with every image request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Seconds to wait on the server; without a timeout a single stalled
    # transfer blocks the whole loop indefinitely.
    request_timeout = 30

    for idx, url in enumerate(image_urls[:limit]):
        try:
            img_response = requests.get(url, headers=headers, timeout=request_timeout)
            # Fail with the real HTTP error instead of letting PIL choke on
            # an error page body.
            img_response.raise_for_status()

            with Image.open(BytesIO(img_response.content)) as img:
                extension = img.format.lower()
                filename = f"images/image_{idx}.{extension}"
                # NOTE(review): img.save() re-encodes the picture and does not
                # carry the source EXIF block over, so the saved files hold
                # little EXIF. Writing img_response.content verbatim would
                # keep it, but EXIF values (bytes, rationals) must then be
                # made JSON-serialisable before they are dumped.
                img.save(filename)

            downloaded_images.append(filename)
            print(f"Image enregistrée : {filename}")
        except Exception as e:
            # Best effort: report the failure and continue with the next URL.
            print(f"Erreur lors du téléchargement de {url}: {e}")

    return downloaded_images

def extract_metadata(image_path):
    """Collect basic properties and EXIF tags of an image file.

    Args:
        image_path: Path of the image to inspect.

    Returns:
        A dict with the keys ``filename``, ``format``, ``size`` and
        ``orientation``, plus one entry per EXIF tag. EXIF keys are the tag
        names found in ``PIL.ExifTags.TAGS``, or the numeric tag id when the
        tag is unknown. Returns an empty dict (after printing the error) when
        the file cannot be processed.
    """
    # Tags in the base IFD that point at the Exif and GPS sub-directories.
    exif_ifd_tag = 0x8769
    gps_ifd_tag = 0x8825

    try:
        # The context manager closes the underlying file handle.
        with Image.open(image_path) as img:
            width, height = img.size
            if height > width:
                orientation = "Portrait"
            elif width > height:
                orientation = "Paysage"
            else:
                orientation = "Carré"

            metadata = {
                "filename": os.path.basename(image_path),
                "format": img.format,
                "size": img.size,
                "orientation": orientation,
            }

            # Use the public getexif() API: the private _getexif() exists only
            # on some format plugins, and its absence used to discard the
            # basic metadata gathered above through the blanket except.
            exif = img.getexif()
            exif_data = dict(exif)
            # Fold the Exif sub-directory into the flat tag mapping and expose
            # the GPS sub-directory as a nested dict.
            exif_data.update(exif.get_ifd(exif_ifd_tag))
            if gps_ifd_tag in exif_data:
                exif_data[gps_ifd_tag] = dict(exif.get_ifd(gps_ifd_tag))

            for tag, value in exif_data.items():
                decoded = TAGS.get(tag, tag)
                metadata[decoded] = value

        return metadata
    except Exception as e:
        print(f"Erreur lors de l'extraction des métadonnées pour {image_path}: {e}")
        return {}

def save_metadata(image_files, output_json="metadata.json"):
    """Extract metadata for each image and write it to a JSON file.

    Args:
        image_files: Iterable of image paths passed to ``extract_metadata``.
        output_json: Path of the JSON file to write (UTF-8, indented).

    Values that ``json`` cannot encode natively (for example the ``bytes``
    and rational values found in EXIF data) are stored as their ``str()``
    representation instead of aborting the export with ``TypeError``.
    """
    metadata_list = [extract_metadata(img) for img in image_files]
    # Serialise before opening the file so a failure cannot leave a
    # truncated, invalid JSON document behind.
    payload = json.dumps(metadata_list, indent=4, ensure_ascii=False, default=str)
    with open(output_json, "w", encoding="utf-8") as f:
        f.write(payload)
    print(f"Métadonnées enregistrées dans {output_json}")

def main():
    """Run the pipeline: create the folder, download images, save metadata.

    When no image could be downloaded, a message is printed and no metadata
    file is written.
    """
    create_images_folder()
    downloaded = download_images_from_wikidata()
    if not downloaded:
        print("Aucune image téléchargée.")
        return
    save_metadata(downloaded)

# Entry point: run the whole pipeline only when this code executes as the
# top-level module (__name__ == "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


Dossier 'images' créé.
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/Las%20Meninas%2C%20by%20Diego%20Vel%C3%A1zquez%2C%20from%20Prado%20in%20Google%20Earth.jpg: Image size (781950000 pixels) exceeds limit of 178956970 pixels, could be decompression bomb DOS attack.
Image enregistrée : images/image_1.jpeg
Image enregistrée : images/image_2.mpo
Image enregistrée : images/image_3.jpeg
Image enregistrée : images/image_4.jpeg
Image enregistrée : images/image_5.jpeg
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/The%20Night%20Watch%20-%20HD.jpg: Image size (2800346094 pixels) exceeds limit of 178956970 pixels, could be decompression bomb DOS attack.
Image enregistrée : images/image_7.jpeg
Image enregistrée : images/image_8.jpeg
Image enregistrée : images/image_9.jpeg
Image enregistrée : images/image_10.jpeg
Image enregistrée : images/image_11.jpeg
Image enregistrée : images/image_12.jpeg
Image enregistrée : images/ima



Image enregistrée : images/image_38.jpeg
Image enregistrée : images/image_39.jpeg


KeyboardInterrupt: 