In [9]:
import os
import requests
import json
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
from PIL import Image
from PIL.ExifTags import TAGS
from io import BytesIO

In [12]:
def create_images_folder():
    if not os.path.exists("images"):
        os.makedirs("images")
        print("Dossier 'images' créé.")


def get_wikidata_images(query):
    endpoint_url = "https://query.wikidata.org/sparql"
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def download_images_from_wikidata(num_images=100):
    query = """
    SELECT ?item ?itemLabel ?pic WHERE {
  ?item wdt:P31 wd:Q3305213;
    wdt:P18 ?pic.
}
LIMIT 100
    """
    
    results = get_wikidata_images(query)
    image_urls = [result["pic"]["value"] for result in results["results"]["bindings"]]
    downloaded_images = []
    
    for idx, url in enumerate(image_urls[:num_images]):
        try:
            img_response = requests.get(url)
            img = Image.open(BytesIO(img_response.content))
            
            extension = img.format.lower()
            filename = f"images/image_{idx}.{extension}"
            img.save(filename)
            downloaded_images.append(filename)
            print(f"Image enregistrée : {filename}")
        except Exception as e:
            print(f"Erreur lors du téléchargement de {url}: {e}")
    
    return downloaded_images


def extract_metadata(image_path):
    try:
        img = Image.open(image_path)
        metadata = {
            "filename": os.path.basename(image_path),
            "format": img.format,
            "size": img.size,
            "orientation": "Portrait" if img.size[1] > img.size[0] else "Paysage" if img.size[0] > img.size[1] else "Carré",
        }
        
        exif_data = img._getexif()
        if exif_data:
            for tag, value in exif_data.items():
                decoded = TAGS.get(tag, tag)
                metadata[decoded] = value
        
        return metadata
    except Exception as e:
        print(f"Erreur lors de l'extraction des métadonnées pour {image_path}: {e}")
        return {}


def save_metadata(image_files, output_json="metadata.json"):
    metadata_list = [extract_metadata(img) for img in image_files]
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(metadata_list, f, indent=4, ensure_ascii=False)
    print(f"Métadonnées enregistrées dans {output_json}")


def main():
    create_images_folder()
    images = download_images_from_wikidata()
    if images:
        save_metadata(images)
    else:
        print("Aucune image téléchargée.")


if __name__ == "__main__":
    main()


Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/Anthonis%20van%20Dyck%20004.jpg: cannot identify image file <_io.BytesIO object at 0x0000028EBEBA2390>
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/Anthony%20van%20Dyck%20-%20Crowning%20with%20Thorns%20-%20WGA07433.jpg: cannot identify image file <_io.BytesIO object at 0x0000028EBDA28B30>
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/The%20Wounded%20Angel%20-%20Hugo%20Simberg.jpg: cannot identify image file <_io.BytesIO object at 0x0000028EBDA28B30>
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/Pierre-Auguste%20Renoir%20-%20Femme%20assise%20au%20bord%20de%20la%20mer.jpg: cannot identify image file <_io.BytesIO object at 0x0000028EBEBDD6C0>
Image enregistrée : images/image_4.png
Erreur lors du téléchargement de http://commons.wikimedia.org/wiki/Special:FilePath/Caravaggio%20-%20Cupid%20as%20