In [None]:
# SELECT DISTINCT ?image ?date ?car ?carLabel ?brandLabel 
# WHERE {
#   ?car wdt:P31 wd:Q850270;     
#        wdt:P18 ?image;      
#        wdt:P571 ?date;
#        wdt:P176 ?brand;     
#        rdfs:label ?carLabel.
  
#   ?brand rdfs:label ?brandLabel.
  
#   FILTER(LANG(?carLabel) = "fr" && LANG(?brandLabel) = "fr")
# }

Téléchargement du JSON et extraction des images (à ne pas exécuter, car cela prend beaucoup de temps)

In [8]:
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import requests

# SPARQL endpoint that get_results() sends the query to.
endpoint_url = "https://query.wikidata.org/sparql"

# Query sent to the endpoint. For every ?car that is an instance (P31) of Q850270
# (presumably "concept car", judging by the variable names used below - confirm),
# it selects the ?image (P18), ?date (P571) and ?brand (P176) statements together
# with the labels of the car and of the brand. The FILTER keeps only rows where
# both labels are tagged "fr", and LIMIT 10 caps the result at ten rows.
query = """SELECT DISTINCT ?image ?date ?car ?carLabel ?brandLabel 
WHERE {
  ?car wdt:P31 wd:Q850270;     
       wdt:P18 ?image;      
       wdt:P571 ?date;
       wdt:P176 ?brand;     
       rdfs:label ?carLabel.
  
  ?brand rdfs:label ?brandLabel.
  
  FILTER(LANG(?carLabel) = "fr" && LANG(?brandLabel) = "fr")
}
LIMIT 10
"""

def get_results(endpoint_url, query):
    """Run a SPARQL query against an endpoint and return the converted JSON result.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper`` produces after converting the JSON response.
    """
    # Identify the client with the running interpreter's major.minor version.
    major_minor = sys.version_info[:2]
    user_agent = "WDQS-example Python/%s.%s" % major_minor

    client = SPARQLWrapper(endpoint_url, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()

results = get_results(endpoint_url, query)

# Flatten every SPARQL binding ({"var": {"value": ...}}) into a plain dict of values.
fields = ("image", "date", "car", "carLabel", "brandLabel")
conceptcars = [
    {field: binding[field]["value"] for field in fields}
    for binding in results["results"]["bindings"]
]

# Drop backslashes and slashes from the labels; the download cell uses the
# label as a file name, where those characters would act as path separators.
for entry in conceptcars:
    entry["carLabel"] = entry["carLabel"].replace("\\", "").replace("/", "")

# Persist the cleaned records so later cells can reload them without re-querying.
with open('conceptcars.json', 'w', encoding='utf-8') as f:
    json.dump(conceptcars, f, ensure_ascii=False, indent=4)


In [18]:
# Make sure the target folder exists (fresh checkout), then empty it so that
# files from a previous run do not linger next to the new downloads.
os.makedirs("images", exist_ok=True)
for filename in os.listdir("images"):
    os.remove(f"images/{filename}")

with open('conceptcars.json', 'r', encoding='utf-8') as f:
    conceptcars = json.load(f)

# Browser-like User-Agent sent with every image request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

# n is the 1-based position of the record; it is only used to build a
# fallback file name when a record has no "carLabel".
for n, car in enumerate(conceptcars, start=1):
    image_url = car["image"]

    try:
        # The timeout prevents one stalled connection from hanging the cell forever.
        r = requests.get(image_url, allow_redirects=True, headers=headers, timeout=30)
        # Without this check an HTTP error page would be written to disk as a ".jpg".
        r.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping {image_url}: {exc}")
        continue

    car_name = car.get("carLabel", "Image n°" + str(n))

    # Records sharing the same label overwrite each other, as before.
    with open(f'images/{car_name}.jpg', 'wb') as f:
        f.write(r.content)

In [1]:
import os
import json
from PIL import Image
from PIL.ExifTags import TAGS
from sklearn.cluster import KMeans
import numpy as np

# Folder scanned for input pictures, and folder that receives one JSON file per picture.
images_dir = "images"
metadata_dir = "metadata"

# Create the output folder up front; exist_ok=True keeps re-running this cell harmless.
os.makedirs(metadata_dir, exist_ok=True)

def get_image_metadata(image_path):
    """Collect basic properties, selected EXIF fields and dominant colors of an image.

    Args:
        image_path: Path of an image file that Pillow can open.

    Returns:
        A dict with the keys "format", "size", "mode", "orientation" and
        "colors" (four RGB cluster centers, each a list of three ints).
        "camera_model" and "date_taken" are added only when the file carries
        EXIF data; each falls back to "Unknown" when its tag is absent.
    """
    with Image.open(image_path) as img:
        if img.width > img.height:
            orientation = "Landscape"
        elif img.height > img.width:
            orientation = "Portrait"
        else:
            orientation = "Square"

        metadata = {
            "format": img.format,
            "size": img.size,
            "mode": img.mode,
            "orientation": orientation,
        }

        # Use the public getexif() API: the private _getexif() is not defined
        # on every image class, so calling it can raise AttributeError.
        exif = img.getexif()
        exif_info = {TAGS.get(tag, tag): value for tag, value in exif.items()}
        # getexif() exposes only the base IFD at top level; tags such as
        # DateTimeOriginal are stored in the Exif sub-IFD (pointer tag 0x8769).
        exif_info.update(
            {TAGS.get(tag, tag): value for tag, value in exif.get_ifd(0x8769).items()}
        )
        if exif_info:
            metadata.update({
                "camera_model": exif_info.get("Model", "Unknown"),
                "date_taken": exif_info.get("DateTimeOriginal", "Unknown"),
            })

        # Extract predominant colors using KMeans.
        # Converting to RGB first guarantees an (n_pixels, 3) array: grayscale
        # and palette images would otherwise give a 1-D array that KMeans
        # rejects, and RGBA/CMYK images would give 4-component "colors".
        # NOTE: every pixel is clustered, which can be slow for large pictures.
        pixels = np.asarray(img.convert("RGB")).reshape(-1, 3)
        kmeans = KMeans(n_clusters=4, random_state=0).fit(pixels)
        metadata["colors"] = kmeans.cluster_centers_.astype(int).tolist()

    return metadata

def add_tags(metadata):
    """Attach a placeholder tag list to ``metadata`` and return it.

    The dict is modified in place: its "tags" entry is set (or overwritten)
    with a fresh list holding the single example tag. Real tags could later
    be supplied by hand or derived with NLP techniques.

    Args:
        metadata: Metadata dict to annotate.

    Returns:
        The same ``metadata`` object, now carrying a "tags" key.
    """
    metadata.update(tags=["#example_tag"])
    return metadata

# Write one JSON metadata file per supported picture found in images_dir.
supported_suffixes = (".jpg", ".jpeg", ".png")

for filename in os.listdir(images_dir):
    # Anything without a known picture extension (compared case-insensitively) is ignored.
    if not filename.lower().endswith(supported_suffixes):
        continue

    metadata = add_tags(get_image_metadata(os.path.join(images_dir, filename)))

    # The JSON file reuses the picture's base name: "<name>.jpg" -> "<name>.json".
    json_path = os.path.join(metadata_dir, os.path.splitext(filename)[0] + ".json")

    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(metadata, json_file, indent=4, ensure_ascii=False)