In [None]:
# SELECT DISTINCT ?image ?date ?car ?carLabel ?brandLabel 
# WHERE {
#   ?car wdt:P31 wd:Q850270;     
#        wdt:P18 ?image;      
#        wdt:P571 ?date;
#        wdt:P176 ?brand;     
#        rdfs:label ?carLabel.
  
#   ?brand rdfs:label ?brandLabel.
  
#   FILTER(LANG(?carLabel) = "fr" && LANG(?brandLabel) = "fr")
# }

Téléchargement du JSON et extraction des images (à ne pas exécuter, car cela prend beaucoup de temps)

In [26]:
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import requests

# SPARQL endpoint the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# Query returning, for each matching item, its image URL (P18), inception date (P571),
# entity URI, French label and the French label of its manufacturer (P176).
# Only items that are an instance (P31) of Q850270 are selected -- presumably the
# "concept car" class, given the variable names used below; verify on Wikidata.
# The result set is capped at 150 rows. The text inside the triple quotes is sent
# verbatim, so it must not be reformatted.
query = """SELECT DISTINCT ?image ?date ?car ?carLabel ?brandLabel 
WHERE {
  ?car wdt:P31 wd:Q850270;     
       wdt:P18 ?image;      
       wdt:P571 ?date;
       wdt:P176 ?brand;     
       rdfs:label ?carLabel.
  
  ?brand rdfs:label ?brandLabel.
  
  FILTER(LANG(?carLabel) = "fr" && LANG(?brandLabel) = "fr")
}
LIMIT 150
"""

def get_results(endpoint_url, query):
    """Run a SPARQL query and return the converted JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to contact.
        query: SPARQL query text.

    Returns:
        The value produced by ``convert()`` on the query result, with JSON
        requested as the return format.
    """
    # Identify the client with the running interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent=f"WDQS-example Python/{major}.{minor}")
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

# Fetch the raw bindings from the endpoint.
results = get_results(endpoint_url, query)

# Flatten every binding to a plain {field: value} record, keeping this key order.
binding_fields = ("image", "date", "car", "carLabel", "brandLabel")
conceptcars = [
    {field: binding[field]["value"] for field in binding_fields}
    for binding in results["results"]["bindings"]
]

# Drop backslashes and forward slashes from the car label.
for entry in conceptcars:
    entry["carLabel"] = entry["carLabel"].replace("\\", "").replace("/", "")

# Persist the records as human-readable UTF-8 JSON.
with open('conceptcars.json', 'w', encoding='utf-8') as json_file:
    json.dump(conceptcars, json_file, ensure_ascii=False, indent=4)


In [27]:
# Start from an empty download folder. Create it first so that listing it cannot
# fail on a fresh checkout, and only delete regular files (os.remove raises on a
# sub-directory).
os.makedirs("images", exist_ok=True)
for filename in os.listdir("images"):
    stale_path = os.path.join("images", filename)
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Reload the records written by the query cell.
with open('conceptcars.json', 'r', encoding='utf-8') as f:
    conceptcars = json.load(f)

# Browser-like User-Agent sent with every image request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

# Download each image; `n` (1-based) is only used to build a fallback file name.
for n, car in enumerate(conceptcars, start=1):
    image_url = car["image"]
    car_name = car.get("carLabel", "Image n°" + str(n))

    try:
        # A timeout prevents one stalled connection from blocking the whole cell.
        r = requests.get(image_url, allow_redirects=True, headers=headers, timeout=30)
        # Without this check an HTTP error page would be written to disk as a ".jpg".
        r.raise_for_status()
    except requests.RequestException as e:
        # Best effort: report the failure and carry on with the remaining cars.
        print(f"Échec du téléchargement pour {car_name} ({image_url}): {e}")
        continue

    # NOTE(review): the file is always named ".jpg" whatever the real format of the
    # downloaded content is, and two records sharing a label overwrite each other.
    with open(f'images/{car_name}.jpg', 'wb') as f:
        f.write(r.content)

In [None]:
import os
import json
from PIL import Image as PILImage
from PIL.ExifTags import TAGS
import numpy as np
from sklearn.cluster import KMeans
import ipywidgets as widgets
from ipywidgets import GridspecLayout
from IPython.display import display

# Heading displayed above the tagging grid.
title = widgets.HTML("<h2 style='text-align: center;'>Tag selection</h2>")

# Input folder (downloaded pictures) and output folder (one JSON file per picture).
images_dir = "images"
metadata_dir = "metadata"
os.makedirs(metadata_dir, exist_ok=True)

# Collect the picture paths. A missing input folder is treated as "no images"
# instead of raising, and names are sorted because os.listdir() returns them in
# arbitrary order, which would make the grid order vary between runs.
image_extensions = (".jpg", ".jpeg", ".png")
image_names = sorted(os.listdir(images_dir)) if os.path.isdir(images_dir) else []
images = [os.path.join(images_dir, name) for name in image_names
          if name.lower().endswith(image_extensions)]

# Longest side, in pixels, of the reduced copy used for colour clustering.
# Clustering every pixel of a full-resolution photo is extremely slow.
COLOR_SAMPLE_MAX_SIDE = 200
# Number of dominant colours stored per image.
N_DOMINANT_COLORS = 4


def _clean_exif_text(value):
    """Return an EXIF value as a JSON-serialisable string, or None when absent."""
    if value is None:
        return None
    if isinstance(value, bytes):
        value = value.decode("utf-8", errors="replace")
    return str(value).rstrip("\x00")


def _read_exif(img, img_path):
    """Return ``(camera_model, date_taken)`` read from EXIF; each is None if unavailable."""
    # _getexif() is a private Pillow helper and is not guaranteed to exist on every
    # image class, so look it up defensively instead of assuming it is there.
    getexif = getattr(img, "_getexif", None)
    if getexif is None:
        return None, None
    try:
        exif_data = getexif()
    except Exception as e:
        print(f"Erreur lors de la lecture EXIF pour {os.path.basename(img_path)}: {e}")
        return None, None
    if not exif_data:
        return None, None
    # Map numeric EXIF tag ids to their names (unknown ids are kept as-is).
    exif_info = {TAGS.get(tag, tag): value for tag, value in exif_data.items()}
    return (_clean_exif_text(exif_info.get("Model", None)),
            _clean_exif_text(exif_info.get("DateTimeOriginal", None)))


def _dominant_colors(img):
    """Return N_DOMINANT_COLORS RGB triplets (lists of ints) found by k-means."""
    # convert() returns a new image, so shrinking it leaves `img` untouched.
    reduced = img.convert("RGB")
    reduced.thumbnail((COLOR_SAMPLE_MAX_SIDE, COLOR_SAMPLE_MAX_SIDE))
    pixels = np.asarray(reduced, dtype=np.float64).reshape(-1, 3)
    # n_init is set explicitly because its default differs between scikit-learn versions.
    kmeans = KMeans(n_clusters=N_DOMINANT_COLORS, random_state=0, n_init=10).fit(pixels)
    return kmeans.cluster_centers_.astype(int).tolist()


def _extract_metadata(img_path):
    """Open the image at ``img_path`` and return its metadata dictionary.

    Raises whatever ``PIL.Image.open`` raises when the file cannot be read as an image.
    """
    with PILImage.open(img_path) as img:
        metadata = {
            "format": img.format,
            "size": img.size,
            "mode": img.mode,
            "orientation": "Landscape" if img.width > img.height else "Portrait" if img.height > img.width else "Square",
            "camera_model": None,
            "date_taken": None,
            "colors": None,
        }
        metadata["camera_model"], metadata["date_taken"] = _read_exif(img, img_path)

        try:
            metadata["colors"] = _dominant_colors(img)
        except Exception as e:
            # Colour extraction is optional: report the problem and keep colors=None.
            print(f"Erreur lors de l'extraction des couleurs pour {os.path.basename(img_path)}: {e}")
            metadata["colors"] = None
    return metadata


def _tag_checkbox(label_text):
    """Return ``(checkbox, row)`` where ``row`` shows the checkbox next to its label."""
    box = widgets.Checkbox(value=False, description="")
    # Only min_width is set: Layout has no font_size trait, so passing one has no effect.
    label = widgets.Label(label_text, layout=widgets.Layout(min_width="70px"))
    row = widgets.HBox([box, label], layout=widgets.Layout(justify_content="center", padding="0 2px"))
    return box, row


if not images:
    print("Aucune image trouvée dans le dossier.")
else:
    # First pass: load every picture, skipping files that cannot be read as images
    # so that a single bad download does not abort the whole cell.
    entries = []
    for img_path in images:
        try:
            metadata = _extract_metadata(img_path)
            with open(img_path, "rb") as img_file:
                img_data = img_file.read()
        except (OSError, PILImage.DecompressionBombError) as e:
            print(f"Image ignorée ({os.path.basename(img_path)}): {e}")
            continue
        entries.append((img_path, img_data, metadata))

    # (img_path, vintage_box, original_box, night_box, metadata) for each grid row.
    checkboxes = []

    if not entries:
        print("Aucune image lisible dans le dossier.")
    else:
        # Second pass: one grid row per readable picture (thumbnail + three tag boxes).
        layout = GridspecLayout(len(entries), 4, width="800px", grid_gap="5px")

        for i, (img_path, img_data, metadata) in enumerate(entries):
            img_widget = widgets.Image(value=img_data, format="jpg", width=150, height=150)

            vintage_box, vintage = _tag_checkbox("Vintage")
            original_box, original = _tag_checkbox("Original")
            night_box, night = _tag_checkbox("Night Photo")

            layout[i, 0] = img_widget
            layout[i, 1] = vintage
            layout[i, 2] = original
            layout[i, 3] = night
            checkboxes.append((img_path, vintage_box, original_box, night_box, metadata))

        button = widgets.Button(description="💾 Sauvegarder les Tags", button_style="primary", layout=widgets.Layout(margin="10px auto", width="200px", display="flex", justify_content="center"))
        output = widgets.Output()

        def save_tags(btn):
            """Write one JSON file per picture with its metadata and the ticked tags.

            Args:
                btn: The clicked button, passed by ipywidgets; not used.
            """
            with output:
                output.clear_output()
                for img_path, vintage_box, original_box, night_box, metadata in checkboxes:
                    json_filename = os.path.splitext(os.path.basename(img_path))[0] + ".json"
                    json_path = os.path.join(metadata_dir, json_filename)

                    # Tags mirror the current checkbox state, in a fixed order. They are
                    # not merged with those of a previous click, otherwise a tag could
                    # never be removed by unticking its box.
                    tags = [
                        tag_name
                        for box, tag_name in (
                            (vintage_box, "isVintage"),
                            (original_box, "isOriginal"),
                            (night_box, "isNightPicture"),
                        )
                        if box.value
                    ]

                    record = {**metadata, "tags": tags}
                    with open(json_path, "w", encoding="utf-8") as f:
                        json.dump(record, f, indent=4, ensure_ascii=False)

                    print(f"✅ Tags sauvegardés pour {os.path.basename(img_path)}: {tags}")

        button.on_click(save_tags)

        display(widgets.VBox([title, layout, button, output],
                             layout=widgets.Layout(align_items="center")))


VBox(children=(HTML(value="<h2 style='text-align: center;'>Tag selection</h2>"), GridspecLayout(children=(Imag…