In [None]:
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that the queries below are sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# Select at most 10 distinct items together with their country and an image,
# with labels resolved in French by the label service.
# NOTE(review): wd:Q1549591 / P31 / P17 / P18 are presumably "big city" /
# "instance of" / "country" / "image", matching the variable names — confirm
# on Wikidata.
query = """SELECT DISTINCT ?grandeville ?grandevilleLabel ?pays ?paysLabel ?image {
  ?grandeville wdt:P31 wd:Q1549591;
               wdt:P17 ?pays;
               wdt:P18 ?image.
 SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 10"""


def get_results(endpoint_url, query):
    """Send a SPARQL query to an endpoint and return the converted JSON result.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: Text of the SPARQL query.

    Returns:
        The value produced by ``SPARQLWrapper``'s ``query().convert()`` with
        the JSON return format selected.
    """
    # The user agent advertises the running interpreter's major.minor version.
    python_version = sys.version_info[:2]
    client = SPARQLWrapper(
        endpoint_url,
        agent="WDQS-example Python/%s.%s" % python_version,
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


# Flatten every result binding into a (city label, country label, image URL) tuple.
array = []
results = get_results(endpoint_url, query)

array.extend(
    tuple(binding[field]["value"] for field in ("grandevilleLabel", "paysLabel", "image"))
    for binding in results["results"]["bindings"]
)

In [None]:
# Tabulate the collected tuples and request the same "<U200" dtype for every column.
dataframe = pd.DataFrame(array, columns=["ville", "pays", "image"]).astype(
    dtype=dict.fromkeys(("ville", "pays", "image"), "<U200")
)
dataframe

In [None]:
import requests
import shutil
import os


def download_image(url, max_path_length=50, timeout=30):
    """Download ``url`` into the local ``images`` directory.

    The file is named after the last path component of ``url``. The body is
    streamed to disk, and the HTTP response is always closed, including when
    the download is skipped or the status is not 200.

    Args:
        url: Address of the image to fetch.
        max_path_length: Longest accepted ``images/<name>`` path in
            characters; longer targets are skipped without being written.
        timeout: Seconds ``requests`` waits on the server before raising,
            so a stalled host cannot hang the whole run.

    Returns:
        The HTTP status code of the response, or ``None`` when the server
        answered 200 but the target path exceeded ``max_path_length``.
    """
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs("images", exist_ok=True)
    headers = {"User-Agent": "Mozilla/5.0"}
    # With stream=True the connection stays open until the body is consumed or
    # the response is closed; the context manager releases it on every path.
    with requests.get(
        url, allow_redirects=True, headers=headers, stream=True, timeout=timeout
    ) as response:
        if response.status_code == 200:
            filename = os.path.join("images", os.path.basename(url))
            print(filename)

            if len(filename) > max_path_length:
                return None

            with open(filename, "wb") as image:
                # Ask the raw stream to undo any content encoding while copying.
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, image)
        return response.status_code

In [None]:
# Fetch every image URL of the table; the cell shows the value returned for each row.
dataframe["image"].apply(download_image)

In [None]:
import json
import numbers
import os

import numpy
from PIL import Image
from PIL.ExifTags import TAGS
from sklearn.cluster import KMeans


def getDominatedColors(imgfile, n_clusters=4, random_state=None):
    """Return the dominant colours of an image as k-means cluster centres.

    The image is first converted to RGB so that every pixel is a 3-component
    sample. Without this, single-band images (palette, greyscale) produce a
    1-D array that ``KMeans.fit`` rejects, and RGBA/CMYK images produce
    4-component centres that are not comparable with RGB ones.

    Args:
        imgfile: An opened PIL image (anything exposing ``convert("RGB")``).
        n_clusters: Number of dominant colours requested; reduced when the
            image has fewer pixels than that.
        random_state: Optional seed forwarded to ``KMeans`` to make the
            clustering reproducible.

    Returns:
        ``cluster_centers_`` of the fitted model: one row per cluster, three
        columns (R, G, B).
    """
    # Progress trace: shows which image is currently being clustered.
    print(imgfile)
    # One row per pixel, in the image's row-major order.
    pixels = numpy.asarray(imgfile.convert("RGB"), dtype=numpy.uint8).reshape(-1, 3)
    # KMeans refuses to fit more clusters than there are samples.
    n_clusters = max(1, min(n_clusters, len(pixels)))
    clusters = KMeans(n_clusters=n_clusters, n_init=2, random_state=random_state)
    clusters.fit(pixels)
    return clusters.cluster_centers_

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also accepts bytes and non-builtin numeric values.

    The stock encoder raises ``TypeError`` for anything that is not a basic
    JSON type. This subclass additionally serialises:

    * ``bytes`` as text, decoded as UTF-8 with undecodable bytes dropped;
    * ``numbers.Integral`` values as ``int``;
    * other ``numbers.Real`` values (e.g. ``fractions.Fraction``, and the
      rational EXIF values Pillow returns, which are ``numbers.Rational``
      subclasses) as ``float``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Raises:
            TypeError: If ``obj`` is none of the extra supported types
                (raised by the base class).
        """
        if isinstance(obj, bytes):
            return obj.decode('utf-8', 'ignore')
        # Integral is checked first so integer-like values keep an integer form.
        if isinstance(obj, numbers.Integral):
            return int(obj)
        if isinstance(obj, numbers.Real):
            return float(obj)
        return super().default(obj)

# Collect size, format, dominant colours and EXIF data for every JPEG/PNG
# file found in the "images" directory.
metadata = []
# Sorted so the order of the entries does not depend on the directory listing order.
for filename in sorted(os.listdir("images")):
    # Case-insensitive match: ".PNG" and ".JPEG" are accepted just like ".JPG".
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    # Open the image to read its metadata; the file is closed when the block exits.
    with Image.open(os.path.join("images", filename)) as img:
        # Replace numeric EXIF tag ids by their names when known, and turn
        # bytes values into text so they can be written as JSON.
        exif = {
            TAGS.get(tag, tag): (
                value.decode('utf-8', 'ignore') if isinstance(value, bytes) else value
            )
            for tag, value in img.getexif().items()
        }
        metadata.append({
            'filename': filename,
            'width': img.width,
            'height': img.height,
            'format': img.format,
            'mode': img.mode,
            'dominated_color': getDominatedColors(img).tolist(),
            'exif': exif
        })

# Save the collected metadata to a JSON file.
with open('metadata.json', 'w') as f:
    json.dump(metadata, f, cls=CustomEncoder)



**Création de groupes d'images et ajout de leur groupe dans les métadonnées**