In [1]:
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint URL; passed to get_results together with `query`.
endpoint_url = "https://query.wikidata.org/sparql"

# Get cities: select up to 10 distinct items that have wd:Q1549591 as a P31
# value, together with their P17 value (?pays) and P18 value (?image).
# Labels are requested in French ("fr") through the wikibase:label service.
# NOTE(review): P31 / P17 / P18 are presumably Wikidata's "instance of",
# "country" and "image" properties, and Q1549591 the "big city" class — confirm.
# NOTE(review): the query has no ORDER BY, so which 10 rows are returned is
# presumably left to the endpoint — confirm if reproducibility matters.
query = """SELECT DISTINCT ?grandeville ?grandevilleLabel ?pays ?paysLabel ?image {
  ?grandeville wdt:P31 wd:Q1549591;
               wdt:P17 ?pays;
               wdt:P18 ?image.
 SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 10"""


def get_results(endpoint_url, query):
    """Run a SPARQL query and return the converted JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper.query().convert()`` produces when the return
        format is set to JSON.
    """
    # Identify the client with the major/minor version of the running Python.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url,
        agent="WDQS-example Python/%s.%s" % (major, minor),
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


results = get_results(endpoint_url, query)

# Collect one (city label, country label, image URL) tuple per result binding.
array = []
for result in results["results"]["bindings"]:
    array.append(
        tuple(
            result[field]["value"]
            for field in ("grandevilleLabel", "paysLabel", "image")
        )
    )

In [2]:
# Build the table from the collected tuples and cast every column to "<U200".
dataframe = pd.DataFrame(array, columns=["ville", "pays", "image"]).astype(
    dtype={column: "<U200" for column in ("ville", "pays", "image")}
)
dataframe

Unnamed: 0,ville,pays,image
0,Krasnoïarsk,Russie,http://commons.wikimedia.org/wiki/Special:File...
1,São Paulo,Brésil,http://commons.wikimedia.org/wiki/Special:File...
2,Boston,États-Unis,http://commons.wikimedia.org/wiki/Special:File...
3,Le Caire,Égypte,http://commons.wikimedia.org/wiki/Special:File...
4,Szczecin,Pologne,http://commons.wikimedia.org/wiki/Special:File...
5,Tomsk,Russie,http://commons.wikimedia.org/wiki/Special:File...
6,Katowice,République populaire de Pologne,http://commons.wikimedia.org/wiki/Special:File...
7,Ville de Bruxelles,Belgique,http://commons.wikimedia.org/wiki/Special:File...
8,Gênes,Italie,http://commons.wikimedia.org/wiki/Special:File...
9,Groningue,Pays-Bas,http://commons.wikimedia.org/wiki/Special:File...


In [3]:
import requests
import shutil
import os


def download_image(url, directory="images", max_path_length=50, timeout=30):
    """Download the file at ``url`` into ``directory``.

    The target path is ``directory`` joined with the last path component of
    ``url`` (used verbatim, so percent-escapes such as ``%20`` are kept).

    Args:
        url: Address of the file to download.
        directory: Folder receiving the file; created when missing.
        max_path_length: Longest accepted target path, in characters. Longer
            paths are skipped and nothing is written.
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            stalled server cannot block the caller forever.

    Returns:
        The HTTP status code of the response, or ``None`` when the response
        was 200 but the target path exceeded ``max_path_length``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(directory, exist_ok=True)
    headers = {"User-Agent": "Mozilla/5.0"}
    # Using the response as a context manager releases the streamed connection
    # on every exit path, including the early return for over-long paths.
    with requests.get(
        url, allow_redirects=True, headers=headers, stream=True, timeout=timeout
    ) as response:
        if response.status_code == 200:
            filename = os.path.join(directory, os.path.basename(url))
            print(filename)

            if len(filename) > max_path_length:
                return None

            with open(filename, "wb") as image:
                # Ask the raw stream to undo any transfer content-encoding
                # before the bytes are copied to disk.
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, image)
        return response.status_code

In [4]:
# Download every image URL of the table; the cell displays the per-row results.
dataframe["image"].apply(download_image)

images/Aerial%20view%20of%20Krasnoyarsk%201.jpg
images/Sao%20Paulo%20Skyline%20in%20Brazil.jpg
images/Boston%20Financial%20District%20skyline.jpg
images/Kairo%20BW%201.jpg
images/Szczecin%20aerial%203a.jpg
images/Tomsk0293.JPG
images/Katowice%20Rynek.jpg
images/Brussels%20view%20from%20Mont%20des%20Arts%2C%20Brussels%2C%20Belgium%20%28cropped%29.jpg
images/Genova%20panorama%20centro%20storico%20da%20villetta%20Di%20Negro.jpg
images/Hoge%20der%20Aa2.jpg


0    200.0
1    200.0
2    200.0
3    200.0
4    200.0
5    200.0
6    200.0
7      NaN
8      NaN
9    200.0
Name: image, dtype: float64

In [5]:
import os
import json
from PIL import Image
from PIL.ExifTags import TAGS
import numpy
from sklearn.cluster import KMeans


def getDominatedColors(imgfile, n_clusters=4, random_state=None):
    """Cluster the pixels of an image with k-means and return the centres.

    Args:
        imgfile: An opened image object exposing ``getbands()``, ``getdata()``
            and ``convert()`` (the caller passes a PIL image).
        n_clusters: Number of clusters, i.e. of colours returned.
        random_state: Forwarded to ``KMeans``; pass an integer to make the
            result reproducible. ``None`` keeps the unseeded behaviour.

    Returns:
        ``cluster_centers_`` of the fitted ``KMeans`` model: one row per
        cluster, one column per image band.
    """
    print(imgfile)
    # Single-band images (e.g. grayscale "L" or palette "P") yield scalar
    # pixels, i.e. a 1-D array that KMeans.fit rejects; expand them to RGB
    # triples first. Multi-band images are clustered in their own bands.
    if len(imgfile.getbands()) == 1:
        imgfile = imgfile.convert("RGB")
    numarray = numpy.array(imgfile.getdata(), numpy.uint8)
    clusters = KMeans(n_clusters=n_clusters, n_init=2, random_state=random_state)
    clusters.fit(numarray)
    return clusters.cluster_centers_

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``bytes`` values as text."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        ``bytes`` are decoded as UTF-8 with undecodable bytes dropped; any
        other value is handed to ``json.JSONEncoder.default``.
        """
        if not isinstance(obj, bytes):
            return super().default(obj)
        return str(obj, 'utf-8', 'ignore')

metadata = []
# Walk the files in the images directory.
for filename in os.listdir("images"):
    # Match the extension case-insensitively so that e.g. ".JPG", ".JPEG" and
    # ".PNG" are all accepted; anything else is skipped.
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    # Open the image to read its metadata.
    with Image.open(os.path.join("images", filename)) as img:
        # Key each EXIF entry by its name from TAGS (falling back to the raw
        # tag when TAGS has no entry) and decode bytes values to text.
        exif = {
            TAGS.get(tag, tag): (
                value.decode('utf-8', 'ignore') if isinstance(value, bytes) else value
            )
            for tag, value in img.getexif().items()
        }
        metadata.append({
            'filename': filename,
            'width': img.width,
            'height': img.height,
            'format': img.format,
            'mode': img.mode,
            'dominated_color': getDominatedColors(img).tolist(),
            'exif': exif
        })
# Save the metadata to a JSON file.
with open('metadata.json', 'w') as f:
    json.dump(metadata, f, cls=CustomEncoder)



<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3008x2000 at 0x7F3C6CA850D0>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=2816x2112 at 0x7F3C5513D130>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=2196x1680 at 0x7F3C39166A60>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x682 at 0x7F3C39164700>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3000x2000 at 0x7F3C39164730>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=4901x2536 at 0x7F3C39196FD0>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=2272x1704 at 0x7F3C39199850>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=4810x3186 at 0x7F3C39199BE0>


**Création de groupes d'images et ajout du groupe de chaque image dans ses métadonnées**