In [1]:
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that the queries below are sent to (query.wikidata.org).
endpoint_url = "https://query.wikidata.org/sparql"

# SPARQL query returning at most 10 distinct items that have:
#   - wdt:P31 (Wikidata "instance of") equal to wd:Q1549591
#     (presumably the "big city" class, judging by ?grandeville -- confirm),
#   - a wdt:P17 value bound to ?pays (Wikidata "country"),
#   - a wdt:P18 value bound to ?image (Wikidata "image").
# The label service is configured for French ("fr"), which is what fills the
# ?grandevilleLabel and ?paysLabel variables.
query = """SELECT DISTINCT ?grandeville ?grandevilleLabel ?pays ?paysLabel ?image {
  ?grandeville wdt:P31 wd:Q1549591;
               wdt:P17 ?pays;
               wdt:P18 ?image.
 SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 10"""


def get_results(endpoint_url, query):
    """Run a SPARQL query against an endpoint and return the converted result.

    Args:
        endpoint_url: URL of the SPARQL endpoint to send the query to.
        query: Text of the SPARQL query.

    Returns:
        The value produced by ``convert()`` on the query response, with the
        return format set to JSON.
    """
    # Identify this client with the interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url, agent="WDQS-example Python/%s.%s" % (major, minor)
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


array = []
results = get_results(endpoint_url, query)

# Flatten every binding into a (city label, country label, image URL) tuple,
# keeping only the "value" entry of each selected variable.
for result in results["results"]["bindings"]:
    array.append(
        tuple(
            result[field]["value"]
            for field in ("grandevilleLabel", "paysLabel", "image")
        )
    )

In [2]:
# Build the table from the collected tuples and cast each of the three
# columns to the "<U200" dtype in a single chained expression.
dataframe = pd.DataFrame(array, columns=["ville", "pays", "image"]).astype(
    dtype=dict.fromkeys(("ville", "pays", "image"), "<U200")
)
dataframe

Unnamed: 0,ville,pays,image
0,Dakar,Sénégal,http://commons.wikimedia.org/wiki/Special:File...
1,Addis-Abeba,Éthiopie,http://commons.wikimedia.org/wiki/Special:File...
2,Tripoli,Libye,http://commons.wikimedia.org/wiki/Special:File...
3,Asmara,Érythrée,http://commons.wikimedia.org/wiki/Special:File...
4,Neuss,Allemagne,http://commons.wikimedia.org/wiki/Special:File...
5,Göttingen,Allemagne,http://commons.wikimedia.org/wiki/Special:File...
6,Chillán,Chili,http://commons.wikimedia.org/wiki/Special:File...
7,Witten,Allemagne,http://commons.wikimedia.org/wiki/Special:File...
8,Praia,Cap-Vert,http://commons.wikimedia.org/wiki/Special:File...
9,Tambov,Russie,http://commons.wikimedia.org/wiki/Special:File...


In [3]:
import requests
import shutil
import os


def download_image(url, timeout=30):
    """Download the file at ``url`` into the local ``images`` directory.

    The target file name is the last path component of ``url``, used as-is
    (it is not URL-decoded), joined onto ``images``.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled server would block the notebook
            indefinitely.

    Returns:
        The HTTP status code of the response, or ``None`` when the response
        was 200 but the target path is longer than 50 characters, in which
        case nothing is written.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the directory or the target file cannot be created.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs("images", exist_ok=True)
    headers = {"User-Agent": "Mozilla/5.0"}

    # The context manager guarantees the streamed connection is released on
    # every path, including the early return below.
    with requests.get(
        url,
        allow_redirects=True,
        headers=headers,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code == 200:
            filename = os.path.join("images", os.path.basename(url))
            print(filename)

            # Paths longer than 50 characters are skipped on purpose
            # (presumably to avoid unwieldy file names -- confirm the intent).
            if len(filename) > 50:
                return None

            # Let urllib3 undo any Content-Encoding (gzip/deflate) while the
            # raw stream is copied to disk.
            response.raw.decode_content = True
            with open(filename, "wb") as image:
                shutil.copyfileobj(response.raw, image)
        return response.status_code

In [4]:
# Fetch every image URL in the table; the resulting Series holds what
# download_image returned for each row.
dataframe["image"].apply(download_image)

images/Pointe%20des%20Almadies%20-%20Senegal.jpg
images/Addis%20abeba%20meskele%20square.jpg
images/Marcus%20Aurelius%20Arch%20Tripoli%20Libya.jpg
images/Asmara2.jpg
images/Neuss%2C%20Rathaus%202008.JPG
images/Goettingen%20Marktplatz%20Oct06%20Antilived.jpg
images/Ciudad%20de%20Chill%C3%A1n.png
images/Witten%2020090218.jpg
images/Praia%20Harbor.jpg
images/Tambov%20TransgfigurationCathedral%20101%200477.jpg


0    200.0
1    200.0
2      NaN
3    200.0
4    200.0
5      NaN
6    200.0
7    200.0
8    200.0
9      NaN
Name: image, dtype: float64

In [5]:
import os
import json
from PIL import Image
from PIL.ExifTags import TAGS

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``bytes`` values as text.

    ``bytes`` objects are decoded as UTF-8 with undecodable bytes dropped;
    every other unsupported type is delegated to ``json.JSONEncoder``, which
    raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``."""
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode(encoding='utf-8', errors='ignore')

metadata = []

# Extensions of the files to inspect. They are compared against the
# lower-cased file name so that e.g. "photo.JPG" is not silently skipped.
image_extensions = ('.jpg', '.jpeg', '.png')

# Walk the files of the images directory. os.listdir returns entries in
# arbitrary order, so sort them to make the output reproducible across runs.
for filename in sorted(os.listdir("images")):
    if not filename.lower().endswith(image_extensions):
        continue

    # Open the image to read its basic properties and EXIF metadata.
    with Image.open(os.path.join("images", filename)) as img:
        exif = {}
        for tag, value in img.getexif().items():
            # Map the numeric EXIF tag to its name when it is a known tag;
            # otherwise keep the numeric id as the key.
            decoded_tag = TAGS.get(tag, tag)
            if isinstance(value, bytes):
                value = value.decode('utf-8', 'ignore')
            exif[decoded_tag] = value
        metadata.append({
            'filename': filename,
            'width': img.width,
            'height': img.height,
            'format': img.format,
            'mode': img.mode,
            'exif': exif
        })

# Save the collected metadata to a JSON file; CustomEncoder takes care of any
# bytes values that remain nested inside EXIF entries.
with open('metadata.json', 'w') as f:
    json.dump(metadata, f, cls=CustomEncoder)