In [20]:
!pip install sparqlwrapper



In [21]:
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# URL of the SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# SPARQL query returning at most 500 distinct rows, each with a city
# (?grandeville), its country (?pays), an image (?image) and the labels of the
# city and the country, requested in French through the wikibase:label service.
# NOTE(review): wdt:P31 / wd:Q1549591 presumably mean "instance of" / "big
# city", wdt:P17 "country" and wdt:P18 "image" -- confirm against Wikidata.
# There is no ORDER BY, so which 500 rows come back is up to the endpoint.
query = """SELECT DISTINCT ?grandeville ?grandevilleLabel ?pays ?paysLabel ?image {
  ?grandeville wdt:P31 wd:Q1549591;
               wdt:P17 ?pays;
               wdt:P18 ?image.
 SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 500"""


def get_results(endpoint_url, query):
    """Run a SPARQL query and return the converted JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper``'s ``query().convert()`` yields once the
        return format has been set to JSON.
    """
    # Advertise the running interpreter's major.minor version in the agent.
    agent = "WDQS-example Python/%s.%s" % tuple(sys.version_info[:2])

    client = SPARQLWrapper(endpoint_url, agent=agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()


# Run the query, then flatten every binding into a
# (city label, country label, image URL) tuple.
array = []
results = get_results(endpoint_url, query)

array.extend(
    (
        binding["grandevilleLabel"]["value"],
        binding["paysLabel"]["value"],
        binding["image"]["value"],
    )
    for binding in results["results"]["bindings"]
)

In [22]:
# Build the table of cities from the collected tuples and cast every column
# to the same "<U200" dtype.
column_names = ["ville", "pays", "image"]
dataframe = pd.DataFrame(array, columns=column_names).astype(
    {name: "<U200" for name in column_names}
)
dataframe

Unnamed: 0,ville,pays,image
0,Barquisimeto,Venezuela,http://commons.wikimedia.org/wiki/Special:File...
1,Winterthour,Suisse,http://commons.wikimedia.org/wiki/Special:File...
2,Siliguri,Inde,http://commons.wikimedia.org/wiki/Special:File...
3,Boukhara,Ouzbékistan,http://commons.wikimedia.org/wiki/Special:File...
4,Pokhara,Népal,http://commons.wikimedia.org/wiki/Special:File...
...,...,...,...
495,Washington,États-Unis,http://commons.wikimedia.org/wiki/Special:File...
496,Turin,Italie,http://commons.wikimedia.org/wiki/Special:File...
497,Lyon,France,http://commons.wikimedia.org/wiki/Special:File...
498,Aix-la-Chapelle,Allemagne,http://commons.wikimedia.org/wiki/Special:File...


In [23]:
import requests
import shutil
import os


def download_image(url, timeout=30):
    """Download ``url`` into the local ``images`` directory.

    The target file is named after the last path component of ``url``. The
    ``images`` directory is created when it does not exist yet.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the caller forever.

    Returns:
        The HTTP status code of the response, or ``None`` when the server
        answered 200 but the target path is longer than 50 characters and the
        download was therefore skipped.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (connection failures, timeouts, ...).
    """
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs("images", exist_ok=True)
    headers = {"User-Agent": "Mozilla/5.0"}

    # With stream=True the connection stays open until the body is consumed or
    # the response is closed; the context manager releases it on every path,
    # including the early return below.
    with requests.get(
        url, allow_redirects=True, headers=headers, stream=True, timeout=timeout
    ) as response:
        if response.status_code == 200:
            filename = os.path.join("images", os.path.basename(url))
            print(filename)

            # Overly long target paths are skipped rather than written.
            if len(filename) > 50:
                return None

            with open(filename, "wb") as image:
                # Let urllib3 undo any Content-Encoding (gzip/deflate) so the
                # bytes written to disk are the image itself.
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, image)
        return response.status_code

In [24]:
# Download every image listed in the table; the cell displays the per-row
# return values of download_image.
dataframe["image"].apply(download_image)

images/Baqtoeste.jpg
images/View%20of%20Winterthur.jpg
images/Siliguri%20view.jpg
images/Kalon-Ensemble%20Buchara.jpg
images/Pokhara1.JPG
images/Surat%20at%20night.JPG
images/Brescia%20city%20skyline%20from%20the%20city%20castle.jpg
images/%D0%A3%D1%81%D1%81%D1%83%D1%80%D0%B8%D0%B9%D1%81%D0%BA%D0%B8%D0%B9%20%D0%B1%D1%83%D0%BB%D1%8C%D0%B2%D0%B0%D1%80.jpg
images/Kampalamontage.png
images/TalferbrueckeBozenMeranBahn.jpg
images/Huntington%20Beach%20CA%20Aerial%20by%20Don%20Ramey%20Logan.jpg
images/Castello%20di%20Miranda%20%28XI-XII%20secolo%2C%20sullo%20sfondo%20%C3%A8%20visibile%20la%20valle%20di%20Terni%29.jpg
images/Durban%20-%20panoramio%20-%20---%3DXEON%3D---.jpg
images/Aerial%20photographs%20of%20Izhevsk-120.jpg
images/%D0%A3%D0%BB%D1%8C%D1%8F%D0%BD%D0%BE%D0%B2%D1%81%D0%BA.jpg
images/Barnaul%20Skyline%202007.jpg
images/Caio%20Badner%20-%20Osasco.JPG
images/Broadgate%20and%20Precincts%20-%20geograph.org.uk%20-%20554599.jpg
images/Toulouse%20-%20View%20on%20Saint%20Sernin.jpg
images/R

KeyboardInterrupt: 

# Show the file name (last path component) of every image URL in the table.
for image_url in dataframe["image"]:
    print(os.path.basename(image_url))

In [36]:

import os
import json
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that additionally accepts ``bytes`` values.

    ``bytes`` objects are emitted as text decoded from UTF-8, with undecodable
    bytes dropped. Every other unsupported type is handed to the base encoder.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: Value the standard encoder could not serialize.

        Returns:
            The decoded string when ``obj`` is ``bytes``; otherwise the result
            of ``json.JSONEncoder.default``.
        """
        if not isinstance(obj, bytes):
            return json.JSONEncoder.default(self, obj)
        return obj.decode(encoding='utf-8', errors='ignore')

# File extensions (compared in lower case) of the images to inspect.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
# Tag of the pointer to the Exif sub-IFD, where tags such as DateTimeOriginal,
# ISOSpeedRatings, Flash or LensModel are stored.
EXIF_IFD_POINTER = 0x8769

metadata = []

# Walk the files of the image directory; sorting makes the order of the
# entries in metadata.json reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir("images")):
    # Compare case-insensitively so that files such as "Pokhara1.JPG" are
    # not silently skipped.
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    # Open the image to read its metadata.
    with Image.open(os.path.join("images", filename)) as img:
        exif_data = img.getexif()
        exif = {}
        if exif_data:
            # getexif() only exposes the base IFD; merge in the Exif sub-IFD,
            # otherwise most of the camera settings read below are never found.
            raw_tags = dict(exif_data.items())
            raw_tags.update(exif_data.get_ifd(EXIF_IFD_POINTER))
            for tag, value in raw_tags.items():
                # Fall back to the numeric tag when it has no known name.
                decoded_tag = TAGS.get(tag, tag)
                if isinstance(value, bytes):
                    value = value.decode('utf-8', 'ignore')
                exif[decoded_tag] = value

        # Keep only a fixed subset of the tags; missing ones are stored as None.
        # NOTE(review): GPS info, ExposureTime, FNumber and FocalLength were
        # disabled in the original cell and remain unexported here.
        exifFiltered = {
            "Orientation": exif.get('Orientation'),
            "DateTime": exif.get('DateTimeOriginal') or exif.get('DateTime'),
            "YCbCrPositioning": exif.get('YCbCrPositioning'),
            "Compression": exif.get('Compression'),
            # Camera make and model
            "CameraMake": exif.get('Make'),
            "CameraModel": exif.get('Model'),
            # Image dimensions, taken from the decoded image rather than EXIF
            'ImageWidth': img.width,
            'ImageHeight': img.height,
            # Exposure settings
            'ISO': exif.get('ISOSpeedRatings'),
            # White balance setting
            'WhiteBalance': exif.get('WhiteBalance'),
            # Lens information
            'LensModel': exif.get('LensModel'),
            # Image compression type (same source tag as "Compression")
            'CompressionType': exif.get('Compression'),
            # Metering mode
            'MeteringMode': exif.get('MeteringMode'),
            # Flash usage
            'Flash': exif.get('Flash'),
        }

        metadata.append({
            'filename': filename,
            'width': img.width,
            'height': img.height,
            'format': img.format,
            'mode': img.mode,
            'exif': exifFiltered
        })

# Save the collected metadata to a JSON file.
with open('metadata.json', 'w', encoding='utf-8') as f:
    json.dump(metadata, f, cls=CustomEncoder)
