# Project

## Dépendance

In [80]:
! pip install SPARQLWrapper
! pip install exifread

Defaulting to user installation because normal site-packages is not writeable
[33mDEPRECATION: gpg 1.14.0-unknown has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of gpg or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mDefaulting to user installation because normal site-packages is not writeable
[33mDEPRECATION: gpg 1.14.0-unknown has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of gpg or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

## Collecte de données

In [81]:
import pandas as pd
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

# Get cities
query = """SELECT DISTINCT ?grandeville ?grandevilleLabel ?pays ?paysLabel ?image {
  ?grandeville wdt:P31 wd:Q1549591;
               wdt:P17 ?pays;
               wdt:P18 ?image.
 SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 100"""


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (
        sys.version_info[0],
        sys.version_info[1],
    )
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


array = []
results = get_results(endpoint_url, query)

for result in results["results"]["bindings"]:
    array.append(
        (
            result["grandevilleLabel"]["value"],
            result["paysLabel"]["value"],
            result["image"]["value"],
        )
    )

dataframe = pd.DataFrame(array, columns=["ville", "pays", "image"])
dataframe = dataframe.astype(
    dtype={"ville": "<U200", "pays": "<U200", "image": "<U200"}
)
dataframe

Unnamed: 0,ville,pays,image
0,Krasnoïarsk,Russie,http://commons.wikimedia.org/wiki/Special:File...
1,São Paulo,Brésil,http://commons.wikimedia.org/wiki/Special:File...
2,Boston,États-Unis,http://commons.wikimedia.org/wiki/Special:File...
3,Le Caire,Égypte,http://commons.wikimedia.org/wiki/Special:File...
4,Szczecin,Pologne,http://commons.wikimedia.org/wiki/Special:File...
...,...,...,...
95,Padoue,Italie,http://commons.wikimedia.org/wiki/Special:File...
96,Buenos Aires,Argentine,http://commons.wikimedia.org/wiki/Special:File...
97,Lyon,France,http://commons.wikimedia.org/wiki/Special:File...
98,Le Mans,France,http://commons.wikimedia.org/wiki/Special:File...


### Téléchargement des images

In [82]:
import requests
import shutil
import os

def download_image(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    request = requests.get(url, allow_redirects=True, headers=headers, stream=True)
    if request.status_code == 200:
        # Create 'images' directory if it doesn't exist
        if not os.path.exists("images"):
            os.makedirs("images")

        # Extract the filename from the URL and save the image in 'images' directory
        filename = os.path.join("images", os.path.basename(url))
        with open(filename, "wb") as image:
            request.raw.decode_content = True
            shutil.copyfileobj(request.raw, image)
    return request.status_code

def clear_images_directory():
    # Clear 'images' directory if it exists
    if os.path.exists("images"):
        shutil.rmtree("images")

# Clear 'images' directory before every execution
clear_images_directory()

# Assuming 'dataframe' is a DataFrame containing URLs under column 'image'
# Apply the download_image function to each URL in the 'image' column
dataframe.image.apply(download_image)

0     200
1     200
2     200
3     200
4     200
     ... 
95    200
96    200
97    200
98    200
99    200
Name: image, Length: 100, dtype: int64

### Enregistrement des métadonnées
#### Import

In [83]:
import os
import json
from PIL import Image, TiffImagePlugin
import PIL.ExifTags

#### Define the directory path and initialize the list to store metadata:

In [84]:
directory_path = "images"
all_metadata = []

if not os.path.isdir(directory_path):
    print(f"Error: Directory '{directory_path}' does not exist!")
    sys.exit()

#### Define the function to cast data types

In [85]:
def cast(v):
    if isinstance(v, TiffImagePlugin.IFDRational):
        if v.denominator == 0:
            return None  # Handle division by zero gracefully
        return float(v.numerator) / float(v.denominator)
    elif isinstance(v, tuple):
        return tuple(cast(t) for t in v)
    elif isinstance(v, bytes):
        return v.decode(errors="replace")
    elif isinstance(v, dict):
        for kk, vv in v.items():
            v[kk] = cast(vv)
        return v
    else:
        return v

#### Define the function to get the images properties

In [4]:
def get_image_metadata(imgfile):
    metadata = {}

    # Get image format
    metadata['format'] = imgfile.format

    # Get image size
    metadata['size'] = imgfile.size
    
    exif = dict(imgfile._getexif().items())
    if exif:
        # Get image orientation (landscape, portrait, square, etc.)
        if 274 in exif:
            orientation = exif[274]
            if orientation == 1:
                metadata['orientation'] = 'Landscape'
            elif orientation == 3:
                metadata['orientation'] = 'Portrait'
            else:
                metadata['orientation'] = 'Unknown'
        else:
            metadata['orientation'] = 'Unknown'
    return metadata

#### Loop through files in the directory and extract metadata

In [6]:
import os
import json
import sys
from PIL import Image, TiffImagePlugin
import PIL.ExifTags

directory_path = "images"
all_metadata = []

if not os.path.isdir(directory_path):
    print(f"Error: Directory '{directory_path}' does not exist!")
    sys.exit()

for filename in os.listdir(directory_path):
    if os.path.isfile(os.path.join(directory_path, filename)) and filename.lower().endswith((".jpg", ".png")):
        print(f"Found image: {filename}")

        try:
            imgfile = Image.open(os.path.join(directory_path, filename))
            exif_data = imgfile._getexif()
            
            metadata_dict = {}

            if exif_data:
                for k, v in imgfile._getexif().items():
                    if k in PIL.ExifTags.TAGS:
                        v = cast(v)
                        metadata_dict[PIL.ExifTags.TAGS[k]] = v
            else:
                print(f"  - No EXIF data found for {filename}")

            # Get additional image metadata (format, size, orientation)
            image_metadata = get_image_metadata(imgfile)
            metadata_dict.update(image_metadata)
            
            all_metadata.append({filename: metadata_dict})
            if not metadata_dict:
                print(f"  - No metadata found for {filename}")

        except Exception as e:
            print(f"  - Error processing {filename}: {e}")

output_file = "directory_metadata.json"
with open(output_file, "w") as json_file:
    json.dump(all_metadata, json_file, indent=4)

print(f"Directory metadata saved to {output_file} (if no errors occurred)")


Found image: 19-03-03-Maribor-RalfR-DJI%200444.jpg
Found image: Szczecin%20aerial%203a.jpg
Found image: Omsk%20Collage%202016.png
  - No EXIF data found for Omsk%20Collage%202016.png
  - Error processing Omsk%20Collage%202016.png: 'NoneType' object has no attribute 'items'
Found image: Zagreb%20%2829255640143%29.jpg
Found image: Grenoble%2001.JPG
Found image: Stadtbild%20K%C3%B6ln%20%2850MP%29.jpg
Found image: Venice%20Old%20Town%20Lagoon%20Aerial%20View.jpg
  - No EXIF data found for Venice%20Old%20Town%20Lagoon%20Aerial%20View.jpg
  - Error processing Venice%20Old%20Town%20Lagoon%20Aerial%20View.jpg: 'NoneType' object has no attribute 'items'
Found image: 00%206191%20Maastricht%20-%20Niederlande.jpg
Found image: ISS-38%20Nighttime%20image%20of%20Moscow%2C%20Russia.jpg
Found image: Vue%20Cath%C3%A9drale%20Jardins%20Pierre%20de%20Ronsard.JPG
Found image: Amdavad%20Aerial.jpg
  - No EXIF data found for Amdavad%20Aerial.jpg
  - Error processing Amdavad%20Aerial.jpg: 'NoneType' object has

In [88]:
! pip install keras

Defaulting to user installation because normal site-packages is not writeable
Collecting keras
  Downloading keras-3.1.1-py3-none-any.whl.metadata (5.6 kB)
Collecting absl-py (from keras)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting rich (from keras)
  Downloading rich-13.7.1-py3-none-any.whl.metadata (18 kB)
Collecting namex (from keras)
  Downloading namex-0.0.7-py3-none-any.whl.metadata (246 bytes)
Collecting h5py (from keras)
  Downloading h5py-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)
Collecting optree (from keras)
  Downloading optree-0.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.4/45.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ml-dtypes (from keras)
  Downloading ml_dtypes-0.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting markdown-it-py>=2.2.0 (from 

In [None]:
from keras.applications import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

# Load pre-trained VGG16 model
model = VGG16(weights='imagenet', include_top=True)

# Load and preprocess an image
img_path = 'images/Zicht%20op%20Zwolle%20Centrum.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

# Use the pre-trained model to make predictions
preds = model.predict(x)

# Decode the predictions
decoded_preds = decode_predictions(preds, top=3)[0]  # Get top 3 predictions
for i, (imagenet_id, label, score) in enumerate(decoded_preds):
    print(f'{i+1}: {label} ({score:.2f})')
