In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from tqdm import tqdm


In [12]:
import os
import cv2
from PIL import Image
import exifread
from shutil import copy2

# Configuration
# Folder walked recursively by the filtering loop; the matching output folder
# receives a copy of every image that passes all filters.
input_folder = 'dataset/images'  # Folder containing the 'train' and 'test' subfolders
output_folder = 'output/filtered'
# Minimum accepted (width, height); images smaller on either axis are skipped.
min_resolution = (512, 512)

# Blur detection helper
def is_blurry(image_path, threshold=100.0):
    """Report whether the image stored at ``image_path`` looks blurry.

    Sharpness is estimated as the variance of the Laplacian of the grayscale
    image; a variance strictly below ``threshold`` counts as blurry. A file
    that OpenCV cannot load is reported as blurry as well.
    """
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is not None:
        grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
        laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
        sharpness = laplacian.var()
        return sharpness < threshold
    # cv2.imread returned None: the file could not be decoded.
    return True

# GPS metadata check
def has_gps_metadata(image_path):
    """Return True when the file's EXIF data contains both GPS coordinates.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True if both the "GPS GPSLatitude" and "GPS GPSLongitude" tags are
        present, False otherwise. Any error while opening or parsing the file
        is treated as "no GPS metadata" (best-effort check, never raises).
    """
    try:
        with open(image_path, 'rb') as f:
            # No stop_tag on purpose: exifread compares stop_tag with the bare
            # tag name (e.g. "GPSLatitude"), so an IFD-prefixed value such as
            # "GPS GPSLatitude" never matches, while a matching value would
            # halt parsing before GPSLongitude is read.
            # details=False skips maker notes and thumbnail extraction, which
            # are not needed to test for the GPS tags.
            tags = exifread.process_file(f, details=False)
    except Exception:
        # Unreadable file or malformed EXIF: report "no GPS" instead of failing.
        return False
    return "GPS GPSLatitude" in tags and "GPS GPSLongitude" in tags

# Create the output folder
os.makedirs(output_folder, exist_ok=True)

# Names already written to the (flat) output folder during this run. Used to
# keep files that share a name across subfolders from overwriting each other.
copied_names = set()

# Recursive walk over the subfolders (train/test)
for root, dirs, files in os.walk(input_folder):
    for filename in files:
        file_path = os.path.join(root, filename)

        try:
            # Resolution check; the file handle is released before the
            # remaining checks reopen the file.
            with Image.open(file_path) as img:
                width, height = img.size
            if width < min_resolution[0] or height < min_resolution[1]:
                continue

            # GPS check runs before the blur check: it only parses metadata,
            # whereas blur detection decodes and filters the whole image.
            # An image must pass both, so the order does not change the result.
            if not has_gps_metadata(file_path):
                continue

            # Blur check
            if is_blurry(file_path):
                continue

            # The first file with a given name keeps it; a later file with the
            # same name from another subfolder gets that subfolder as prefix
            # instead of silently replacing the earlier copy.
            target_name = filename
            if target_name in copied_names:
                subdir_tag = os.path.relpath(root, input_folder).replace(os.sep, '_')
                target_name = f"{subdir_tag}_{filename}"
            copied_names.add(target_name)

            # Copy the retained image
            copy2(file_path, os.path.join(output_folder, target_name))
            print(f"[OK] Image retenue : {filename}")

        except Exception as e:
            # Non-image or corrupt files end up here; report and keep going.
            print(f"[ERREUR] {filename} ignorée : {e}")


[OK] Image retenue : ID_A1HcV0.jpeg
[OK] Image retenue : ID_A4ZdJC.jpeg
[OK] Image retenue : ID_AaL0uh.jpeg
[OK] Image retenue : ID_aatE8l.jpeg
[OK] Image retenue : ID_agHwYS.jpeg
[OK] Image retenue : ID_AQc6Fa.jpeg
[OK] Image retenue : ID_AqSCmf.jpeg
[OK] Image retenue : ID_asy8YG.jpeg
[OK] Image retenue : ID_AZMNHM.jpeg
[OK] Image retenue : ID_b2RRNg.jpeg
[OK] Image retenue : ID_b9S74F.jpeg
[OK] Image retenue : ID_BaQafS.jpeg
[OK] Image retenue : ID_bdRL7B.jpeg
[OK] Image retenue : ID_beWVeF.jpeg
[OK] Image retenue : ID_bVaZn7.jpeg
[OK] Image retenue : ID_c0U1lA.jpeg
[OK] Image retenue : ID_ccmo9J.jpeg
[OK] Image retenue : ID_Ccy5YA.jpeg
[OK] Image retenue : ID_CEnc58.jpeg
[OK] Image retenue : ID_chDAE3.jpeg
[OK] Image retenue : ID_Cisrab.jpeg
[OK] Image retenue : ID_cLSG16.jpg
[OK] Image retenue : ID_CrSUii.jpeg
[OK] Image retenue : ID_D57TlR.jpeg
[OK] Image retenue : ID_dcwTQ3.jpeg
[OK] Image retenue : ID_deLiI1.jpeg
[OK] Image retenue : ID_Dh68Pg.jpeg
[OK] Image retenue : ID_DitJb