Collecte de données

In [13]:
import requests
import shutil
import os
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that get_results() sends the query to.
endpoint_url = "https://query.wikidata.org/sparql"

# Select entities that are instances (wdt:P31) of wd:Q39367 -- bound to ?breed,
# so presumably dog breeds; confirm the QID -- together with the value of
# wdt:P18, bound to ?image. Labels are requested in French, and LIMIT 1 keeps
# a single result.
query = """SELECT DISTINCT ?breed ?breedLabel ?image {
  ?breed wdt:P31 wd:Q39367;
               wdt:P18 ?image.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 1"""


def get_results(endpoint_url, query):
    """Send a SPARQL query to an endpoint and return the converted JSON result.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper`` produces when converting a JSON response.
    """
    # Identify the client with the running interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url, agent=f"WDQS-example Python/{major}.{minor}"
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


# Collect one (label, image URL) pair per binding returned by the query.
array = []
results = get_results(endpoint_url, query)
array.extend(
    (binding["breedLabel"]["value"], binding["image"]["value"])
    for binding in results["results"]["bindings"]
)


def download_image(url, timeout=30):
    """Download ``url`` into the current working directory.

    The file is named after the last path segment of ``url`` exactly as it
    appears in the URL, so percent-escapes such as ``%20`` are kept as-is.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        The HTTP status code of the response. The file is only written when
        the status code is 200.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the close on
    # every path, including non-200 responses and write errors.
    with requests.get(
        url,
        allow_redirects=True,
        headers=headers,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code == 200:
            with open(os.path.basename(url), "wb") as image:
                # Let urllib3 undo any Content-Encoding (gzip/deflate) so the
                # bytes written to disk are the actual image.
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, image)
        return response.status_code
    
# Download the image of the first collected result; as the cell's last
# expression, the returned HTTP status code is shown as the cell output.
download_image(array[0][1])


200

In [32]:
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plot
import math
from collections import defaultdict
import json

class ImageTagger:
    """Tag an image with colour hashtags derived from its dominant colours.

    Dominant colours are the K-Means cluster centres of the image's RGB
    pixels. A centre yields a ``#couleur_<name>`` tag when it lies within the
    threshold distance of one of the reference colours in ``color_names``.
    Tags are persisted in a JSON file keyed by image path.
    """

    def __init__(self, n_clusters=4, random_state=0):
        """Create a tagger.

        Args:
            n_clusters: Number of dominant colours to extract per image.
            random_state: Seed handed to K-Means so that repeated runs on the
                same image give the same tags.
        """
        self.n_clusters = n_clusters
        self.random_state = random_state
        # name -> (reference RGB, maximum Euclidean distance to that reference)
        self.color_names = {
            'rouge': ((255, 0, 0), 30),
            'vert': ((0, 255, 0), 30),
            'bleu': ((0, 0, 255), 30),
            'jaune': ((255, 255, 0), 30),
            'noir': ((0, 0, 0), 30),
            'blanc': ((255, 255, 255), 30),
            'rose': ((255, 192, 203), 30)
            # Add more colours as needed
        }

    def get_color_tags(self, cluster_centers):
        """Convert cluster centres into colour-name tags.

        Args:
            cluster_centers: Iterable of colour triples (R, G, B); components
                may be floats and are rounded up to integers.

        Returns:
            A set of ``#couleur_<name>`` strings, one per reference colour
            that is close enough to at least one centre. May be empty.
        """
        color_tags = set()
        for center in cluster_centers:
            rgb = tuple(math.ceil(x) for x in center)
            for color_name, (reference, threshold) in self.color_names.items():
                # Compare squared distances to avoid a square root.
                distance = sum((a - b) ** 2 for a, b in zip(rgb, reference))
                if distance < threshold ** 2:
                    color_tags.add(f"#couleur_{color_name}")
        return color_tags

    def save_tags(self, image_path, tags, output_file="image_tags.json"):
        """Store ``tags`` for ``image_path`` in a JSON file.

        Entries already present in the file for other images are kept; a
        missing or unparsable file is treated as empty.

        Args:
            image_path: Key under which the tags are stored.
            tags: Iterable of tags; written as a sorted list so the file
                content does not depend on set iteration order.
            output_file: Path of the JSON file to update.
        """
        try:
            # Read with the same encoding used for writing below; the platform
            # default may not be UTF-8 and would corrupt or reject accented tags.
            with open(output_file, 'r', encoding='utf-8') as f:
                tag_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            tag_data = {}

        tag_data[image_path] = sorted(tags, key=str)

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(tag_data, f, ensure_ascii=False, indent=2)

    @staticmethod
    def _load_rgb_pixels(image_path):
        """Return the image's pixels as an (n_pixels, 3) uint8 RGB array."""
        # The context manager releases the file handle; convert("RGB") makes
        # grayscale, palette and RGBA images all yield three channels.
        with Image.open(image_path) as img:
            rgb = img.convert("RGB")
            return np.asarray(rgb, dtype=np.uint8).reshape(-1, 3)

    def process_image(self, image_path, user_tags=None):
        """Generate, save and return the tags of an image.

        Args:
            image_path: Path of the image file to analyse.
            user_tags: Optional iterable of extra tags to merge with the
                colour tags. It is not modified.

        Returns:
            The set of all tags (colour tags plus user tags).
        """
        # Colour analysis: cluster the pixels to find the dominant colours.
        pixels = self._load_rgb_pixels(image_path)
        # K-Means cannot produce more clusters than there are samples.
        n_clusters = min(self.n_clusters, len(pixels))
        clusters = KMeans(
            n_clusters=n_clusters, n_init=2, random_state=self.random_state
        )
        clusters.fit(pixels)

        # Colour tags from the cluster centres.
        all_tags = self.get_color_tags(clusters.cluster_centers_)

        # Merge with the user-provided tags.
        if user_tags:
            all_tags.update(user_tags)

        # Persist the tags.
        self.save_tags(image_path, all_tags)
        return all_tags

# Usage example
if __name__ == "__main__":
    # Image to analyse and the manual tags (may be supplied by the user).
    image_path = "images/Antebellum%20Bulldog%20Puppies.jpg"
    user_tags = {"#chien", "#chiot", "#bulldog"}

    # Run the tagger on the image, then show the resulting tag set.
    tagger = ImageTagger()
    tags = tagger.process_image(image_path, user_tags)
    print(f"Tags générés pour {image_path}:", tags, sep="\n")

Tags générés pour images/Antebellum%20Bulldog%20Puppies.jpg:
{'#chien', '#bulldog', '#chiot'}
