In [75]:
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.messages import SystemMessage, HumanMessage
from sklearn.metrics.pairwise import cosine_similarity
from langchain.schema import AIMessage
import json
import re
from langchain_core.runnables import RunnableLambda
import base64
from PIL import Image
import io
import os
import pandas as pd
import numpy as np
from tabulate import tabulate
import ast

In [76]:
def encode_image(image_path, max_size=(512, 512), quality=80):
    """Load an image, downscale it, and return it as a Base64-encoded JPEG.

    Args:
        image_path: Path of the image file to read.
        max_size: ``(width, height)`` bounding box handed to ``Image.thumbnail``.
        quality: JPEG quality setting used when re-encoding.

    Returns:
        str: The re-encoded JPEG bytes, Base64-encoded and decoded as UTF-8 text.
    """
    # Modes the JPEG writer accepts as-is; anything else (e.g. RGBA or
    # palette PNGs) makes ``save(format="JPEG")`` raise OSError.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    # The context manager releases the file handle once the pixels have been
    # re-encoded, instead of leaving one open file per processed image.
    with Image.open(image_path) as image:
        if image.mode not in jpeg_modes:
            # Drop alpha / expand the palette so the JPEG encoder accepts it.
            image = image.convert("RGB")

        # Resize in place to fit inside max_size.
        image.thumbnail(max_size)

        # Re-encode to an in-memory JPEG.
        buffer = io.BytesIO()
        image.save(buffer, format="JPEG", quality=quality)

    # Base64-encode the JPEG bytes.
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

In [77]:
def extract_json(response_text):
    """Extract the JSON object embedded in a free-text model response.

    The text between the first ``{`` and the last ``}`` is isolated, then the
    first complete JSON value starting at that opening brace is decoded.
    Anything after that value is ignored, so a response that mentions braces
    again after the JSON (``{"a": 1} ... {note}``) still parses.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value, or ``None`` when the text contains no
        brace-delimited span or that span does not start with valid JSON.
        A message is printed in both failure cases.
    """
    match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if not match:
        print("Aucun JSON trouvé dans la réponse.")
        return None

    candidate = match.group()
    try:
        # raw_decode stops at the end of the first JSON value instead of
        # rejecting the whole span because of trailing text.
        parsed, _end = json.JSONDecoder().raw_decode(candidate)
    except json.JSONDecodeError as e:
        print(f"Erreur lors du chargement du JSON : {e}")
        return None
    return parsed

In [78]:
def prompt_func(data):
    """Turn a prompt dictionary into the message list passed down the chain.

    Args:
        data: Mapping with a ``"type"`` and a ``"text"`` entry. When
            ``"type"`` is ``"keywords"`` an ``"image"`` entry (Base64 text)
            is read as well.

    Returns:
        list: A single ``HumanMessage`` whose content is the optional image
        part followed by the text part. No system message is included.
    """
    kind, body = data["type"], data["text"]

    parts = []
    if kind == "keywords":
        # The image goes first, embedded as a JPEG data URL.
        encoded = data["image"]
        parts.append(
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{encoded}"}
        )
    parts.append({"type": "text", "text": body})

    return [HumanMessage(content=parts)]

In [79]:
def call_func(chain, prompt):
    """Invoke the chain on a prompt and pull the JSON out of its reply.

    Args:
        chain: Object exposing ``invoke(prompt)``.
        prompt: Value forwarded unchanged to ``chain.invoke``.

    Returns:
        Whatever ``extract_json`` returns for the reply text, or ``-1`` when
        any exception is raised along the way (a message is printed).
    """
    try:
        reply = chain.invoke(prompt)
        # AIMessage replies carry their text in .content; anything else is stringified.
        reply_text = reply.content if isinstance(reply, AIMessage) else str(reply)
        return extract_json(reply_text)
    except Exception as e:
        print(f"Erreur de parsing JSON : {e}. Nouvelle tentative...")
        return -1

In [80]:
def get_image_paths(directory):
    """List the image files found directly inside a directory.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        list[str]: ``directory``-joined paths of the regular files whose
        extension is ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive),
        sorted by file name.
    """
    allowed_extensions = {".jpg", ".jpeg", ".png"}
    image_paths = []
    # os.listdir gives entries in arbitrary order; sorting keeps the image
    # indices printed downstream stable from one run to the next.
    for filename in sorted(os.listdir(directory)):
        if os.path.splitext(filename)[1].lower() not in allowed_extensions:
            continue
        path = os.path.join(directory, filename)
        # Skip sub-directories that merely look like images by name; they
        # cannot be opened as image files later on.
        if os.path.isfile(path):
            image_paths.append(path)
    return image_paths

In [81]:
def create_df(image_paths):
    """Build the per-image metadata table used by the rest of the pipeline.

    Every path yields one row. Images without EXIF data are kept with empty
    ``date_time`` / ``localisation`` values (a notice is printed), so that a
    result produced for them later still has a row to land in.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        pandas.DataFrame with columns ``image_name``, ``path``, ``date_time``
        (EXIF ``DateTime`` value or ``None``), ``localisation`` (EXIF
        ``GPSInfo`` value or ``None``), plus ``keywords`` and ``categories``
        initialised to empty strings.
    """
    image_list = []
    for path in image_paths:
        image_name = os.path.basename(path)
        date_time, localisation = None, None

        # Only the EXIF block is needed; close the file right after reading it
        # so a large folder does not accumulate open handles.
        with Image.open(path) as image:
            exifdata = image._getexif()

        if exifdata:
            for tag_id, value in exifdata.items():
                # Translate the numeric EXIF tag id to its name when known.
                tag = Image.ExifTags.TAGS.get(tag_id, tag_id)
                if tag == "DateTime":
                    date_time = value
                elif tag == "GPSInfo":
                    localisation = value
        else:
            print("Aucune donnée EXIF trouvée.")

        image_list.append((image_name, path, date_time, localisation))

    df = pd.DataFrame(image_list, columns=["image_name", "path", "date_time", "localisation"])
    df["keywords"] = ""
    df["categories"] = ""

    return df

In [82]:
def add_keywords_to_df(image_data, keywords_output):
    """Store per-image results in the ``keywords`` column.

    Args:
        image_data: DataFrame with ``image_name`` and ``keywords`` columns.
            It is updated in place and also returned.
        keywords_output: dict mapping an image file name to the value to
            store for it. Names absent from the frame are ignored; rows whose
            name is absent from the dict keep their current value.

    Returns:
        The same DataFrame object. When ``keywords_output`` is empty or is
        not a dict, a message is printed and the frame is left unchanged.
    """
    # A non-dict value (for instance an error sentinel from the model call)
    # cannot be looked up by image name, so treat it like "nothing provided".
    if not isinstance(keywords_output, dict) or not keywords_output:
        print("Aucun mot clé fourni ! ")
        return image_data

    # Update only the images present in keywords_output. Building the column
    # from a plain list also works on an empty frame, where a row-wise
    # DataFrame.apply would return a frame instead of a column.
    updated = [
        keywords_output[name] if name in keywords_output else current
        for name, current in zip(image_data["image_name"], image_data["keywords"])
    ]
    image_data["keywords"] = pd.Series(updated, index=image_data.index, dtype=object)
    return image_data

In [83]:
def keywords_call(df, image_paths, keywords_chain, max_iter=100):
    """Query the model once per image and merge each answer into ``df``.

    For every path the image is encoded, the chain is called, and the call is
    repeated until it yields a JSON object or ``max_iter`` attempts have been
    used. An answer that is not already keyed by the image's file name is
    wrapped as ``{image_name: answer}`` before being handed to
    ``add_keywords_to_df``, which looks results up by file name.

    Args:
        df: DataFrame to update (see ``add_keywords_to_df``).
        image_paths: Sequence of image file paths to process.
        keywords_chain: Chain passed to ``call_func``.
        max_iter: Maximum number of model calls per image.

    Returns:
        The DataFrame returned by the last ``add_keywords_to_df`` call, or
        ``df`` itself when no answer could be merged.
    """
    for i, image_path in enumerate(image_paths):
        print(f"Image {i} : {image_path}")
        image_b64 = encode_image(image_path)
        image_name = os.path.basename(image_path)

        # The prompt is identical for every attempt, so build it once per image.
        prompt = {
            "type": "keywords",
            "text": f"""Describe the image {image_name}. My goal is to sort my pictures in my photo albums. But I would like to get ridd of the unecessary ones. For example hte screenshots with text, shopping photos, etc. This list is not exhaustive However, I still want you to keep images of meaningful everyday events. So, tell me whether the image should go in the “others” category or the “albums” category. Give me the answer in JSON format :""",
            "image": image_b64
        }

        attempts_left = max_iter
        while attempts_left > 0:
            keywords_output = call_func(keywords_chain, prompt)
            print(f"Output : {keywords_output}\n")

            # call_func yields None when no JSON could be extracted and -1
            # when the call itself failed; neither is a usable answer.
            if not isinstance(keywords_output, dict):
                attempts_left -= 1
                print(f"On re-essaie avec au maximum : {attempts_left}\n")
                continue

            # NOTE(review): the model usually answers with a flat object about
            # this one image (e.g. {"category": ...}); key it by file name so
            # the merge can find the row. Confirm this is the value wanted in
            # the "keywords" column.
            if keywords_output and image_name not in keywords_output:
                keywords_output = {image_name: keywords_output}

            df = add_keywords_to_df(df, keywords_output)
            break

    return df

In [84]:
def pipeline_keywords(image_paths):
    """Run the keyword pipeline end to end for a list of image paths.

    Builds the metadata table with ``create_df``, composes ``prompt_func``
    with the module-level ``llm`` into a chain, and lets ``keywords_call``
    fill the table.

    Args:
        image_paths: Image file paths to process.

    Returns:
        The DataFrame returned by ``keywords_call``.
    """
    metadata = create_df(image_paths)

    # prompt_func shapes the input; its output is piped into the chat model.
    chain = RunnableLambda(prompt_func) | llm

    return keywords_call(metadata, image_paths, chain)

In [85]:
# Chat model shared by the notebook; pipeline_keywords reads it as a global.
llm = ChatOllama(model="gemma3")
# Folder scanned for image files by get_image_paths.
directory = "test_data"

In [86]:
# Collect the image files in `directory`, then run the keyword pipeline on them.
image_paths = get_image_paths(directory)
image_data = pipeline_keywords(image_paths)

Image 0 : test_data\20240822_142034.jpg
Output : {'image_name': '20240822_142034.jpg', 'category': 'others', 'reason': "This appears to be a street scene shot from a car's windshield. It doesn't represent a specific event or memory that would warrant inclusion in a dedicated album. It's a snapshot of a street view."}

Image 1 : test_data\20240822_142036.jpg
Output : {'category': 'albums'}

Image 2 : test_data\20240822_142048.jpg
Output : {'category': 'albums', 'reason': "This image captures a street scene with a prominent building reflected in the wet road. It represents a typical everyday moment and has a certain aesthetic appeal. It's not a screenshot, shopping photo, or other irrelevant image."}

Image 3 : test_data\20240825_121144.jpg
Output : {'category': 'albums'}

Image 4 : test_data\20240826_184159.jpg
Output : {'image_name': '20240826_184159.jpg', 'category': 'albums', 'reason': "This appears to be a scenic, over-the-shoulder shot of a path or trail. It's a visually interestin