In [4]:
import sys
import os
import json
import base64
from groq import Groq
from dotenv import load_dotenv
import time


# Put the parent of the current working directory on the import path;
# presumably this is where the `src` package imported below lives (confirm
# against the repository layout). The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Load variables from a .env file (if any) into the environment, then read the
# API key from it. os.getenv returns None when the variable is not set.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Project-local helpers; these imports must stay below the sys.path change.
from src.llm_engine import analyse_image
from src.utils import clean_json_output

# Model identifier forwarded to analyse_image by the cells below.
LLAMA4_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"

In [5]:
def run_llama4_vision(image_path, schema=None):
    """Analyse a local image with the Llama 4 vision model and return parsed JSON.

    The image file is read, base64-encoded and handed to ``analyse_image``
    together with the module-level ``LLAMA4_MODEL`` and ``GROQ_API_KEY``. The
    streamed text deltas are concatenated, passed through
    ``clean_json_output`` and decoded with ``json.loads``.

    The elapsed time printed to stdout covers reading and encoding the image,
    the request, consuming the stream and cleaning the text; it does not
    include the final JSON decoding.

    Args:
        image_path: Path of the image file to analyse.
        schema: Optional value forwarded to ``analyse_image`` as
            ``schema_json``.

    Returns:
        The Python object obtained by decoding the cleaned model response.

    Raises:
        OSError: If the image file cannot be opened or read.
        json.JSONDecodeError: If the cleaned response is not valid JSON.
    """
    # perf_counter() is meant for measuring short durations; time.time()
    # follows the wall clock and can jump if the system time is adjusted.
    started = time.perf_counter()

    # Encode the raw image bytes as base64 text.
    with open(image_path, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")

    stream = analyse_image(
        image=img_b64,
        model=LLAMA4_MODEL,
        GROQ_API_KEY=GROQ_API_KEY,
        schema_json=schema,
    )

    # Concatenate the streamed text deltas. Chunks whose `choices` list is
    # empty are skipped instead of raising IndexError, and a delta whose
    # content is None contributes an empty string.
    full_response = "".join(
        chunk.choices[0].delta.content or ""
        for chunk in stream
        if chunk.choices
    )

    cleaned_json = clean_json_output(full_response)
    elapsed = time.perf_counter() - started
    print(f"LLama 4 Vision inference time: {elapsed:.2f} seconds")
    return json.loads(cleaned_json)


In [6]:
# NOTE(review): both paths are absolute and specific to one machine, so this
# cell only runs there as written. Consider deriving them from a configurable
# project directory once the repository layout is confirmed.
image_path ="/Users/tiago/Documents/esilvA5/ecole/Project-LLM/IDP_GenAI_Project/examples/phpCDwGn0.jpg"
target_schema = "/Users/tiago/Documents/esilvA5/ecole/Project-LLM/IDP_GenAI_Project/schemas/id_card_schema.json"

# Read the schema explicitly as UTF-8 rather than with the platform's default
# text encoding, then re-serialise it so `schema` is a JSON string.
with open(target_schema, encoding="utf-8") as f:
    schema = json.dumps(json.load(f))

In [7]:
# Run the extraction on the sample image with the loaded schema. Because the
# call is the last expression of the cell, its return value is displayed as
# the cell output.
run_llama4_vision(image_path=image_path, schema=schema)

Modèle utilisé pour l'analyse: meta-llama/llama-4-scout-17b-16e-instruct
Envoi du fichier local à Groq...
LLama 4 Vision inference time: 0.70 seconds


{'type': 'id_card',
 'first_name': 'Audrey',
 'last_name': 'Chevallier',
 'id_number': 'T7X62TZ79',
 'birth_date': '1995-04-01',
 'expiry_date': '2031-01-27'}