In [None]:
from groq import Groq
import os
import base64
import time

VISION_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct" 

# --- FONCTION POUR ENCODER L'IMAGE ---
def encode_image(image_path):
    """Encodes a local file to a Base64 string."""
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Erreur: Le fichier n'a pas été trouvé à l'emplacement: {image_path}")
        return None
    except Exception as e:
        print(f"Erreur lors de l'encodage de l'image: {e}")
        return None

# --- VÉRIFICATION ET ENCODAGE ---
def check_file_exists(file_path):
    """Vérifie si le fichier existe à l'emplacement donné."""
    return os.path.isfile(file_path)

def generate_base64_from_local(file_path):
    """Génère une chaîne Base64 à partir d'un fichier local."""
    if not check_file_exists(file_path):
        print(f"Erreur: Le fichier n'existe pas à l'emplacement: {file_path}")
        return None
    return encode_image(file_path)

def generate_answer():
    local_file_path = r"C:\Users\ihadi\Downloads\téléchargement (1).jpeg" 
    base64_image = generate_base64_from_local(local_file_path)

    if not base64_image:
        exit()

    # Format Base64 obligatoire pour l'API
    base64_url = f"data:image/jpeg;base64,{base64_image}"

    # --- APPEL À L'API GROQ ---
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    # Note: Je remplace le modèle par le modèle Vision Groq supporté (Llama 3.2 Vision)
    # Si vous avez un accès spécial, vous pouvez remettre votre nom de modèle.
    

    start_time = time.time()
    print(f"Envoi du fichier local ({local_file_path}) à Groq...")

    completion = client.chat.completions.create(
        model=VISION_MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "I want you to generate a Json file of the document. Json file : "
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # Utilisation de la chaîne Base64 encodée
                            "url": base64_url
                        }
                    }
                ]
            }
        ],
        temperature=1,
        max_completion_tokens=1024,
        top_p=1,
        stream=True,
        stop=None
    )

    print("\n--- RÉSULTAT (Généré par Groq) ---")
    for chunk in completion:
        print(chunk.choices[0].delta.content or "", end="")

    end_time = time.time()
    print(f"\n\nTemps d'exécution total: {end_time - start_time:.2f} secondes")

Envoi du fichier local (C:\Users\ihadi\Downloads\téléchargement (1).jpeg) à Groq...

--- RÉSULTAT (Généré par Groq) ---
Here is a JSON representation of the receipt document:


```json
{
  "store": "Lidl",
  "receiptType": "Ticket de caisse",
  "date": null,
  "items": [
    {
      "name": "carottes",
      "price": 0.69,
      "unit": "kg",
      "quantity": 0.338
    },
    {
      "name": "poivrons",
      "price": 2.99,
      "unit": null,
      "quantity": 1
    },
    {
      "name": "oignons",
      "price": 0.69,
      "unit": "kg",
      "quantity": 0.228
    },
    {
      "name": "aubergines",
      "price": 1.75,
      "unit": null,
      "quantity": 1
    },
    {
      "name": "tomates",
      "price": 1.29,
      "unit": null,
      "quantity": 1
    },
    {
      "name": "Fraisas",
      "price": 2.99,
      "unit": null,
      "quantity": 1
    },
    {
      "name": "Petit crevettes",
      "price": 6.99,
      "unit": null,
      "quantity": 1
    },
    {
      "n

In [8]:
# # Load model directly
# from transformers import AutoProcessor, AutoModelForVision2Seq
# import torch 

# processor = AutoProcessor.from_pretrained("C:\\Users\\ihadi\\Desktop\\Semestre 9\\LLM and Gen AI\\Projet\\IDP_GenAI_Project\\models\\Fara-7B", 
#                                           torch_dtype=torch.float16,
#                                           device_map="auto")

# model = AutoModelForVision2Seq.from_pretrained("C:\\Users\\ihadi\\Desktop\\Semestre 9\\LLM and Gen AI\\Projet\\IDP_GenAI_Project\\models\\Fara-7B",
#                                                torch_dtype=torch.float16,
#                                                device_map="auto")

In [9]:
from transformers import AutoModel

model = AutoModel.from_pretrained("nvidia/NVIDIA-Nemotron-Parse-v1.1-TC", 
                                  trust_remote_code=True, 
                                  dtype="auto",
                                  cache_dir=LOCAL_MODEL_PATH)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
A new version of the following files was downloaded from https://huggingface.co/nvidia/NVIDIA-Nemotron-Parse-v1.1-TC:
- hf_nemotron_parse_config.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
You are using a model of type nemotron_parse_tc to instantiate a model of type nemotron_parse. This is not supported for all configurations of models and can yield errors.
A new version of the following files was downloaded from https://huggingface.co/nvidia/C-RADIOv2-H:
- extra_timm_models.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revisi

In [1]:
from transformers import AutoProcessor, Florence2ForConditionalGeneration
import torch
from PIL import Image
import os 
import time

# --- CHEMINS (Utilisez le chemin absolu pour une fiabilité maximale) ---
LOCAL_MODEL_PATH = r"C:\Users\ihadi\Desktop\Semestre 9\LLM and Gen AI\Projet\IDP_GenAI_Project\models\Florence-2-base" 
local_file_path = r"C:\Users\ihadi\Downloads\téléchargement (1).jpeg"

try:
    image = Image.open(local_file_path).convert("RGB")
except FileNotFoundError:
    print(f"Erreur: Le fichier image n'a pas été trouvé à l'emplacement: {local_file_path}")
    exit()

print("Démarrage du chargement du modèle sur CPU...")
start_time = time.time()

# --- CHARGEMENT : device_map="cpu" est CRUCIAL ---
model = Florence2ForConditionalGeneration.from_pretrained(
    LOCAL_MODEL_PATH, 
    trust_remote_code=True, 
    torch_dtype=torch.float32
)

processor = AutoProcessor.from_pretrained(
    LOCAL_MODEL_PATH, 
    trust_remote_code=True
)

# --- INFÉRENCE : La solution de contournement du Token ---

# 1. Obtenir le token image exact et le token de début de séquence (BOS)
IMAGE_TOKEN = processor.tokenizer.additional_special_tokens[0]
BOS_TOKEN = processor.tokenizer.bos_token

# 2. Construire la chaîne complète : BOS + Image Token + Instruction
# L'ajout du BOS token rend souvent l'inférence plus stable.
prompt_instructionnel = "Generate a JSON of the document." 
full_prompt = BOS_TOKEN + IMAGE_TOKEN + prompt_instructionnel 

print(f"Prompt final : {full_prompt}")

# 3. Utiliser le processeur pour l'image, mais le tokenizer pour le texte
# Le processeur doit voir l'image et le texte dans le bon ordre.
# Le processeur Florence-2 gère l'encodage du texte pour nous.
inputs = processor(
    text=full_prompt, 
    images=image, 
    return_tensors="pt"
)

# Déplacement des inputs sur CPU
inputs = {k: v.to(model.device) for k, v in inputs.items()}

print("Génération de la réponse...")
generated_ids = model.generate(
    **inputs, 
    max_new_tokens=512, 
    do_sample=False
)

# --- DÉCODAGE ---
result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
end_time = time.time()

print("\n--- RÉSULTAT ---")
print(result)
print(f"\nTemps de génération total: {end_time - start_time:.2f} secondes.")

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!


Démarrage du chargement du modèle sur CPU...


Some weights of the model checkpoint at C:\Users\ihadi\Desktop\Semestre 9\LLM and Gen AI\Projet\IDP_GenAI_Project\models\Florence-2-base were not used when initializing Florence2ForConditionalGeneration: ['image_pos_embed.column_embeddings.weight', 'image_pos_embed.row_embeddings.weight', 'image_proj_norm.bias', 'image_proj_norm.weight', 'image_projection', 'language_model.final_logits_bias', 'language_model.model.decoder.embed_positions.weight', 'language_model.model.decoder.layernorm_embedding.bias', 'language_model.model.decoder.layernorm_embedding.weight', 'language_model.model.decoder.layers.0.encoder_attn.k_proj.bias', 'language_model.model.decoder.layers.0.encoder_attn.k_proj.weight', 'language_model.model.decoder.layers.0.encoder_attn.out_proj.bias', 'language_model.model.decoder.layers.0.encoder_attn.out_proj.weight', 'language_model.model.decoder.layers.0.encoder_attn.q_proj.bias', 'language_model.model.decoder.layers.0.encoder_attn.q_proj.weight', 'language_model.model.decod

Prompt final : <s><od>Generate a JSON of the document.
Génération de la réponse...


ValueError: Image features and image tokens do not match: tokens: 0, features 577

In [1]:
# WARNING: This snippet is not yet compatible with SageMaker version >= 3.0.0.
# To use this snippet, install a compatible version:
# pip install 'sagemaker<3.0.0'
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

try:
	role = sagemaker.get_execution_role()
except ValueError:
	iam = boto3.client('iam')
	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub Model configuration. https://huggingface.co/models
hub = {
	'HF_MODEL_ID':'microsoft/Fara-7B',
	'SM_NUM_GPUS': json.dumps(1)
}



# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="3.3.6"),
	env=hub,
	role=role, 
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g5.2xlarge",
	container_startup_health_check_timeout=300,
  )
  
# send request
predictor.predict({
	"inputs": "Hi, what can you help me with?",
})

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ihadi\AppData\Local\sagemaker\sagemaker\config.yaml


