In [None]:
from google.colab import drive
drive.mount('/content/drive')

### **INSTALLATION & SETUP**

In [None]:
# Step 1: Remove the preinstalled numpy and OpenCV builds so they can be replaced
!pip uninstall -y numpy opencv-python opencv-python-headless

# Step 2: Install pinned versions.
# NOTE(review): the first line pins numpy==1.23.5 but the second line immediately
# reinstalls numpy==1.24.4, and later cells install numpy again (including ==1.26.4),
# so the effective numpy version is whichever install runs last. Confirm which pin is intended.
!pip install numpy==1.23.5 opencv-python==4.8.1.78 --quiet
!pip install --no-cache-dir numpy==1.24.4



In [None]:
!pip uninstall -y transformers huggingface_hub numpy
!pip install transformers==4.39.3 numpy --upgrade --no-cache-dir
!pip install accelerate torchvision --upgrade

In [None]:
!pip install numpy==1.26.4 --no-cache-dir --quiet

In [None]:
!nvidia-smi

In [None]:
import os
HOME = os.getcwd()
print("HOME:", HOME)


In [None]:
!pip install -q 'git+https://github.com/facebookresearch/segment-anything.git'

rerun this cell

In [None]:

!mkdir -p {HOME}/weights
!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -P {HOME}/weights


In [None]:
import os

CHECKPOINT_PATH = os.path.join(HOME, "weights", "sam_vit_h_4b8939.pth")
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))


In [None]:
import torch

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
MODEL_TYPE = "vit_h"

In [None]:
# Import necessary modules
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

# Initialize the SAM model
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH)
sam.to(device=DEVICE)


In [None]:
mask_generator = SamAutomaticMaskGenerator(sam)

In [None]:
!pip install supervision

In [None]:
import os
import cv2
import supervision as sv

# 1. Path of the input image on the mounted Drive
IMAGE_PATH = "/content/drive/MyDrive/Segme_Vision_Pro_Synchronizer/pic/berlin_322463_1.jpg"

# 2. Fail early if the file does not exist
assert os.path.exists(IMAGE_PATH), f"‚ùå Fichier introuvable : {IMAGE_PATH}"

# 3. Read the image; the second assert catches a load that produced None
image_bgr = cv2.imread(IMAGE_PATH)
assert image_bgr is not None, f"‚ùå √âchec de chargement : {IMAGE_PATH}"

# 4. Convert from BGR to RGB (required for SAM)
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# 5. Generate the masks with SAM (relies on mask_generator defined in an earlier cell)
sam_result = mask_generator.generate(image_rgb)

print(f"‚úÖ {len(sam_result)} masques g√©n√©r√©s avec succ√®s.")


In [None]:
print(sam_result[0].keys())

In [None]:
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)

detections = sv.Detections.from_sam(sam_result=sam_result)

annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)

sv.plot_images_grid(
    images=[image_bgr, annotated_image],
    grid_size=(1, 2),
    titles=['source image', 'segmented image']
)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# Loop over every SAM mask: display the isolated object and save it as a PNG
for i, result in enumerate(sam_result):
    mask = result['segmentation']  # boolean (H, W)

    # Apply the mask to a copy of the image so image_rgb itself is left untouched
    masked = image_rgb.copy()
    masked[~mask] = 255  # paint everything outside the mask white

    # Display the extracted object
    plt.figure(figsize=(3,3))
    plt.imshow(masked)
    plt.axis("off")
    plt.title(f"Objet {i}")
    plt.show()

    # Optional: save each object, one file per mask index
    img = Image.fromarray(masked)
    img.save(f"/content/mask_object_{i}.png")



**BLIP1**


In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Charger mod√®le BLIP (captioning)
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)



In [None]:
def describe_object(image_rgb, mask):
    """Return a BLIP caption for the part of ``image_rgb`` selected by ``mask``.

    Uses the notebook-level ``processor``, ``model`` and ``device``.

    Args:
        image_rgb: Image array; it is copied, so the caller's array is not modified.
            Presumably an RGB numpy array — confirm against the caller.
        mask: Boolean array used to index the image; pixels where it is False
            are set to 255 (white) before captioning.

    Returns:
        The decoded caption string, with special tokens removed.
    """
    # Isolate the object on a white background
    isolated = image_rgb.copy()
    isolated[~mask] = 255

    # The processor receives a PIL image; its tensors are moved to `device`
    blip_inputs = processor(Image.fromarray(isolated), return_tensors="pt").to(device)

    # Generate the caption and decode the first (only) sequence
    generated_ids = model.generate(**blip_inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)

# Caption every SAM mask exactly once: print each caption as it is produced and
# keep it in `labels`, instead of running BLIP a second time to build the list.
labels = []
for i, result in enumerate(sam_result):
    mask = result["segmentation"]

    caption = describe_object(image_rgb, mask)
    labels.append(caption)
    print(f"Objet {i} : {caption}")

from supervision import Detections, MaskAnnotator, ColorLookup

# Build the detections from the SAM output and attach the captions as labels
detections = sv.Detections.from_sam(sam_result=sam_result)
detections.labels = labels  # assigns the caption list directly as an attribute


# Overlay the masks on a copy of the BGR image.
# NOTE(review): only a MaskAnnotator is applied, so the captions are presumably
# not drawn on the image — confirm whether a label annotator was intended.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)



In [None]:
import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import supervision as sv
from transformers import BlipProcessor, BlipForConditionalGeneration

# --- 1. Charger et pr√©parer l'image ---
IMAGE_PATH = "/content/drive/MyDrive/Segme_Vision_Pro_Synchronizer/pic/berlin_322463_1.jpg"
assert os.path.exists(IMAGE_PATH), f"‚ùå Fichier introuvable : {IMAGE_PATH}"
image_bgr = cv2.imread(IMAGE_PATH)
assert image_bgr is not None, f"‚ùå √âchec de chargement : {IMAGE_PATH}"
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


# --- 5. Fonction pour g√©n√©rer la description d'un objet masqu√© ---
def describe_object(image_rgb, mask):
    """Caption the object selected by ``mask`` using the notebook-level BLIP objects.

    This redefines the function of the same name from an earlier cell with the
    same logic. It reads the globals ``processor``, ``model`` and ``device``.

    Args:
        image_rgb: Image array; a copy is modified, never the argument itself.
        mask: Boolean array; positions where it is False are painted white (255).

    Returns:
        The decoded caption string, with special tokens removed.
    """
    # White background everywhere outside the mask
    white_bg = image_rgb.copy()
    white_bg[~mask] = 255

    encoded = processor(Image.fromarray(white_bg), return_tensors="pt")
    encoded = encoded.to(device)

    sequences = model.generate(**encoded)
    return processor.decode(sequences[0], skip_special_tokens=True)

# --- 6. Generate a caption for every mask and display each object ---
labels = []
for i, result in enumerate(sam_result):
    mask = result["segmentation"]  # boolean mask

    # Object on a white background, used only for the preview below
    # (describe_object builds its own masked copy for captioning)
    masked = image_rgb.copy()
    masked[~mask] = 255

    caption = describe_object(image_rgb, mask)
    labels.append(caption)

    # Show the isolated object with its caption as the title
    plt.figure(figsize=(4,4))
    plt.imshow(masked)
    plt.title(f"Objet {i} : {caption}", fontsize=10)
    plt.axis("off")
    plt.show()


# Build the detections from the SAM output and attach the captions as labels
detections = sv.Detections.from_sam(sam_result=sam_result)
detections.labels = labels  # assigns the caption list directly as an attribute


# Overlay the masks on a copy of the BGR image.
# NOTE(review): only a MaskAnnotator is applied, so the captions are presumably not drawn on the image — confirm.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)


In [None]:

# --- 7. Merge all captions into a single sentence ---
# Two or more captions: all but the last are joined with "; " and the last is appended after " et ".
if len(labels) > 1:
    phrase_complete = "; ".join(labels[:-1]) + " et " + labels[-1]
elif labels:
    # Exactly one caption: use it as is
    phrase_complete = labels[0]
else:
    # No caption at all: empty sentence
    phrase_complete = ""

print("\n--- Phrase compl√®te regroupant toutes les annotations ---\n")
print(phrase_complete)

In [None]:
!pip install transformers torch sentencepiece

In [None]:
!pip install mistral_inference

In [None]:
import os

from huggingface_hub import login

# Do not paste an access token into the notebook source: read it from the
# HF_TOKEN environment variable when it is set. When it is not set, token=None
# is passed and login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
from huggingface_hub import snapshot_download
from pathlib import Path

mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
mistral_models_path.mkdir(parents=True, exist_ok=True)

snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)

In [None]:
!pip install --upgrade fastai

In [None]:
!pip install --upgrade torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)


In [None]:
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "format": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit to use. Infer this from the user's location."
                    }
                },
                "required": ["location", "format"]
            }
        }
    }
]

conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]



# format and tokenize the tool use prompt
inputs = tokenizer.apply_chat_template(
            conversation,
            tools=tools,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
)

# NOTE(review): this rebinds the notebook-level name `model`, which the BLIP
# `describe_object` cells above also read. Re-running those cells after this one
# would call generate() on this causal LM instead of the BLIP model.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

# Move the tokenized prompt to the device the model reports, then generate and print the decoded output
inputs.to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
def extract_objects_with_mistral(description: str) -> list[str]:
    """Ask the loaded causal LM to list the main visible objects in a description.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        description: Free-text image description inserted into the prompt.

    Returns:
        Lower-cased object names parsed from the model's answer, de-duplicated
        and kept in the order the model listed them, so the result is
        deterministic for a given model output.
    """
    prompt = (
        "You are an AI assistant that extracts visual objects from image descriptions.\n"
        "From the following text, list *only the main visible objects* (no colors, no adjectives, no duplicates).\n"
        "Output a comma-separated list in lowercase. End the list with a dot.\n\n"
        f"Description: {description}\n\n"
        "Objects:"
    )

    # Tokenize the prompt and generate at most 64 new tokens
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)

    # The decoded text is split on "Objects:" and only the last segment is kept
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The prompt asks for a trailing dot, so the list is cut at the first period
    object_line = decoded.split("Objects:")[-1].split(".")[0].strip()

    # Split on commas, normalise, and drop empty fragments
    object_list = [obj.strip().lower() for obj in object_line.split(",") if obj.strip()]

    # dict.fromkeys removes duplicates while preserving first-seen order,
    # unlike set(), whose iteration order is arbitrary
    return list(dict.fromkeys(object_list))

In [None]:
description = "a white light switch plate with a white background; thermorer‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢‚Ñ¢; a white plastic plate with a hole for the bottom; a white background with a white circle; a white and brown flower on a white background; a white wall light with a white shade; a white and brown table with a wooden top; a bed with a brown cover and a white pillow; a wooden door with a handle on it; a wooden shelf with a white background; a brown wooden table with a white background; a tall wooden cabinet with a door; a wooden table with a white background; a wooden shelf with a white background; a pair of white and black metal lamps; a wooden table with a white background; a small wooden object with a white background; a wooden door with a white background; a white background with a black and white border; a black and white floor lamp with a white shade; a white and brown flower on a white background; a white background with a brown and black pattern; a small wooden box with a handle; a white wall mounted light with a white background; a vase with branches on it; a white background with a wooden shelf; a black and white lamp with a white shade; a gold plate with a small square shaped object; a black and white photo of a man in a suit; a white background with a black and white image of a man in a suit; a pair of black and white chairs; the row of leather mules in brown; a small wooden block with a white background; a white and black floor lamp with a white shade; a small bird flying in the sky; a white background with a black and white image; a white background with a black and white image; a black and white photo of a wall light; a black and white photo of a wall light; a white background with a black and white image of a white background with a black and white image of et a white background with a black and white image of a black and white image of a black and white"
#print(description)
objects = extract_objects_with_mistral(description)
print(objects)

**GROUNDINGDINO**

In [None]:
!pip install --upgrade git+https://github.com/huggingface/transformers.git

In [None]:
import transformers
print(transformers.__version__)  # Doit √™tre >= 4.41.0.dev

In [None]:
import requests

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

model_id = "IDEA-Research/grounding-dino-tiny"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(model_id)
from transformers import GroundingDinoForObjectDetection
model = GroundingDinoForObjectDetection.from_pretrained(model_id).to(device)


In [None]:

import torch
from PIL import Image

image = Image.open("/content/drive/MyDrive/Segme_Vision_Pro_Synchronizer/pic/berlin_322463_1.jpg").convert("RGB")

# Text prompt for the detector: the list returned by extract_objects_with_mistral in an earlier cell
text = objects  # example value; adapt to your own prompt

# Important: truncation and max_length are passed explicitly
inputs = processor(
    images=image,
    text=text,
    return_tensors="pt",
    truncation=True,
    max_length=256
).to(device)

# Forward pass without gradient tracking
with torch.no_grad():
    outputs = model(**inputs)

# image.size is reversed before being passed as target_sizes
# (presumably to give (height, width) — confirm against the processor docs)
results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    box_threshold=0.4,
    text_threshold=0.3,
    target_sizes=[image.size[::-1]]
)

print(results)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Show the image
fig, ax = plt.subplots(1, figsize=(12, 9))
ax.imshow(image)

# Outline every detected object. The four box values are used as two corners:
# width and height are obtained by subtracting the first pair from the second.
for result in results[0]["boxes"]:
    box = result.tolist()  # plain Python list, easier to unpack
    x_min, y_min, x_max, y_max = box
    rect = patches.Rectangle(
        (x_min, y_min),
        x_max - x_min,
        y_max - y_min,
        linewidth=2,
        edgecolor="r",
        facecolor="none",
    )
    ax.add_patch(rect)

plt.show()

In [None]:
'''import nbformat

file = "/content/synchronizer.ipynb"

# Open the notebook
with open(file, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Check and delete 'widgets' from metadata
if "widgets" in nb.metadata:
    print("üîß Found 'metadata.widgets' ‚Äî removing it...")
    del nb.metadata["widgets"]
else:
    print("‚úÖ No widgets found.")

# Save cleaned notebook
with open(file, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print("‚úÖ Notebook cleaned and saved.")


In [None]:
'''import nbformat

file = "/content/synchronizer.ipynb"  # name of your notebook

# Open the notebook
with open(file, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Check and delete 'widgets' from metadata
if "widgets" in nb.metadata:
    print("üîß Found 'metadata.widgets' ‚Äî removing it...")
    del nb.metadata["widgets"]
else:
    print("‚úÖ No widgets found.")

# Save cleaned notebook
with open(file, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print("‚úÖ Notebook cleaned and saved.")


In [None]:
'''import json
import sys

def clean_notebook(notebook_path):
    with open('/content/synchronizer.ipynb', 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    # Supprimer les m√©tadonn√©es de widgets au niveau du notebook
    if 'widgets' in notebook.get('metadata', {}):
        del notebook['metadata']['widgets']

    # Nettoyer chaque cellule
    for cell in notebook.get('cells', []):
        # Supprimer les outputs des cellules
        if 'outputs' in cell:
            cell['outputs'] = []

        # Supprimer les m√©tadonn√©es de widgets des cellules
        if 'widgets' in cell.get('metadata', {}):
            del cell['metadata']['widgets']

        # R√©initialiser execution_count
        if 'execution_count' in cell:
            cell['execution_count'] = None

    # Sauvegarder le notebook nettoy√©
    with open(notebook_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=2, ensure_ascii=False)

    print(f"‚úÖ Notebook {notebook_path} nettoy√© avec succ√®s!")

# Utilisation
clean_notebook('votre_notebook.ipynb')

In [None]:
'''import json
import os

def deep_clean_notebook(notebook_path):
    print(f"üîç Analyse de {notebook_path}...")

    with open('/content/synchronizer.ipynb', 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    changes_made = False

    # 1. Nettoyer les m√©tadonn√©es au niveau du notebook
    if 'metadata' in notebook:
        if 'widgets' in notebook['metadata']:
            print("‚ùå Suppression des m√©tadonn√©es widgets du notebook")
            del notebook['metadata']['widgets']
            changes_made = True

        # Supprimer aussi d'autres m√©tadonn√©es probl√©matiques
        problematic_keys = ['widget_state', 'application/vnd.jupyter.widget-state+json']
        for key in problematic_keys:
            if key in notebook['metadata']:
                del notebook['metadata'][key]
                changes_made = True

    # 2. Nettoyer chaque cellule
    for i, cell in enumerate(notebook.get('cells', [])):
        # Supprimer les outputs
        if 'outputs' in cell and cell['outputs']:
            print(f"üßπ Nettoyage des outputs de la cellule {i}")
            cell['outputs'] = []
            changes_made = True

        # Supprimer execution_count
        if 'execution_count' in cell and cell['execution_count'] is not None:
            cell['execution_count'] = None
            changes_made = True

        # Nettoyer les m√©tadonn√©es des cellules
        if 'metadata' in cell:
            cell_metadata_keys_to_remove = ['widgets', 'widget_state', 'application/vnd.jupyter.widget-state+json']
            for key in cell_metadata_keys_to_remove:
                if key in cell['metadata']:
                    print(f"‚ùå Suppression des m√©tadonn√©es {key} de la cellule {i}")
                    del cell['metadata'][key]
                    changes_made = True

    # 3. Supprimer compl√®tement la section widgets s'il y en a une au niveau racine
    widgets_keys = ['widgets', 'widget_state']
    for key in widgets_keys:
        if key in notebook:
            print(f"‚ùå Suppression de la cl√© {key} au niveau racine")
            del notebook[key]
            changes_made = True

    # 4. Sauvegarder
    if changes_made:
        with open(notebook_path, 'w', encoding='utf-8') as f:
            json.dump(notebook, f, indent=1, ensure_ascii=False)
        print(f"‚úÖ {notebook_path} nettoy√© avec succ√®s!")
    else:
        print("‚ÑπÔ∏è Aucun changement n√©cessaire")

    return changes_made

# Appliquer le nettoyage
deep_clean_notebook('sam_Blip_HLBB.ipynb')

**Génération du HLBB en utilisant une fonction d'inférence**

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Charger le mod√®le de captioning
processor_blip = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").eval().to("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
def generate_caption_blip(image_pil):
    """Generate a caption for ``image_pil`` with the notebook-level BLIP model.

    Uses the globals ``processor_blip`` and ``model_blip``; the processed inputs
    are moved to ``model_blip.device`` and generation runs without gradient
    tracking.

    Args:
        image_pil: Image passed to ``processor_blip`` (presumably a PIL image,
            as the name suggests — confirm against callers).

    Returns:
        The decoded caption string, with special tokens removed.
    """
    encoded = processor_blip(image_pil, return_tensors="pt").to(model_blip.device)

    with torch.no_grad():
        token_ids = model_blip.generate(**encoded)

    # Only the first generated sequence is decoded
    return processor_blip.decode(token_ids[0], skip_special_tokens=True)


In [None]:
hlbb_list = []  # one entry per detected box: coordinates, label, caption and features

# NOTE(review): extract_hl_features and image_np are not defined in the cells visible
# here; they must already exist in the kernel for this cell to run — confirm their origin.
for result in results:
    raw_boxes = result["boxes"].cpu().numpy()
    raw_labels = result["text_labels"]

    for box, label in zip(raw_boxes, raw_labels):
        features = extract_hl_features(image_np, box, image.size)
        # Box values are converted to int before cropping
        crop_pil = image.crop(tuple(map(int, box)))

        # Use the BLIP model (loaded in cell GwkPsJyayCi8) to generate the caption
        caption = generate_caption_blip(crop_pil)

        # Every numeric value is converted to a plain Python float before being stored
        hlbb_list.append({
            "box": [float(v) for v in box.tolist()],
            "label": label,
            "caption": caption,
            "features": {
                "color_histogram": [float(h) for h in features["color_histogram"]],
                "texture_lbp": [float(t) for t in features["texture_lbp"]],
                "aspect_ratio": float(features["aspect_ratio"]),
                "relative_area": float(features["relative_area"])
            }
        })

print("‚úÖ Extraction des HLBB + captions termin√©e.")

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

def show_crop_with_caption(image_pil, box, caption):
    """Display the region of ``image_pil`` delimited by ``box`` with ``caption`` as title.

    Args:
        image_pil: Image object exposing ``crop``.
        box: Four values unpacked as (x0, y0, x1, y1); each is converted to int.
        caption: Text used as the figure title.
    """
    left, top, right, bottom = (int(coord) for coord in box)
    region = image_pil.crop((left, top, right, bottom))

    plt.figure(figsize=(4, 4))
    plt.imshow(region)
    plt.title(caption, fontsize=10)
    plt.axis("off")
    plt.show()


In [None]:
# For each detected object in hlbb_list
for obj in hlbb_list:
    box = obj["box"]
    x0, y0, x1, y1 = map(int, box)
    crop = image.crop((x0, y0, x1, y1))

    # Generate a text description of the crop with BLIP
    caption = generate_caption_blip(crop)

    # Display the object together with its caption
    show_crop_with_caption(image, box, caption)

    # (optional) Store the caption on the object, overwriting any existing one
    obj["caption"] = caption


In [None]:
def get_position_description(box, image_size):
    """Describe where a box's centre falls on a 3x3 grid of the image.

    Args:
        box: Four values unpacked as (x0, y0, x1, y1); each is converted to int
            before the centre is computed.
        image_size: Pair unpacked as (width, height).

    Returns:
        A string of the form "<vertical> <horizontal>", e.g.
        "at the top on the left" or "in the middle in the center".
    """
    left, top, right, bottom = (int(value) for value in box)
    width, height = image_size

    def _third(center, extent, names):
        # Pick the name of the third of `extent` that contains `center`;
        # a centre exactly on a boundary belongs to the later third.
        if center < extent / 3:
            return names[0]
        if center < 2 * extent / 3:
            return names[1]
        return names[2]

    horizontal = _third(
        (left + right) / 2, width, ("on the left", "in the center", "on the right")
    )
    vertical = _third(
        (top + bottom) / 2, height, ("at the top", "in the middle", "at the bottom")
    )

    return f"{vertical} {horizontal}"



In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def show_full_image_with_captions(image_pil, hlbb_list):
    """Draw every box of ``hlbb_list`` on the full image with a numbered text label.

    Each label reads "<n>. <position> : <caption>", where the position comes from
    ``get_position_description`` and the caption falls back to
    "aucune description" when the entry has none.

    Args:
        image_pil: Image to display; its ``size`` attribute is passed on as the image size.
        hlbb_list: Iterable of dicts with a "box" entry (four values unpacked as
            x0, y0, x1, y1) and an optional "caption" entry.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image_pil)
    image_size = image_pil.size

    for number, entry in enumerate(hlbb_list, start=1):
        bbox = entry["box"]
        description = entry.get("caption", "aucune description")
        left, top, right, bottom = (int(coord) for coord in bbox)

        # Red outline around the object
        outline = patches.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=2,
            edgecolor='red',
            facecolor='none',
        )
        ax.add_patch(outline)

        # Label = running number + position in the image + caption,
        # placed 10 units above the top edge of the box
        where = get_position_description(bbox, image_size)
        ax.text(
            left,
            top - 10,
            f"{number}. {where} : {description}",
            fontsize=8,
            color='white',
            backgroundcolor='black',
        )

    ax.axis('off')
    plt.tight_layout()
    plt.show()


In [None]:
# First, generate captions only for objects that do not have one yet:
# entries already captioned by an earlier cell are skipped, which avoids
# another BLIP generation per object.
for obj in hlbb_list:
    if obj.get("caption"):
        continue
    x0, y0, x1, y1 = map(int, obj["box"])
    crop = image.crop((x0, y0, x1, y1))
    obj["caption"] = generate_caption_blip(crop)

# Then draw everything on the full image
show_full_image_with_captions(image, hlbb_list)


In [None]:
def extract_caption_and_position(hlbb_list, image_size):
    """Build a list of {"position", "caption"} dicts, one per entry of ``hlbb_list``.

    Args:
        hlbb_list: Iterable of dicts with a "box" entry and an optional "caption"
            entry ("aucune description" is used when it is missing).
        image_size: Passed unchanged to ``get_position_description``.

    Returns:
        A list with one dict per input entry, in the same order.
    """
    return [
        {
            "position": get_position_description(entry["box"], image_size),
            "caption": entry.get("caption", "aucune description"),
        }
        for entry in hlbb_list
    ]


In [None]:

# Collect the position and caption of every object
caption_position_list = extract_caption_and_position(hlbb_list, image.size)

# Print one line per object, numbered from 1.
# NOTE(review): the position strings returned by get_position_description already start
# with "at the"/"in the", so this prints e.g. "in at the top ..." — confirm this is the requested format.
for idx, item in enumerate(caption_position_list, 1):
    print(f"Objet {idx} is a \"{item['caption']}\" in {item['position']}.")


In [None]:
!pip install nbconvert


Clearing the notebook in order to use it on GitHub


In [None]:
!pip install -q nbformat nbconvert


In [None]:
import nbformat

file = "/content/sam_Blip_VF.ipynb"  # name of your notebook

# Open the notebook
with open(file, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Check and delete 'widgets' from metadata
if "widgets" in nb.metadata:
    print("üîß Found 'metadata.widgets' ‚Äî removing it...")
    del nb.metadata["widgets"]
else:
    print("‚úÖ No widgets found.")

# Save cleaned notebook
with open(file, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print("‚úÖ Notebook cleaned and saved.")


In [None]:
import nbformat

file = "/content/sam_Blip_VF.ipynb"  # name of your notebook

# Open the notebook
with open(file, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Check and delete 'widgets' from metadata
if "widgets" in nb.metadata:
    print("üîß Found 'metadata.widgets' ‚Äî removing it...")
    del nb.metadata["widgets"]
else:
    print("‚úÖ No widgets found.")

# Save cleaned notebook
with open(file, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print("‚úÖ Notebook cleaned and saved.")


In [None]:
import json
import sys

def clean_notebook(notebook_path, source_path='/content/sam_Blip_VF.ipynb'):
    """Strip widget metadata, outputs and execution counts from a notebook file.

    Args:
        notebook_path: Path the cleaned notebook is written to.
        source_path: Path the notebook is read from. Defaults to the path that
            was previously hard-coded, so existing calls behave as before; pass
            the same value as ``notebook_path`` to clean a notebook in place.

    Raises:
        OSError: If ``source_path`` cannot be read or ``notebook_path`` cannot be written.
        json.JSONDecodeError: If the source file is not valid JSON.
    """
    with open(source_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    # Remove the notebook-level widget metadata
    if 'widgets' in notebook.get('metadata', {}):
        del notebook['metadata']['widgets']

    # Clean every cell
    for cell in notebook.get('cells', []):
        # Drop the cell outputs
        if 'outputs' in cell:
            cell['outputs'] = []

        # Remove the cell-level widget metadata
        if 'widgets' in cell.get('metadata', {}):
            del cell['metadata']['widgets']

        # Reset the execution counter
        if 'execution_count' in cell:
            cell['execution_count'] = None

    # Save the cleaned notebook
    with open(notebook_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=2, ensure_ascii=False)

    print(f"‚úÖ Notebook {notebook_path} nettoy√© avec succ√®s!")

# Utilisation
clean_notebook('votre_notebook.ipynb')

In [None]:
import json
import os

def deep_clean_notebook(notebook_path, source_path='/content/sam_Blip_VF.ipynb'):
    """Remove widget state, outputs and execution counts from a notebook file.

    The cleaned notebook is written only when something was actually removed
    or reset.

    Args:
        notebook_path: Path the cleaned notebook is written to (and the name
            shown in the progress messages).
        source_path: Path the notebook is read from. Defaults to the path that
            was previously hard-coded, so existing calls behave as before; pass
            the same value as ``notebook_path`` to clean a notebook in place.

    Returns:
        True if at least one change was made (and the file was written), else False.

    Raises:
        OSError: If ``source_path`` cannot be read or ``notebook_path`` cannot be written.
        json.JSONDecodeError: If the source file is not valid JSON.
    """
    print(f"üîç Analyse de {notebook_path}...")

    with open(source_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    changes_made = False

    # 1. Clean the notebook-level metadata
    if 'metadata' in notebook:
        if 'widgets' in notebook['metadata']:
            print("‚ùå Suppression des m√©tadonn√©es widgets du notebook")
            del notebook['metadata']['widgets']
            changes_made = True

        # Also drop other widget-state keys (removed silently, no message)
        problematic_keys = ['widget_state', 'application/vnd.jupyter.widget-state+json']
        for key in problematic_keys:
            if key in notebook['metadata']:
                del notebook['metadata'][key]
                changes_made = True

    # 2. Clean every cell
    for i, cell in enumerate(notebook.get('cells', [])):
        # Drop non-empty outputs
        if 'outputs' in cell and cell['outputs']:
            print(f"üßπ Nettoyage des outputs de la cellule {i}")
            cell['outputs'] = []
            changes_made = True

        # Reset execution_count when it is set
        if 'execution_count' in cell and cell['execution_count'] is not None:
            cell['execution_count'] = None
            changes_made = True

        # Remove widget-related keys from the cell metadata
        if 'metadata' in cell:
            cell_metadata_keys_to_remove = ['widgets', 'widget_state', 'application/vnd.jupyter.widget-state+json']
            for key in cell_metadata_keys_to_remove:
                if key in cell['metadata']:
                    print(f"‚ùå Suppression des m√©tadonn√©es {key} de la cellule {i}")
                    del cell['metadata'][key]
                    changes_made = True

    # 3. Remove widget sections stored at the root of the notebook, if any
    widgets_keys = ['widgets', 'widget_state']
    for key in widgets_keys:
        if key in notebook:
            print(f"‚ùå Suppression de la cl√© {key} au niveau racine")
            del notebook[key]
            changes_made = True

    # 4. Save only when something changed
    if changes_made:
        with open(notebook_path, 'w', encoding='utf-8') as f:
            json.dump(notebook, f, indent=1, ensure_ascii=False)
        print(f"‚úÖ {notebook_path} nettoy√© avec succ√®s!")
    else:
        print("‚ÑπÔ∏è Aucun changement n√©cessaire")

    return changes_made

# Appliquer le nettoyage
deep_clean_notebook('sam_Blip_HLBB.ipynb')

In [None]:
!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace /content/Segma_Vision_Synchronizer_clean.ipynb
