# Import des bibliothèques 

In [None]:
import torch
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
import cv2
import open_clip
import torch
from PIL import Image
import numpy as np
import pickle
from tqdm import tqdm
import os 
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve
from pycocotools.coco import COCO

# Installation de Detectron2

# Construction de Runa

### Import du faster RCNN

In [None]:
# Model-zoo identifier shared by the config file and the pretrained checkpoint.
FASTER_RCNN_YAML = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

# Start from Detectron2's defaults, then overlay the Faster R-CNN config.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(FASTER_RCNN_YAML))

# Pretrained weights, detection score threshold and compute device.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(FASTER_RCNN_YAML)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Predictor used by every detection call further down.
predictor = DefaultPredictor(cfg)


### Fonction pour blur et crop

In [None]:
def detect_crop_and_blurv2(image, predictor, blur_ksize=51):
    """Detect objects and build, for each one, a crop and a region-blurred copy.

    Args:
        image: BGR image as a numpy array (e.g. the result of ``cv2.imread``).
        predictor: Callable returning a dict whose ``"instances"`` entry
            exposes ``to()``, ``len()`` and ``pred_boxes.tensor`` (a
            Detectron2 ``DefaultPredictor`` in this notebook).
        blur_ksize: Side of the square Gaussian kernel. OpenCV only accepts
            odd, positive sizes, so other values are bumped to the next valid
            odd size instead of raising inside the loop.

    Returns:
        A list with one dict per usable detection::

            {
                "box": [x1, y1, x2, y2],   # integer pixel coordinates
                "crop": cropped region (copy),
                "blurred": full image with the box area blurred (copy),
            }

        The list is empty when nothing is detected. Boxes that are empty
        after integer truncation and clipping to the image are skipped.
    """
    outputs = predictor(image)
    instances = outputs["instances"].to("cpu")

    if len(instances) == 0:
        return []

    height, width = image.shape[:2]
    # Force an odd, strictly positive kernel size (cv2.GaussianBlur requirement).
    ksize = max(1, int(blur_ksize)) | 1

    boxes = instances.pred_boxes.tensor.numpy().astype(int)

    results = []
    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box)

        # Clip to the image: a negative start index would otherwise wrap
        # around in numpy slicing and select the wrong region.
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)

        # Truncating float coordinates can collapse a thin box to zero width
        # or height; an empty region cannot be blurred or encoded.
        if x2 <= x1 or y2 <= y1:
            continue

        # Regional view: the object alone.
        crop = image[y1:y2, x1:x2].copy()

        # Global view: the whole image with the object blurred out.
        blurred_img = image.copy()
        blurred_img[y1:y2, x1:x2] = cv2.GaussianBlur(
            blurred_img[y1:y2, x1:x2], (ksize, ksize), 0
        )

        results.append({
            "box": [x1, y1, x2, y2],
            "crop": crop,
            "blurred": blurred_img
        })

    return results


### Mise en place de CLIP

In [None]:
# Run CLIP on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# create_model_and_transforms returns (model, preprocess_train, preprocess_val).
# It does not return a tokenizer. Inference must use the validation transform:
# the training transform applies random cropping, which makes the embeddings
# (and therefore the scores) non-deterministic.
model, _preprocess_train, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-16",
    pretrained="openai"
)

# The text tokenizer comes from its own factory function.
tokenizer = open_clip.get_tokenizer("ViT-B-16")

model = model.to(device)
model.eval()


In [None]:
def encode_image_clip(image_bgr):
    """Embed one OpenCV image with the CLIP image encoder.

    Args:
        image_bgr: Image in OpenCV's BGR channel order.

    Returns:
        The output of ``model.encode_image`` for a batch holding this single
        image, computed without gradient tracking.
    """
    # OpenCV stores channels as BGR; PIL and the CLIP transform expect RGB.
    pil_img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))

    # Apply the CLIP transform and add the leading batch dimension.
    batch = preprocess(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        return model.encode_image(batch)


In [None]:
def encode_runa_batch(results):
    """Encode the crop and the blurred image of every detection.

    Args:
        results: Iterable of dicts holding a ``"crop"`` and a ``"blurred"``
            image, as produced by ``detect_crop_and_blurv2``.

    Returns:
        ``(crop_embeddings, global_embeddings)``: two lists aligned with
        ``results``.
    """
    # Encode each detection's crop first, then its blurred image, so the
    # encoder is called in the same order as a plain per-detection loop.
    pairs = [
        (encode_image_clip(item["crop"]), encode_image_clip(item["blurred"]))
        for item in results
    ]

    crop_embeddings = [crop for crop, _ in pairs]
    global_embeddings = [blurred for _, blurred in pairs]
    return crop_embeddings, global_embeddings


In [None]:
def normalize_embedding(emb):
    """Scale ``emb`` to unit L2 norm along its last dimension."""
    length = emb.norm(dim=-1, keepdim=True)
    return emb / length


In [None]:
def fuse_embeddings(crop_emb, global_emb, lam=0.5):
    """Blend a regional and a global embedding into one unit-norm vector.

    Both inputs are L2-normalised before mixing. Raw encoder outputs can have
    different norms, and mixing them directly would let the larger one
    dominate regardless of ``lam``.

    Args:
        crop_emb: Embedding of the cropped object.
        global_emb: Embedding of the image with the object blurred.
        lam: Weight of the crop embedding; the global one gets ``1 - lam``.

    Returns:
        The weighted sum of the two unit vectors, itself rescaled to unit
        L2 norm along the last dimension.
    """
    crop_unit = crop_emb / crop_emb.norm(dim=-1, keepdim=True)
    global_unit = global_emb / global_emb.norm(dim=-1, keepdim=True)

    fused = lam * crop_unit + (1 - lam) * global_unit
    return fused / fused.norm(dim=-1, keepdim=True)


In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer for the ViT-B-16 text encoder.
tokenizer = open_clip.get_tokenizer("ViT-B-16")

# One text prompt per in-distribution class (20 prompts).
voc_labels1 = [
    "a photo of a aeroplane",
    "a photo of a bicycle",
    "a photo of a bird",
    "a photo of a boat",
    "a photo of a bottle",
    "a photo of a bus",
    "a photo of a car",
    "a photo of a cat",
    "a photo of a chair",
    "a photo of a cow",
    "a photo of a dining table",
    "a photo of a dog",
    "a photo of a horse",
    "a photo of a motorbike",
    "a photo of a person",
    "a photo of a potted plant",
    "a photo of a sheep",
    "a photo of a sofa",
    "a photo of a train",
    "a photo of a tv monitor",
]

# Tokenize all prompts at once and move them next to the model.
text_tokens = tokenizer(voc_labels1).to(device)

with torch.no_grad():
    text_embeddings = model.encode_text(text_tokens)

# Unit-normalise so that a dot product with a unit vector is a cosine similarity.
text_embeddings = text_embeddings.div(text_embeddings.norm(dim=-1, keepdim=True))

print(text_embeddings.shape)




In [None]:
def runa_score(fused_emb, text_embeddings):
    """Score one fused embedding against the class text embeddings.

    Args:
        fused_emb: Fused image embedding.
        text_embeddings: One row per class prompt.

    Returns:
        ``(score, similarities)`` where ``similarities`` is the product of
        ``fused_emb`` with the transposed text embeddings (a cosine
        similarity when both sides are unit-normalised) and ``score`` is the
        negated maximum similarity as a Python float.
    """
    similarities = fused_emb.matmul(text_embeddings.T)
    # The closer the best-matching class, the lower the score.
    return -similarities.max().item(), similarities


In [None]:
def runa_score_image(image, predictor, text_embeddings):
    """Compute the image-level score from its detected objects.

    Args:
        image: BGR image as a numpy array.
        predictor: Detector passed through to ``detect_crop_and_blurv2``.
        text_embeddings: Class text embeddings passed to ``runa_score``.

    Returns:
        The smallest per-object score, or ``None`` when no object was
        detected.
    """
    # Detection, then one crop and one blurred image per object.
    detections = detect_crop_and_blurv2(image, predictor)
    if not detections:
        return None

    # CLIP embeddings of both views of every object.
    crop_embs, global_embs = encode_runa_batch(detections)

    # Fuse the two views of each object, score it, and keep the lowest score.
    return min(
        runa_score(fuse_embeddings(crop, blurred), text_embeddings)[0]
        for crop, blurred in zip(crop_embs, global_embs)
    )


In [None]:
def _save_scores(scores, save_path):
    """Pickle ``scores`` to ``save_path`` without leaving a truncated file."""
    # Write next to the target, then swap in one step: an interruption during
    # the dump cannot corrupt the previous checkpoint.
    tmp_path = f"{save_path}.tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(scores, f)
    os.replace(tmp_path, save_path)


def compute_scores(
    img_paths,
    predictor,
    text_embeddings,
    save_path,
    save_every=50,
    desc="Processing images"
):
    """Score a list of images and checkpoint the scores to disk.

    Args:
        img_paths: Paths of the images to score.
        predictor: Detector passed through to ``runa_score_image``.
        text_embeddings: Class text embeddings passed to ``runa_score_image``.
        save_path: Pickle file receiving the list of scores.
        save_every: Checkpoint after every ``save_every`` scored images;
            ``0`` disables intermediate checkpoints.
        desc: Label used for the progress bar and the summary.

    Returns:
        The list of scores, in input order. Images that cannot be read and
        images without any detection contribute no score; both are counted
        and reported in the summary so the dropped share is visible.
    """
    scores = []
    unreadable = 0
    undetected = 0

    for img_path in tqdm(img_paths, desc=desc):
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing or undecodable file with None.
            unreadable += 1
            continue

        score = runa_score_image(image, predictor, text_embeddings)
        if score is None:
            undetected += 1
            continue

        scores.append(score)

        # Periodic checkpoint.
        if save_every and len(scores) % save_every == 0:
            _save_scores(scores, save_path)

    # Final save.
    _save_scores(scores, save_path)

    print(f"\n{desc}")
    print("Nombre d'images scorées:", len(scores))
    print("Images illisibles ignorées:", unreadable)
    print("Images sans détection ignorées:", undetected)
    if scores:
        print("Min/Max scores:", min(scores), max(scores))

    return scores


### IID : Pascal VOC2007

In [None]:
# Location of the Pascal VOC 2007 devkit and of the test split listing.
voc_root = "VOC/VOCtest_train_06-Nov-2007/VOCdevkit/VOC2007"
img_dir = os.path.join(voc_root, "JPEGImages")
split_file = os.path.join(voc_root, "ImageSets", "Main", "test.txt")

# The split file lists one image id per line.
with open(split_file, "r") as f:
    test_ids = list(map(str.strip, f))

# Each id maps to <img_dir>/<id>.jpg.
img_paths_voc = [os.path.join(img_dir, img_id + ".jpg") for img_id in test_ids]

print("Nb images test:", len(img_paths_voc))
print("Exemple:", img_paths_voc[0])


In [None]:
# Score the VOC 2007 test images (the in-distribution set). The score list
# is written to scores_voc2007_test.pkl, with a checkpoint every 50 scored images.
scores_voc = compute_scores(
    img_paths=img_paths_voc,
    predictor=predictor,
    text_embeddings=text_embeddings,
    save_path="scores_voc2007_test.pkl",
    save_every=50,
    desc="VOC 2007 Test"
)

In [None]:
# Reload the VOC scores from the pickle written by the scoring cell, so the
# analysis below can run without recomputing them.
# NOTE(review): pickle.load is only safe on files this notebook produced.
with open("scores_voc2007_test.pkl", "rb") as f:
    scores_voc = pickle.load(f)

print(type(scores_voc))
print(len(scores_voc))


In [None]:
# Histogram of the image-level scores on VOC, using 50 bins.
plt.hist(scores_voc, bins=50)
plt.title("Distribution des scores VOC")
plt.xlabel("Score")
plt.ylabel("Nombre d'images")
plt.show()


### OOD: MS COCO 2014 non filtré

In [None]:
# Directory holding the COCO 2014 validation images.
coco_dir = "coco/images/val2014"

# os.listdir returns entries in arbitrary order. Sorting keeps the image
# order, and therefore the order of the saved scores, identical across runs
# and machines.
img_paths_coco = [
    os.path.join(coco_dir, f)
    for f in sorted(os.listdir(coco_dir))
    if f.endswith(".jpg")
]

In [None]:
# Score every COCO 2014 validation image (unfiltered OOD set). The score list
# is written to scores_coco2014_val.pkl, with a checkpoint every 50 scored images.
scores_coco = compute_scores(
    img_paths=img_paths_coco,
    predictor=predictor,
    text_embeddings=text_embeddings,
    save_path="scores_coco2014_val.pkl",
    save_every=50,
    desc="COCO 2014 Val"
)


In [None]:
# Reload the COCO scores from the pickle written by the scoring cell and
# show how many there are plus the first ten values.
# NOTE(review): pickle.load is only safe on files this notebook produced.
with open("scores_coco2014_val.pkl", "rb") as f:
    scores_coco = pickle.load(f)

print(len(scores_coco))
print(scores_coco[:10])


In [None]:
def evaluate_ood(scores_id, scores_ood, id_name="ID", ood_name="OOD"):
    """Compute and print AUROC and FPR95 for ID-vs-OOD separation.

    OOD samples are the positive class (label 1) and ID samples the negative
    class (label 0), so a higher score is read as "more likely OOD".

    Args:
        scores_id: Scores of the in-distribution samples.
        scores_ood: Scores of the out-of-distribution samples.
        id_name: Display name of the ID dataset.
        ood_name: Display name of the OOD dataset.

    Returns:
        Dict with keys ``"id_mean"``, ``"ood_mean"``, ``"auroc"`` and
        ``"fpr95"``.

    Raises:
        ValueError: If either score collection is empty.
    """
    scores_id = list(scores_id)
    scores_ood = list(scores_ood)

    # Both classes are needed for a ROC curve; fail early with a clear message.
    if not scores_id or not scores_ood:
        raise ValueError(
            "evaluate_ood needs at least one ID score and one OOD score"
        )

    id_mean = np.mean(scores_id)
    ood_mean = np.mean(scores_ood)
    print(f"{id_name} mean:", id_mean)
    print(f"{ood_name} mean:", ood_mean)

    y_true = [0] * len(scores_id) + [1] * len(scores_ood)
    y_scores = scores_id + scores_ood

    auc = roc_auc_score(y_true, y_scores)
    print("AUROC:", auc)

    fpr, tpr, _ = roc_curve(y_true, y_scores)

    # FPR95 is the false-positive rate at the first operating point whose TPR
    # reaches 0.95. Taking the point *nearest* to 0.95 could select one with a
    # TPR below 0.95 and under-report the rate. tpr is non-decreasing and ends
    # at 1.0, so argmax on the boolean mask finds that first point.
    idx = int(np.argmax(tpr >= 0.95))
    fpr95 = fpr[idx]

    print("FPR95:", fpr95)

    return {
        "id_mean": id_mean,
        "ood_mean": ood_mean,
        "auroc": auc,
        "fpr95": fpr95,
    }


In [None]:
# Separation metrics with VOC as the ID set and unfiltered COCO as the OOD set.
results_voc_coco = evaluate_ood(
    scores_voc,
    scores_coco,
    id_name="VOC",
    ood_name="COCO"
)

In [None]:
def plot_score_distributions(scores_id, scores_ood,
                             id_name="ID", ood_name="OOD",
                             bins=50):
    """Overlay the ID and OOD score histograms on one figure.

    Args:
        scores_id: Scores of the in-distribution samples.
        scores_ood: Scores of the out-of-distribution samples.
        id_name: Legend name of the ID dataset.
        ood_name: Legend name of the OOD dataset.
        bins: ``bins`` argument forwarded to ``plt.hist`` for both histograms.
    """
    series = (
        (scores_id, f"{id_name} (ID)"),
        (scores_ood, f"{ood_name} (OOD)"),
    )
    # Semi-transparent bars keep both distributions visible where they overlap.
    for values, legend in series:
        plt.hist(values, bins=bins, alpha=0.5, label=legend)

    plt.legend()
    plt.xlabel("Score")
    plt.ylabel("Count")
    plt.title("Score distributions")
    plt.show()

In [None]:
# Overlay the VOC (ID) and unfiltered COCO (OOD) score histograms.
plot_score_distributions(scores_voc, scores_coco, "VOC", "COCO")

### MS COCO 2014 filtré

In [None]:
# Load the COCO 2014 validation instance annotations; `coco` is the handle
# used below to look up categories, annotations and image records.
ann_file = "coco/annotations/instances_val2014.json"
coco = COCO(ann_file)

In [None]:
# List every category name in the annotation file, to check how the VOC
# classes are spelled in COCO.
cats = coco.loadCats(coco.getCatIds())
cat_names = [c["name"] for c in cats]
print(cat_names)

In [None]:
# The 20 VOC classes written with COCO's category names. Some differ from the
# VOC prompts used earlier, e.g. "airplane" for "aeroplane", "motorcycle" for
# "motorbike", "couch" for "sofa" and "tv" for "tv monitor".
voc_labels_coco = {
"airplane","bicycle","bird","boat","bottle","bus","car","cat",
"chair","cow","dining table","dog","horse","motorcycle",
"person","potted plant","sheep","couch","train","tv"
}

# Ids of the COCO categories whose name is in the set above.
cats = coco.loadCats(coco.getCatIds())
cat_ids = [c["id"] for c in cats if c["name"] in voc_labels_coco]

print("Cat IDs:", cat_ids)


In [None]:
# Ids of all images carrying at least one annotation of a VOC category.
img_ids_with_voc = set()

# Query one category at a time and pool the image ids of its annotations.
for cat_id in cat_ids:
    img_ids_with_voc.update(
        ann["image_id"]
        for ann in coco.loadAnns(coco.getAnnIds(catIds=[cat_id]))
    )

print("Images contenant classes VOC:", len(img_ids_with_voc))


In [None]:
# Images with no annotation of any VOC category form the filtered OOD set.
all_img_ids = set(coco.getImgIds())
ood_img_ids = list(all_img_ids.difference(img_ids_with_voc))

print("Images OOD:", len(ood_img_ids))


In [None]:
# Turn each OOD image id into its file path through the image record's file name.
img_paths_ood = [
    os.path.join(coco_dir, record["file_name"])
    for record in coco.loadImgs(ood_img_ids)
]

print("Exemple:", img_paths_ood[0])


In [None]:
# Score the COCO images that contain no VOC category (filtered OOD set). The
# score list is written to scores_coco_filtered.pkl, with a checkpoint every
# 100 scored images.
scores_coco_filtered = compute_scores(
    img_paths=img_paths_ood,
    predictor=predictor,
    text_embeddings=text_embeddings,
    save_path="scores_coco_filtered.pkl",
    save_every=100,
    desc="COCO Filtered OOD"
)

In [None]:
# Reload the filtered-COCO scores from the pickle written by the scoring cell.
# NOTE(review): pickle.load is only safe on files this notebook produced.
with open("scores_coco_filtered.pkl", "rb") as f:
    scores_coco_filtered = pickle.load(f)

print(type(scores_coco_filtered))
print(len(scores_coco_filtered))


In [None]:
# Separation metrics with VOC as the ID set and filtered COCO as the OOD set.
results_voc_coco_filtered = evaluate_ood(
    scores_voc,
    scores_coco_filtered,
    id_name="VOC",
    ood_name="COCO filtered"
)


In [None]:
# Overlay the VOC (ID) and filtered COCO (OOD) score histograms.
plot_score_distributions(scores_voc, scores_coco_filtered,
                         "VOC", "COCO filtered")