In [None]:
import os
import json
import torch
import clip
from PIL import Image
from tqdm import tqdm

# Input and output locations. The first two are absolute paths on the
# author's machine; the chunk and output files are relative to the current
# working directory.
object_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Objectdetectie\detections_with_tracking_YOLOv12mv8_stabilizer_pressure_control.json"
har_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\mmaction2\final_segments_with_boxes_stabilizer_pressure_control.json"
chunk_path = "updated_chunks_with_segments_and_captions.json"
output_path = "updated_chunks_with_clip_object_action_links.json"

# Initialise CLIP (ViT-B/32) on the GPU when CUDA is available, else on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Read the three JSON inputs fully into memory.
with open(object_path, encoding="utf-8") as f:
    object_data = json.loads(f.read())

with open(har_path, encoding="utf-8") as f:
    action_data = json.loads(f.read())

with open(chunk_path, encoding="utf-8") as f:
    chunk_data = json.loads(f.read())

# Intersection-over-union of two boxes
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Mapping with ``"x"``, ``"y"``, ``"width"`` and ``"height"``.
        box2: Mapping with the same four keys.

    Returns:
        Intersection area divided by union area, or ``0.0`` when the union
        area is zero.
    """
    # Edges of the overlap rectangle (may be inverted when boxes are disjoint).
    left = max(box1["x"], box2["x"])
    top = max(box1["y"], box2["y"])
    right = min(box1["x"] + box1["width"], box2["x"] + box2["width"])
    bottom = min(box1["y"] + box1["height"], box2["y"] + box2["height"])

    # Clamp at zero so disjoint boxes yield an empty intersection.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    inter_area = overlap_w * overlap_h

    area1 = box1["width"] * box1["height"]
    area2 = box2["width"] * box2["height"]
    union_area = area1 + area2 - inter_area

    if union_area == 0:
        return 0.0
    return inter_area / union_area

# Crop a bounding box out of a frame image
def crop_from_frame(frame_path, bbox):
    """Load a frame, crop it to ``bbox`` and return a tensor for CLIP.

    Args:
        frame_path: Path of the image file to open.
        bbox: Mapping with ``"x"``, ``"y"``, ``"width"`` and ``"height"``;
            each value is truncated to ``int`` before cropping.

    Returns:
        The result of the module-level ``preprocess`` applied to the crop,
        with a leading batch dimension added and moved to ``device``; or
        ``None`` if anything fails (the error is printed, not raised).
    """
    try:
        # Close the underlying file deterministically instead of relying on
        # garbage collection; ``convert`` returns an independent, fully
        # loaded copy that stays valid after the file is closed.
        with Image.open(frame_path) as frame:
            image = frame.convert("RGB")
        x = int(bbox["x"])
        y = int(bbox["y"])
        w = int(bbox["width"])
        h = int(bbox["height"])
        return preprocess(image.crop((x, y, x + w, y + h))).unsqueeze(0).to(device)
    except Exception as e:
        # Deliberate best-effort: a bad frame or bbox skips this crop only.
        print(f"❌ Fout bij crop van {frame_path}: {e}")
        return None

# Select the objects that overlap a chunk
def match_objects_to_chunk(start, end):
    """Return the objects whose visibility interval overlaps ``[start, end]``.

    Reads the module-level ``object_data`` mapping (object id -> record).

    Args:
        start: Start of the chunk interval.
        end: End of the chunk interval.

    Returns:
        A list of new dicts, one per overlapping object, each a copy of the
        object's record with its id added under ``"object_id"``.
    """
    overlapping = []
    for obj_id, obj in object_data.items():
        # Closed-interval overlap: seen at or after start, first seen by end.
        if start <= obj["last_seen"] and obj["first_seen"] <= end:
            record = dict(obj)
            record["object_id"] = obj_id
            overlapping.append(record)
    return overlapping

# Select the actions that overlap a chunk
def match_actions_to_chunk(start, end):
    """Return the actions whose time span overlaps ``[start, end]``.

    Reads the module-level ``action_data`` sequence.

    Args:
        start: Start of the chunk interval.
        end: End of the chunk interval.

    Returns:
        A list containing the original action records (not copies) whose
        ``"start"``/``"end"`` interval overlaps the chunk, in input order.
    """
    overlapping = []
    for act in action_data:
        # Closed-interval overlap: ends at or after start, begins by end.
        if start <= act["end"] and act["start"] <= end:
            overlapping.append(act)
    return overlapping

# Debug output before processing: sizes of the three loaded inputs.
print(f"\nTotaal chunks: {len(chunk_data['chunks'])}")
print(f"Totaal acties in har_path: {len(action_data)}")
print(f"Totaal objecten in object_path: {len(object_data)}\n")

# The CLIP text embedding depends only on the action label, so each distinct
# label is encoded once and reused, instead of re-running the text encoder
# for every object/action pair in the innermost loop.
text_feat_cache = {}

# Main pass: for every chunk, pair each overlapping object with each
# overlapping action and store the accepted pairs on the chunk under
# "clip_object_action_links".
for chunk in tqdm(chunk_data["chunks"], desc="CLIP object-actie matching"):
    start = chunk["start"]
    end = chunk["end"]
    frames = chunk.get("frames", [])
    objects = match_objects_to_chunk(start, end)
    actions = match_actions_to_chunk(start, end)

    print(f"\nChunk {start:.2f}s – {end:.2f}s")
    print(f"Aantal frames: {len(frames)}")
    print(f"Aantal objecten: {len(objects)}")
    print(f"Aantal acties: {len(actions)}")

    if not objects:
        print("Geen objecten gevonden")
    if not actions:
        print("Geen acties gevonden")

    links = []

    for obj in objects:
        class_name = obj["class_name"]
        trajectory = obj["trajectory"]

        for act in actions:
            # Re-key the action's average box (w/h) to the width/height
            # layout that calculate_iou expects.
            act_box = {
                "x": act["avg_bounding_box"]["x"],
                "y": act["avg_bounding_box"]["y"],
                "width": act["avg_bounding_box"]["w"],
                "height": act["avg_bounding_box"]["h"]
            }

            # Only trajectory points that fall inside the action's time span.
            traj_points = [
                pt for pt in trajectory
                if act["start"] <= pt["time"] <= act["end"]
            ]

            for pt in traj_points:
                # Spatial gate: skip points that barely overlap the action box.
                iou = calculate_iou(pt["bbox"], act_box)
                if iou < 0.05:
                    print(f"IoU te laag ({iou:.3f}) tussen object en actie")
                    continue

                if not frames:
                    print("Geen frames beschikbaar")
                    continue

                # NOTE(review): the crop always comes from the chunk's first
                # frame, while the bbox comes from a trajectory point at
                # pt["time"]; if that frame is from a different moment the
                # crop may not show the object - confirm this is intended.
                frame_path = frames[0]["frame_path"]
                if not os.path.exists(frame_path):
                    print(f"Frame bestaat niet: {frame_path}")
                    continue

                print(f"Frame: {frame_path}")
                print(f"Crop bbox: {pt['bbox']}")

                crop_tensor = crop_from_frame(frame_path, pt["bbox"])
                if crop_tensor is None:
                    continue

                label = act["label"]
                with torch.no_grad():
                    image_feat = model.encode_image(crop_tensor)
                    if label not in text_feat_cache:
                        text_feat_cache[label] = model.encode_text(
                            clip.tokenize([label]).to(device)
                        )
                    text_feat = text_feat_cache[label]
                    sim = torch.cosine_similarity(image_feat, text_feat)[0].item()

                print(f"Vergelijking met actie '{act['label']}': CLIP sim = {sim:.3f}")

                # Semantic gate: keep the pair only when the crop and the
                # action label are at least minimally similar.
                if sim >= 0.05:
                    links.append({
                        "object_id": obj["object_id"],
                        "class_name": class_name,
                        "matched_action": act["label"],
                        "iou": iou,
                        "cosine_similarity": sim,
                        "frame_path": frame_path
                    })
                # Only the first trajectory point that gets scored is used
                # for this object/action pair, whether or not it was linked.
                break

    chunk["clip_object_action_links"] = links
    print(f"Koppelingen gevonden: {len(links)}")

# Save the chunk data, now carrying the per-chunk links, to output_path.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(chunk_data, f, indent=4)

print(f"\nCLIP-koppelingen opgeslagen naar {output_path}")


In [None]:
import json

# Location of the JSON file that carries the CLIP links
json_path = "updated_chunks_with_clip_object_action_links.json"

# Read the whole JSON document
with open(json_path, encoding="utf-8") as f:
    data = json.loads(f.read())

# Report every link, grouped by chunk (chunks are numbered from 1), and
# keep a running total across all chunks.
totaal_links = 0
for i, chunk in enumerate(data["chunks"], start=1):
    links = chunk.get("clip_object_action_links", [])
    # Chunks without links are skipped silently.
    if not links:
        continue

    print(f"\nChunk {i} ({chunk['start']:.2f}s – {chunk['end']:.2f}s): {len(links)} koppelingen")
    for link in links:
        print(f"Object '{link['class_name']}' ⟶ Actie '{link['matched_action']}' | Sim: {link['cosine_similarity']:.3f} | IoU: {link['iou']:.2f}")
    totaal_links = totaal_links + len(links)

print(f"\nTotaal aantal CLIP-koppelingen: {totaal_links}")
