# CAKE

## Audio bestand maken

In [None]:
import time

# Wall-clock timestamp taken when this cell runs. Nothing in the visible
# source reads it again (presumably used to time the whole run — TODO confirm).
start_time = time.time()

In [None]:
import os
from pydub import AudioSegment

In [None]:
def convert_mp4_to_wav(input_mp4_path, output_wav_path):
    """Read an MP4 file with pydub and export its audio as a WAV file.

    Args:
        input_mp4_path: Path of the MP4 file to read.
        output_wav_path: Path the WAV export is written to.
    """
    print(f"Converting '{input_mp4_path}' to WAV format...")
    AudioSegment.from_file(input_mp4_path, format="mp4").export(
        output_wav_path, format="wav"
    )
    print(f"Conversion complete! WAV file saved to: {output_wav_path}")

In [None]:
# Source MP4 video that is passed to convert_mp4_to_wav.
vid = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Data\Video's\machine cover.mp4"
# Destination path of the WAV file produced from `vid`.
aud = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Cake\Data_machine_cover_2\machine_cover_2.wav"

In [None]:
# Convert the video at `vid` into the WAV file at `aud`.
convert_mp4_to_wav(vid, aud)

## Transcribe

In [None]:
import whisperx
import gc
import torch
import json
import os


In [None]:
# Input locations and output folder names for the transcription step.
# Folder that is scanned for .wav files to transcribe.
wav_folder = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Cake\Data_machine_cover_2"
# transcribe() writes one JSON file per audio file into this folder.
output_folder = "transcriptions_machine_cover_2"
# transcribe() moves audio with an unsupported detected language here.
unsupported_folder = "unsupported_language_machine_cover_2"
# Download root for the Whisper model; its existence decides which load path transcribe() takes.
model_dir = "whisper-models"
# VAD model file passed to whisperx.load_model when model_dir already exists.
vad_model_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Cake\CAKE\VAD\pytorch_model.bin"
# Initial value only: the WAV processing loop rebinds audio_path for every file.
audio_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Cake\Data_machine_cover_2\machine_cover_2.wav"

# Ensure output folders exist
os.makedirs(output_folder, exist_ok=True)
os.makedirs(unsupported_folder, exist_ok=True)

In [None]:
def transcribe(audio_file):
    """Transcribe one audio file with WhisperX and save the result as JSON.

    The language is detected automatically. A file whose detected language is
    not one of en/fr/de/es/nl is moved into ``unsupported_folder`` and nothing
    is written for it. Otherwise word-level alignment is attempted; if
    alignment raises ``ValueError`` the unaligned transcription is kept. The
    result is written to ``<output_folder>/<audio base name>.json``.

    Reads the module-level names ``model_dir``, ``vad_model_path``,
    ``output_folder`` and ``unsupported_folder``.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        None.
    """
    # The configuration is fixed to the CPU (an earlier revision selected
    # CUDA / MPS / CPU automatically).
    print("CPU gebruikt")
    device = "cpu"
    compute_type = "float32"
    batch_size = 4

    if not os.path.exists(model_dir):
        # No local model folder yet: let WhisperX download into model_dir.
        model = whisperx.load_model("large-v2", device, compute_type=compute_type, download_root=model_dir)
    else:
        # Load the already-downloaded snapshot together with the local VAD model.
        model = whisperx.load_model("./whisper-models/models--Systran--faster-whisper-large-v2/snapshots/f0fe81560cb8b68660e564f55dd99207059c092e", device, compute_type=compute_type, vad_model_fp=vad_model_path)

    audio = whisperx.load_audio(audio_file)

    # Perform transcription with automatic language detection
    result = model.transcribe(audio, batch_size=batch_size)
    detected_language = result.get("language", "en")

    # Check if detected language is supported, otherwise move file to unsupported folder
    if detected_language not in ["en", "fr", "de", "es", "nl"]:
        print(f"Language detected as {detected_language}, moving to unsupported folder.")
        os.rename(audio_file, os.path.join(unsupported_folder, os.path.basename(audio_file)))
        return

    print(f"Detected language: {detected_language}")

    model_a = None
    try:
        model_a, metadata = whisperx.load_align_model(language_code=detected_language, device=device)
        result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
    except ValueError as e:
        print(f"Skipping alignment due to error: {e}")
    finally:
        # Drop the reference first; collecting while model_a is still bound
        # cannot reclaim the alignment model. Runs on the error path as well.
        del model_a
        gc.collect()
        torch.cuda.empty_cache()

    # Save as JSON (UTF-8, matching how the later steps read the file back)
    base_filename = os.path.splitext(os.path.basename(audio_file))[0]
    output_json_path = os.path.join(output_folder, f"{base_filename}.json")
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    print(f"Results saved to {output_json_path}")

In [None]:
# Transcribe every entry of wav_folder whose name ends in ".wav".
for filename in os.listdir(wav_folder):
    if not filename.endswith(".wav"):
        continue
    audio_path = os.path.join(wav_folder, filename)
    transcribe(audio_path)

## Full text of transcription

In [None]:
import os
import json

# Folder with the transcription JSON files, and the folder that receives one
# plain-text transcript per JSON file.
json_folder = 'transcriptions_machine_cover_2'
output_dir = 'individual_texts_machine_cover_2'

# Create the output directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)
print(f"Output directory '{output_dir}' is ready.")

# Process each JSON file individually
for json_file in os.listdir(json_folder):
    if json_file.endswith('.json'):
        json_path = os.path.join(json_folder, json_file)
        print(f"Processing file: {json_file}")

        # Skip files that cannot be read or parsed as JSON.
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError) as e:
            print(f"Error: Failed to process '{json_file}'. Details: {e}")
            continue

        segments = data.get('segments', [])
        if not segments:
            print(f"Warning: No segments found in '{json_file}'.")
            continue

        # Create output text file for the individual transcript.
        # Only the extension is swapped: str.replace would also rewrite any
        # earlier ".json" that happens to occur inside the file name.
        individual_output_path = os.path.join(output_dir, os.path.splitext(json_file)[0] + '.txt')

        with open(individual_output_path, 'w', encoding='utf-8') as individual_file:
            for i, segment in enumerate(segments, start=1):
                text = segment.get('text', '').strip()
                if not text:
                    print(f"Warning: Segment {i} in '{json_file}' is empty.")
                    continue
                # Segments are separated (and the file terminated) by one space.
                individual_file.write(f"{text} ")

        print(f"Transcript saved to '{individual_output_path}'.")

## Output chonkie chunks with timestamp (in json)

In [None]:
import os
import json
from chonkie import SDPMChunker

def load_document(file_path: str) -> str:
    """Return the complete contents of ``file_path`` decoded as UTF-8.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    raise FileNotFoundError(f"The file '{file_path}' does not exist.")

def load_json(file_path: str) -> dict:
    """Load and parse a UTF-8 encoded JSON file.

    Args:
        file_path: Path of the JSON file.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file is not valid JSON; the original
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: The JSON file '{file_path}' does not exist.")
    with open(file_path, 'r', encoding='utf-8') as f:
        try:
            return json.load(f)
        except json.JSONDecodeError as e:
            # Chain explicitly so the decoder's position info stays in the traceback.
            raise ValueError(f"Error: Failed to decode JSON file. Details: {e}") from e

def create_chunker(embedding_model="minishlab/potion-base-8M", chunk_size=512, min_sentences=1):
    """Build an ``SDPMChunker`` from the three given settings.

    Args:
        embedding_model: Forwarded as ``embedding_model``.
        chunk_size: Forwarded as ``chunk_size``.
        min_sentences: Forwarded as ``min_sentences``.

    Returns:
        The constructed ``SDPMChunker`` instance.
    """
    settings = {
        "embedding_model": embedding_model,
        "chunk_size": chunk_size,
        "min_sentences": min_sentences,
    }
    return SDPMChunker(**settings)

# def process_text_and_json(text_folder: str, json_folder: str, output_folder: str):

#     if not os.path.exists(output_folder):
#         os.makedirs(output_folder)

#     for text_file in os.listdir(text_folder):
#         if text_file.endswith(".txt"):
#             base_name = os.path.splitext(text_file)[0]
#             text_path = os.path.join(text_folder, text_file)
#             json_path = os.path.join(json_folder, base_name + ".json")

#             if not os.path.exists(json_path):
#                 print(f"Warning: No matching JSON file for {text_file}")
#                 continue

#             text_content = load_document(text_path)
#             json_data = load_json(json_path)
#             segments = json_data.get('word_segments', [])

#             if not segments:
#                 raise ValueError(f"Error: No segments found in the JSON file {json_path}.")

#             word_list = [[seg.get('word', '').strip(), seg.get('start', ''), seg.get('end', '')] for seg in segments if seg.get('word', '').strip()]
#             chunker = create_chunker()
#             chunks = chunker.chunk(text_content)

#             final_chunks = []
#             current_word_index = 0
#             for chunk in chunks:
#                 chunk_text = chunk.text
#                 chunk_words = chunk_text.split()
#                 chunk_word_data = []
#                 chunk_start = None
#                 chunk_end = None

#                 for chunk_word in chunk_words:
#                     if current_word_index < len(word_list):
#                         word_info = word_list[current_word_index]
#                         if chunk_word == word_info[0]:
#                             chunk_word_data.append({
#                                 "word": word_info[0],
#                                 "start": word_info[1],
#                                 "end": word_info[2]
#                             })
#                             if chunk_start is None:
#                                 chunk_start = word_info[1]
#                             chunk_end = word_info[2]
#                             current_word_index += 1
#                         else:
#                             raise ValueError(f"Word mismatch at chunk '{chunk_text}': Expected '{word_info[0]}', found '{chunk_word}'.")
#                     else:
#                         raise IndexError("Ran out of words in word_data to match with chunks.")

#                 final_chunks.append({
#                     "text": chunk_text,
#                     "start": chunk_start,
#                     "end": chunk_end,
#                     "words": chunk_word_data
#                 })

#             output_json_path = os.path.join(output_folder, base_name + "_chunks.json")
#             with open(output_json_path, 'w', encoding='utf-8') as f:
#                 json.dump({"chunks": final_chunks}, f, ensure_ascii=False, indent=4)
#                 print(f"Processed {text_file} and saved to {output_json_path}")

def is_valid_time(value):
    """Return True when ``value`` can be used as a timestamp.

    A usable timestamp converts to a finite float that is >= 0. ``None``,
    empty or non-numeric strings, negative numbers, NaN and infinity are all
    rejected.

    Args:
        value: Candidate timestamp (number, numeric string, or anything else).

    Returns:
        bool: Whether ``value`` is a finite, non-negative number.
    """
    try:
        seconds = float(value)
    except (ValueError, TypeError):
        # Covers None, '', ' ' and every other non-numeric input.
        return False
    # NaN fails both comparisons; infinity fails the upper bound.
    return 0 <= seconds < float("inf")

def process_text_and_json(text_folder: str, json_folder: str, output_folder: str):
    """Chunk every transcript and attach word timestamps to each chunk.

    For each ``<name>.txt`` in ``text_folder`` that has a ``<name>.json`` in
    ``json_folder``, the text is split with the chunker from
    ``create_chunker`` and the entries of the JSON's ``word_segments`` list
    are handed out to the chunks in order, one entry per whitespace-separated
    word of the chunk text. The result is written to
    ``<output_folder>/<name>_chunks.json`` as
    ``{"chunks": [{"text", "start", "end", "words"}, ...]}``.

    Words without a usable ``start``/``end`` are left out of a chunk's
    ``words`` list and ignored for its ``start``/``end``, but they still use
    up one position. This keeps later chunks aligned with their own words
    instead of borrowing timestamps from the following chunk.

    Args:
        text_folder: Folder containing the ``.txt`` transcripts.
        json_folder: Folder containing the matching transcription JSON files.
        output_folder: Folder for the ``*_chunks.json`` files (created if missing).

    Raises:
        ValueError: If a matching JSON file has no ``word_segments``.
    """
    os.makedirs(output_folder, exist_ok=True)

    chunker = None  # created on first use, then reused for every file

    for text_file in os.listdir(text_folder):
        if not text_file.endswith(".txt"):
            continue

        base_name = os.path.splitext(text_file)[0]
        text_path = os.path.join(text_folder, text_file)
        json_path = os.path.join(json_folder, base_name + ".json")

        if not os.path.exists(json_path):
            print(f"Warning: No matching JSON file for {text_file}")
            continue

        text_content = load_document(text_path)
        json_data = load_json(json_path)
        segments = json_data.get('word_segments', [])

        if not segments:
            raise ValueError(f"Error: No segments found in the JSON file {json_path}.")

        # Every non-empty word, flagged with whether both timestamps are usable.
        word_list = []
        for seg in segments:
            word = seg.get('word', '').strip()
            if not word:
                continue
            start, end = seg.get('start'), seg.get('end')
            word_list.append((word, start, end, is_valid_time(start) and is_valid_time(end)))

        valid_segments = sum(1 for entry in word_list if entry[3])
        skipped_segments = len(segments) - valid_segments
        if skipped_segments > 0:
            print(f"Waarschuwing: {skipped_segments} ongeldige segmenten overgeslagen in {json_path}")

        if chunker is None:
            chunker = create_chunker()
        chunks = chunker.chunk(text_content)

        final_chunks = []
        current_word_index = 0

        for chunk in chunks:
            chunk_text = chunk.text.strip()
            approx_num_words = len(chunk_text.split())

            # Slicing past the end simply yields fewer (or no) words.
            consumed = word_list[current_word_index:current_word_index + approx_num_words]
            current_word_index += len(consumed)

            timed = [entry for entry in consumed if entry[3]]
            chunk_word_data = [
                {"word": entry[0], "start": entry[1], "end": entry[2]}
                for entry in timed
            ]

            final_chunks.append({
                "text": chunk_text,
                # None when the chunk has no word with usable timestamps.
                "start": timed[0][1] if timed else None,
                "end": timed[-1][2] if timed else None,
                "words": chunk_word_data
            })

        output_json_path = os.path.join(output_folder, base_name + "_chunks.json")
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump({"chunks": final_chunks}, f, ensure_ascii=False, indent=4)
            print(f"Processed {text_file} and saved to {output_json_path}")


# Script entry point: chunk the transcripts and write the timestamped chunk JSON.
if __name__ == "__main__":
    text_folder = 'individual_texts_machine_cover_2'
    json_folder = 'transcriptions_machine_cover_2'
    # NOTE(review): this rebinds the module-level `output_folder` that
    # transcribe() reads for its own output path; re-running transcribe() in
    # the same session afterwards would write into this folder instead.
    output_folder = 'processed_json_machine_cover_2'
    process_text_and_json(text_folder, json_folder, output_folder)

## Video segmentation chonkie

In [None]:
import json
import os
import subprocess

def create_segments(video_file, result):
    """Cut ``video_file`` into one clip per chunk listed in a chunk JSON file.

    Each entry of the JSON's top-level ``"chunks"`` list supplies ``start``
    and ``end``; the span is re-encoded with ffmpeg (libx264 / aac) into
    ``video_segments_machine_cover_2/<video name>/segment_<i>_<int(start)>_<int(end)>.mp4``,
    where ``i`` is the 1-based position of the chunk in the list.

    Chunks without a ``start`` or ``end`` are skipped, and a non-zero ffmpeg
    exit code is reported instead of announcing the clip as saved.

    Args:
        video_file: Path of the source video.
        result: Path of the chunk JSON file.

    Raises:
        FileNotFoundError: If ``result`` does not exist.
        ValueError: If the JSON file contains no chunks.
    """
    if not os.path.exists(result):
        raise FileNotFoundError(f"Error: The result file '{result}' does not exist.")

    with open(result, 'r', encoding='utf-8') as f:
        data = json.load(f)

    segments = data.get('chunks', [])
    if not segments:
        raise ValueError("No segments found in the JSON file.")

    video_name = os.path.splitext(os.path.basename(video_file))[0]
    output_dir = os.path.join('video_segments_machine_cover_2', video_name)
    os.makedirs(output_dir, exist_ok=True)
    print(f"Output directory '{output_dir}' is ready.")

    for i, segment in enumerate(segments, start=1):
        start = segment.get('start')
        end = segment.get('end')
        if start is None or end is None:
            # Nothing to cut, and int(None) below would raise.
            print(f"Skipping segment {i}: missing start or end timestamp.")
            continue

        output_filename = f"segment_{i}_{int(start)}_{int(end)}.mp4"
        output_path = os.path.join(output_dir, output_filename)

        command = [
            "ffmpeg",
            "-y",
            "-i", video_file,
            "-ss", str(start),
            "-to", str(end),
            "-c:v", "libx264",
            "-c:a", "aac",
            output_path
        ]
        print(f"Creating segment {i}: {start} to {end} seconds for {video_file}.")
        completed = subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if completed.returncode != 0:
            # ffmpeg output is discarded, so the exit code is the only failure signal.
            print(f"Segment {i} failed: ffmpeg exited with code {completed.returncode}.")
            continue
        print(f"Segment {i} saved as '{output_filename}'.")

    print(f"All segments for {video_file} have been processed.")

def process_videos_in_directory(video_directory, json_directory):
    """Run ``create_segments`` for every video / chunk-JSON pair.

    A video ``<name>.mp4`` is paired with ``<name>_chunks.json`` or, failing
    that, ``<name>.json`` from ``json_directory``. If any video has no JSON
    with a matching name, the function falls back to pairing the sorted video
    list with the sorted JSON list by position, and warns when the two lists
    differ in length because the surplus files are then ignored.

    Args:
        video_directory: Folder containing the ``.mp4`` files.
        json_directory: Folder containing the chunk ``.json`` files.

    Raises:
        NotADirectoryError: If either folder does not exist.
    """
    if not os.path.isdir(video_directory):
        raise NotADirectoryError(f"Error: The directory '{video_directory}' does not exist.")
    if not os.path.isdir(json_directory):
        raise NotADirectoryError(f"Error: The directory '{json_directory}' does not exist.")

    video_files = sorted(f for f in os.listdir(video_directory) if f.endswith('.mp4'))
    json_files = sorted(f for f in os.listdir(json_directory) if f.endswith('.json'))

    # Preferred: pair by file name, which cannot mispair when one file is missing.
    available = set(json_files)
    pairs = []
    for video_file in video_files:
        stem = os.path.splitext(video_file)[0]
        candidates = (f"{stem}_chunks.json", f"{stem}.json")
        matched = next((name for name in candidates if name in available), None)
        if matched is None:
            pairs = None
            break
        pairs.append((video_file, matched))

    if pairs is None:
        # Fallback: positional pairing of the two sorted lists.
        if len(video_files) != len(json_files):
            print(
                f"Warning: found {len(video_files)} video file(s) but {len(json_files)} JSON file(s); "
                "pairing by sorted order and ignoring the surplus."
            )
        pairs = list(zip(video_files, json_files))

    for video_file, json_file in pairs:
        video_path = os.path.join(video_directory, video_file)
        json_path = os.path.join(json_directory, json_file)
        create_segments(video_path, json_path)

# Script entry point: cut every video into clips using its chunk JSON.
if __name__ == "__main__":
    video_directory = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Cake\Video_machine_cover_2"  # Replace with your video directory
    json_directory = "processed_json_machine_cover_2"  # Replace with your JSON directory
    process_videos_in_directory(video_directory, json_directory)

## Frame Extraction
### 1 frame per 3 seconds

In [None]:
import cv2
import os

def extract_frames(video_path, output_dir, interval_seconds=3):
    """Save one JPEG frame roughly every ``interval_seconds`` seconds of a video.

    Frames are written below ``output_dir`` into a folder made of the last two
    components of ``video_path`` (parent folder and video file name). Files
    are named ``<video file name with dots replaced by underscores>_frame_<n>.jpg``,
    where ``n`` counts the saved frames from 0.

    Args:
        video_path: Path of the video to read.
        output_dir: Root folder for the extracted frames.
        interval_seconds: Time between two saved frames, in seconds.
    """
    video_name = os.path.basename(video_path).replace(".", "_")
    segment_output_dir = os.path.join(output_dir, *video_path.split(os.sep)[-2:])
    os.makedirs(segment_output_dir, exist_ok=True)

    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            print(f"Kan de FPS voor {video_path} niet ophalen.")
            return

        # At least 1: a small fps * interval would otherwise truncate to 0 and
        # make the modulo below raise ZeroDivisionError.
        frame_interval = max(1, int(fps * interval_seconds))
        success, image = vidcap.read()
        count = 0
        frame_count = 0

        while success:
            if frame_count % frame_interval == 0:
                frame_path = os.path.join(segment_output_dir, f"{video_name}_frame_{count}.jpg")
                cv2.imwrite(frame_path, image)
                count += 1
            success, image = vidcap.read()
            frame_count += 1
    finally:
        # Release the capture on every exit path, including the early return.
        vidcap.release()

    print(f"Geëxtraheerd {count} frames uit {video_path} naar {segment_output_dir}")

def process_videos_in_directory(base_directory, output_directory, interval_seconds=3):
    """Extract frames from every video located one folder level below ``base_directory``.

    Sub-folders and their files are visited in sorted order; files ending in
    .mp4, .avi, .mov or .mkv (case-insensitive) are passed to ``extract_frames``.

    Args:
        base_directory: Folder whose sub-folders contain the videos.
        output_directory: Root folder for the frames (created if missing).
        interval_seconds: Forwarded to ``extract_frames``.

    Raises:
        NotADirectoryError: If ``base_directory`` does not exist.
    """
    if not os.path.isdir(base_directory):
        raise NotADirectoryError(f"Error: De map '{base_directory}' bestaat niet.")

    os.makedirs(output_directory, exist_ok=True)
    print(f"Output directory '{output_directory}' is gereed.")

    video_suffixes = ('.mp4', '.avi', '.mov', '.mkv')
    for entry in sorted(os.listdir(base_directory)):
        folder = os.path.join(base_directory, entry)
        if not os.path.isdir(folder):
            continue
        for name in sorted(os.listdir(folder)):
            if not name.lower().endswith(video_suffixes):
                continue
            extract_frames(os.path.join(folder, name), output_directory, interval_seconds)

# Script entry point: extract frames from all video segments.
if __name__ == "__main__":
    video_directory = "video_segments_machine_cover_2"  # Replace this with your base folder of video segments
    output_directory = "frames_machine_cover_2"         # Folder in which the extracted frames are stored
    process_videos_in_directory(video_directory, output_directory, interval_seconds=3)

## Object segmentation

In [None]:
import torch
import numpy as np
import cv2
import os
import time
from dataclasses import dataclass
from typing import List, Tuple
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

@dataclass
class BoxPrompt:
    """Container for four float coordinates ``x1``, ``y1``, ``x2``, ``y2``.

    NOTE(review): presumably two opposite corners of a box prompt for SAM;
    nothing in the visible source uses this class, so confirm the coordinate
    convention before relying on it.
    """
    x1: float
    y1: float
    x2: float
    y2: float

class AdvancedAutoSAM:
    """Wrapper around Segment Anything's automatic mask generator.

    Loads a SAM checkpoint once and offers helpers to generate masks for an
    image and to save them as an overlay, as individual mask images, or as
    individual cut-out segments.
    """

    def __init__(self, model_type="vit_l", model_path="./models/sam_vit_l_0b3195.pth"):
        """Store the configuration, pick CUDA when available, and load the model.

        Args:
            model_type: Key into ``sam_model_registry``.
            model_path: Path of the checkpoint file.
        """
        self.model_type = model_type
        self.model_path = model_path
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self._load_model()

    def _load_model(self):
        """Instantiate the SAM model on ``self.device`` and build the mask generator."""
        print(f"Loading SAM model: {self.model_type}...")
        self.sam = sam_model_registry[self.model_type](checkpoint=self.model_path)
        self.sam.to(self.device)
        self.mask_generator = SamAutomaticMaskGenerator(self.sam)
        print("SAM model loaded.")

    def auto_generate_masks(self, image_path: str) -> List[np.ndarray]:
        """Generate masks for one image.

        Args:
            image_path: Path of the image to segment.

        Returns:
            One ``uint8`` array per generated mask, built from the generator's
            ``'segmentation'`` entry multiplied by 255.

        Raises:
            ValueError: If the image cannot be read.
        """
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Failed to load {image_path}")
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        masks = self.mask_generator.generate(image_rgb)
        binary_masks = [m['segmentation'].astype(np.uint8) * 255 for m in masks]
        print(f"Generated {len(binary_masks)} masks.")
        return binary_masks

    def save_masks_as_color_overlay(self, masks: List[np.ndarray], image_path: str, output_path: str):
        """Blend every mask onto the image in a random colour and save the result.

        Args:
            masks: Masks as returned by ``auto_generate_masks``.
            image_path: Path of the image the masks belong to.
            output_path: Path the overlay image is written to.

        Raises:
            ValueError: If the image cannot be read.
        """
        image_bgr = cv2.imread(image_path)
        if image_bgr is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise ValueError(f"Failed to load {image_path}")
        overlay = image_bgr.copy()
        for mask in masks:
            color = np.random.randint(0, 255, size=3, dtype=np.uint8)
            mask_indices = mask > 0
            # 50/50 blend of the current overlay pixels and the mask colour.
            overlay[mask_indices] = (0.5 * overlay[mask_indices] + 0.5 * color).astype(np.uint8)
        cv2.imwrite(output_path, overlay)
        print(f"Overlay saved to {output_path}")

    def save_masks_individually(self, masks: List[np.ndarray], output_folder: str, base_name: str = "mask"):
        """Write each mask to ``<output_folder>/<base_name>_<n>.png`` (n starts at 1).

        Args:
            masks: Masks to save.
            output_folder: Destination folder (created if missing).
            base_name: File name prefix.
        """
        os.makedirs(output_folder, exist_ok=True)
        for idx, mask in enumerate(masks, 1):
            path = os.path.join(output_folder, f"{base_name}_{idx}.png")
            cv2.imwrite(path, mask)
            print(f"Saved mask to {path}")

    def save_segments_individually(self, masks: List[np.ndarray], image_path: str, output_folder: str, base_name: str = "segment"):
        """Write the image area under each mask to ``<output_folder>/<base_name>_<n>.png``.

        Args:
            masks: Masks selecting the areas to keep.
            image_path: Path of the image the masks belong to.
            output_folder: Destination folder (created if missing).
            base_name: File name prefix.

        Raises:
            ValueError: If the image cannot be read.
        """
        os.makedirs(output_folder, exist_ok=True)
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Failed to load {image_path}")
        for idx, mask in enumerate(masks, 1):
            segment = cv2.bitwise_and(image, image, mask=mask)
            path = os.path.join(output_folder, f"{base_name}_{idx}.png")
            cv2.imwrite(path, segment)
            print(f"Saved segment to {path}")


In [None]:
def main():
    """Run SAM on every image below the frames folder and save all outputs.

    For each image an overlay, the individual masks and the individual
    segments are written below ``generated_results_machine_cover_2``. An
    error on one image is printed and does not stop the run.
    """
    auto_sam = AdvancedAutoSAM()

    input_dir = "frames_machine_cover_2/machine_cover_2/"
    output_dir = "generated_results_machine_cover_2"
    overlay_dir, masks_dir, segments_dir = (
        os.path.join(output_dir, sub) for sub in ("overlays", "masks", "segments")
    )
    for folder in (overlay_dir, masks_dir, segments_dir):
        os.makedirs(folder, exist_ok=True)

    supported_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

    for root, _, files in os.walk(input_dir):
        for file in files:
            if not file.lower().endswith(supported_extensions):
                continue

            image_path = os.path.join(root, file)
            base_name, _ext = os.path.splitext(file)

            print(f"\nProcessing {image_path}...")
            try:
                masks = auto_sam.auto_generate_masks(image_path)
                if not masks:
                    continue
                overlay_path = os.path.join(overlay_dir, f"{base_name}_overlay.png")
                auto_sam.save_masks_as_color_overlay(masks, image_path, overlay_path)
                auto_sam.save_masks_individually(
                    masks, os.path.join(masks_dir, f"{base_name}_masks")
                )
                auto_sam.save_segments_individually(
                    masks, image_path, os.path.join(segments_dir, f"{base_name}_segments")
                )
            except Exception as e:
                print(f"Error processing {file}: {e}")

# Disabled entry point for the real SAM run; uncomment to run main().
# if __name__ == "__main__":
#     main()

# Active entry point while SAM is switched off: only create the (empty)
# segments folder so that it exists on disk.
if __name__ == "__main__":
    print("SAM is uitgeschakeld. Lege segment-map wordt aangemaakt...")
    os.makedirs("generated_results_machine_cover_2/segments", exist_ok=True)

## Image captioning on the frames

In [None]:
import os
import json
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch


# Path to the root folder with the extracted frames (all video segments)
frames_dir = "frames_machine_cover_2/machine_cover_2/"  # Adjust this to your frames root folder

# Output JSON file
output_json_path = "combined_frames_machine_cover_2.json"


def initialize_captioning_model():
    """Load the captioning model, its image processor and its tokenizer.

    All three are loaded from the ``nlpconnect/vit-gpt2-image-captioning``
    checkpoint.

    Returns:
        Tuple ``(model, feature_extractor, tokenizer)``.
    """
    checkpoint = "nlpconnect/vit-gpt2-image-captioning"

    print("Initialiseren van het image captioning model...")
    model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
    feature_extractor = ViTImageProcessor.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    print("Model geïnitieerd.")
    return model, feature_extractor, tokenizer


def generate_caption(image_path, model, feature_extractor, tokenizer, device):
    """Generate a caption for one image.

    Args:
        image_path: Path of the image file.
        model: Captioning model providing ``generate``.
        feature_extractor: Callable that turns the image into ``pixel_values``.
        tokenizer: Tokenizer used to decode the generated ids.
        device: Device the pixel values are moved to.

    Returns:
        The decoded caption, or ``""`` when any step raises (the error is
        printed instead of propagated).
    """
    try:
        rgb_image = Image.open(image_path).convert("RGB")
        encoded = feature_extractor(images=rgb_image, return_tensors="pt")
        pixel_values = encoded.pixel_values.to(device)

        generated_ids = model.generate(pixel_values, max_length=16, num_beams=4)
        return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating caption for {image_path}: {e}")
        return ""

def process_all_frames(frames_dir, model, feature_extractor, tokenizer, device):
    """Caption every frame of every video segment folder below ``frames_dir``.

    Segment folders and frame files are visited in natural order, so that
    ``..._frame_2.jpg`` comes before ``..._frame_10.jpg``. With a plain
    lexicographic sort the stored ``frame_number`` would not follow the
    numbering in the file names.

    Args:
        frames_dir: Root folder containing one sub-folder per video segment.
        model: Captioning model, forwarded to ``generate_caption``.
        feature_extractor: Image processor, forwarded to ``generate_caption``.
        tokenizer: Tokenizer, forwarded to ``generate_caption``.
        device: Device, forwarded to ``generate_caption``.

    Returns:
        List of dicts with the keys ``video_segment``, ``frame_number``
        (0-based position within the segment), ``frame_filename`` and
        ``caption``. Empty when ``frames_dir`` does not exist.
    """
    import re  # function-scope import so the block needs no module-level `import re`

    def natural_key(name):
        # re.split with a capturing group alternates text / digit run, so the
        # odd positions are always digit runs and can be compared as ints.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)], name

    combined_data = []
    supported_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    if not os.path.isdir(frames_dir):
        print(f"Error: Frames directory {frames_dir} bestaat niet.")
        return combined_data

    # Every sub-directory is one video segment.
    video_segments = [d for d in os.listdir(frames_dir) if os.path.isdir(os.path.join(frames_dir, d))]
    total_segments = len(video_segments)
    print(f"Found {total_segments} video segments in '{frames_dir}'.")

    for seg_idx, segment in enumerate(sorted(video_segments, key=natural_key), start=1):
        segment_path = os.path.join(frames_dir, segment)
        frame_files = [f for f in os.listdir(segment_path) if f.lower().endswith(supported_extensions)]

        if not frame_files:
            print(f"error: Geen frames gevonden in segment '{segment}'.")
            continue

        total_frames = len(frame_files)
        print(f"Processing segment {seg_idx}/{total_segments}: '{segment}' with {total_frames} frames.")

        for idx, frame_file in enumerate(sorted(frame_files, key=natural_key), start=1):
            frame_path = os.path.join(segment_path, frame_file)

            # Generate a caption for the frame.
            caption = generate_caption(frame_path, model, feature_extractor, tokenizer, device)

            combined_data.append({
                "video_segment": segment,  # name of the video segment folder
                "frame_number": idx - 1,  # 0-based position within the segment
                "frame_filename": frame_file,
                "caption": caption
            })

            # Progress report every 100 frames and at the end of the segment.
            if idx % 100 == 0 or idx == total_frames:
                print(f"Segment '{segment}': Processed {idx}/{total_frames} frames.")

    return combined_data


def save_combined_data(combined_data, output_json_path):
    """Write ``combined_data`` to ``output_json_path`` as JSON indented by 4.

    Any exception raised while writing is printed rather than propagated.

    Args:
        combined_data: JSON-serialisable data to store.
        output_json_path: Destination file path.
    """
    try:
        with open(output_json_path, 'w', encoding='utf-8') as handle:
            json.dump(combined_data, handle, indent=4)
        print(f"Combined JSON saved to '{output_json_path}'.")
    except Exception as exc:
        print(f"Error saving JSON file: {exc}")


def main():
    """Caption all extracted frames and store the combined result as JSON.

    Uses CUDA when available, otherwise the CPU. Nothing is written when no
    frame data was produced.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"Using device: {device}")

    # Load the captioning model and move it to the selected device.
    model, feature_extractor, tokenizer = initialize_captioning_model()
    model.to(device)

    # Generate captions for all frames.
    combined_data = process_all_frames(
        frames_dir=frames_dir,
        model=model,
        feature_extractor=feature_extractor,
        tokenizer=tokenizer,
        device=device,
    )

    if combined_data:
        # Store the combined data as JSON.
        save_combined_data(combined_data, output_json_path)
    else:
        print("Geen gecombineerde data om op te slaan.")

# Script entry point: run the captioning pipeline.
if __name__ == "__main__":
    main()
# Disabled alternative entry point: skip captioning and write an empty
# combined-frames JSON file instead.
# if __name__ == "__main__":
#     print("Captioning is uitgeschakeld. Leeg combined_frames.json wordt aangemaakt...")
#     empty_json = []
#     with open("combined_frames_machine_cover_2.json", "w", encoding="utf-8") as f:
#         json.dump(empty_json, f, indent=4)

## Connect segments and captions to chunks

In [None]:
import json
import os
from pathlib import Path
from typing import List, Dict, Any
import re

def load_json(file_path: str) -> Any:
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

def save_json(data: Any, file_path: str):
    """Write ``data`` to ``file_path`` as UTF-8 JSON indented by 4 and report the path."""
    with open(file_path, 'w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=4)
    print(f"JSON opgeslagen als '{file_path}'.")

def parse_segment_filename(filename: str) -> Dict[str, Any]:
    """Extract index, start and end from a ``segment_<i>_<start>_<end>.mp4`` name.

    The whole file name must match the pattern; names with anything after
    ``.mp4`` (for example a leftover ``.part`` file) are rejected.

    Args:
        filename: File name without any directory part.

    Returns:
        ``{"index": int, "start": float, "end": float}`` for a matching name,
        otherwise an empty dict.
    """
    match = re.fullmatch(r"segment_(\d+)_(\d+)_(\d+)\.mp4", filename)
    if match is None:
        return {}
    index, start, end = match.groups()
    return {
        "index": int(index),
        "start": float(start),
        "end": float(end)
    }

def find_video_segments(chunks: List[Dict[str, Any]], video_segments_dir: str) -> Dict[tuple, str]:
    """Map ``(start, end)`` of each segment file name to the path of that file.

    Files whose name ``parse_segment_filename`` does not recognise are
    reported and left out. ``chunks`` is accepted but not used.

    Args:
        chunks: Unused.
        video_segments_dir: Folder containing the segment video files.

    Returns:
        Dict keyed by ``(start, end)`` with the joined file path as value.
    """
    segment_map = {}
    for segment_file in os.listdir(video_segments_dir):
        parsed = parse_segment_filename(segment_file)
        if parsed:
            span = (parsed['start'], parsed['end'])
            segment_map[span] = os.path.join(video_segments_dir, segment_file)
        else:
            print(f" file '{segment_file}' not same as pattern")
    return segment_map

def find_frames_for_segment(frames_dir: str, segment_filename: str) -> List[str]:
    """Return the frame image paths of one video segment in natural order.

    Frames are looked up in ``<frames_dir>/<segment_filename>``. They are
    ordered so that ``..._frame_2.jpg`` precedes ``..._frame_10.jpg``; a plain
    lexicographic sort would put frame 10 first because the frame numbers in
    the file names are not zero-padded.

    Args:
        frames_dir: Root folder containing one sub-folder per segment.
        segment_filename: Name of the segment's sub-folder.

    Returns:
        Paths of the .jpg/.jpeg/.png/.bmp/.tiff files in that folder, or an
        empty list when the folder does not exist.
    """
    segment_frame_dir = os.path.join(frames_dir, segment_filename)
    if not os.path.isdir(segment_frame_dir):
        print(f"error: Frames directory '{segment_frame_dir}' not exist.")
        return []

    def natural_key(name):
        # re.split with a capturing group alternates text / digit run, so the
        # odd positions are always digit runs and can be compared as ints.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)], name

    frame_names = [
        f for f in os.listdir(segment_frame_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff'))
    ]
    return [os.path.join(segment_frame_dir, f) for f in sorted(frame_names, key=natural_key)]

def find_object_segments_for_frame(generated_segments_dir: str, segment_filename: str, frame_filename: str) -> List[str]:
    """Return the sorted PNG paths of the object segments saved for one frame.

    The PNGs are expected in ``<generated_segments_dir>/<frame base name>_segments``.
    ``segment_filename`` is accepted but not used.

    Args:
        generated_segments_dir: Root folder of the generated object segments.
        segment_filename: Unused.
        frame_filename: File name (or path) of the frame image.

    Returns:
        Name-sorted list of ``.png`` paths, or an empty list when the folder
        does not exist.
    """
    # e.g. frame "segment_1_0_32_mp4_frame_3.jpg" -> folder "segment_1_0_32_mp4_frame_3_segments"
    frame_basename, _ext = os.path.splitext(os.path.basename(frame_filename))
    segments_folder = os.path.join(generated_segments_dir, f"{frame_basename}_segments")
    if os.path.isdir(segments_folder):
        return sorted(
            os.path.join(segments_folder, name)
            for name in os.listdir(segments_folder)
            if name.lower().endswith('.png')
        )
    print(f"Waarschuwing: Object segments directory '{segments_folder}' bestaat niet.")
    return []

def load_captions(captions_json_path: str) -> Dict[str, str]:
    """Load the captions JSON and index the captions by segment and frame.

    Entries lacking ``video_segment`` or ``frame_filename`` are ignored; a
    missing ``caption`` becomes ``""``.

    Args:
        captions_json_path: Path of the JSON file holding the caption entries.

    Returns:
        Dict mapping ``"<video_segment>/<frame_filename>"`` to the caption.
    """
    caption_map = {
        f"{entry.get('video_segment')}/{entry.get('frame_filename')}": entry.get('caption', "")
        for entry in load_json(captions_json_path)
        if entry.get('video_segment') and entry.get('frame_filename')
    }
    print(f"Loaded captions for {len(caption_map)} frames.")
    return caption_map

def update_chunks_with_segments_and_captions(original_chunks: List[Dict[str, Any]], 
                                segment_map: Dict[tuple, str],
                                frames_dir: str,
                                generated_segments_dir: str,
                                captions_map: Dict[str, str]) -> List[Dict[str, Any]]:
    """Attach the video segment, frames, object segments and captions to each chunk.

    A chunk is matched to its video segment through the key
    ``(int(start), int(end))`` in ``segment_map``. Chunks without a start or
    end timestamp, and chunks without a matching video segment, are reported
    and left out of the result.

    Args:
        original_chunks: Chunks with ``text``, ``start``, ``end`` and optionally ``words``.
        segment_map: Mapping from ``(start, end)`` to a video segment path.
        frames_dir: Root folder with one frame sub-folder per video segment.
        generated_segments_dir: Root folder of the object segments; when it
            does not exist every frame gets an empty ``object_segments`` list.
        captions_map: Mapping from ``"<segment file name>/<frame file name>"`` to a caption.

    Returns:
        New chunk dicts with the keys ``text``, ``start``, ``end``,
        ``video_segment``, ``frames`` and ``words``.
    """
    # Checked once: the answer is the same for every frame.
    has_object_segments = os.path.isdir(generated_segments_dir)

    updated_chunks = []
    for chunk in original_chunks:
        chunk_start = chunk.get('start')
        chunk_end = chunk.get('end')
        if chunk_start is None or chunk_end is None:
            # int(None) below would raise; such a chunk has no video segment anyway.
            print(f"error: chunk without start/end timestamp skipped (start {chunk_start}, end {chunk_end}).")
            continue

        key = (int(chunk_start), int(chunk_end))
        video_segment_path = segment_map.get(key)
        if not video_segment_path:
            print(f"error: no video segment found for chunk start {chunk_start} and end {chunk_end}.")
            continue

        segment_filename = os.path.basename(video_segment_path)
        frames = find_frames_for_segment(frames_dir, segment_filename)
        frames_info = []
        for frame_path in frames:
            frame_filename = os.path.basename(frame_path)
            object_segments = []
            if has_object_segments:
                object_segments = find_object_segments_for_frame(generated_segments_dir, segment_filename, frame_filename)
            # Captions are keyed by video segment name and frame file name.
            caption_key = f"{segment_filename}/{frame_filename}"
            caption = captions_map.get(caption_key, "")
            frames_info.append({
                "frame_path": frame_path,
                "object_segments": object_segments,
                "caption": caption
            })

        # Add the video segment and the frame information to the chunk.
        updated_chunks.append({
            "text": chunk['text'],
            "start": chunk_start,
            "end": chunk_end,
            "video_segment": video_segment_path,
            "frames": frames_info,
            "words": chunk.get('words', [])
        })

    return updated_chunks

def main():
    """Enrich the transcript chunks with video segments, frames and captions.

    Reads the chunk JSON, maps each chunk to its video segment, gathers the
    frames of that segment together with their object segments and captions,
    and writes the result to a new JSON file. Prints a message and returns
    early when a required input is missing or the chunk list is empty.
    """
    # Define paths
    original_json_path = "processed_json_machine_cover_2/machine_cover_2_chunks.json"  # Original JSON with chunks
    video_segments_dir = "video_segments_machine_cover_2/machine_cover_2"  # Directory with video segments
    frames_dir = "frames_machine_cover_2/machine_cover_2"  # Root directory with frames per segment
    generated_segments_dir = "generated_results_machine_cover_2/segments"  # Directory with object segments
    captions_json_path = "combined_frames_machine_cover_2.json"  # JSON with image captions
    new_json_path = "updated_chunks_with_segments_and_captions_machine_cover_2.json"  # New JSON output

    # Inputs the pipeline cannot run without: (check, path, message on failure)
    required_inputs = [
        (os.path.exists, original_json_path,
         f"Error: Originele JSON bestand '{original_json_path}' bestaat niet."),
        (os.path.isdir, video_segments_dir,
         f"Error: Video segments directory '{video_segments_dir}' bestaat niet."),
        (os.path.isdir, frames_dir,
         f"Error: Frames directory '{frames_dir}' bestaat niet."),
        (os.path.exists, captions_json_path,
         f"Error: Captions JSON bestand '{captions_json_path}' bestaat niet."),
    ]
    for is_present, path, message in required_inputs:
        if not is_present(path):
            print(message)
            return

    # The object segments are optional: update_chunks_with_segments_and_captions
    # leaves "object_segments" empty when this directory is absent, so a missing
    # directory only deserves a warning instead of aborting the whole run.
    if not os.path.isdir(generated_segments_dir):
        print(f"Warning: Generated segments directory '{generated_segments_dir}' bestaat niet; "
              "object segments worden overgeslagen.")

    # Load the original JSON
    original_data = load_json(original_json_path)
    chunks = original_data.get('chunks', [])
    if not chunks:
        print("Geen chunks gevonden in de originele JSON.")
        return

    print(f"Loaded {len(chunks)} chunks from '{original_json_path}'.")

    # Build a mapping from (start, end) times to video segment paths
    segment_map = find_video_segments(chunks, video_segments_dir)
    print(f"Found {len(segment_map)} video segments.")

    # Load the captions and build a lookup for them
    captions_map = load_captions(captions_json_path)

    # Update the chunks with video segments, frames, object segments and captions
    updated_chunks = update_chunks_with_segments_and_captions(
        chunks,
        segment_map,
        frames_dir,
        generated_segments_dir,
        captions_map
    )

    print(f"Updated {len(updated_chunks)} chunks with segments and captions.")

    # Build the new JSON structure
    new_data = {
        "chunks": updated_chunks
    }

    # Save the new JSON
    save_json(new_data, new_json_path)
    print(f"new JSON with connected segments and captions saved as '{new_json_path}'.")

main()

In [None]:
import json
from sentence_transformers import SentenceTransformer, util
import numpy as np


# Input files for this video. Judging by the file names these hold the tracked
# object detections and the recognised action segments (with bounding boxes)
# -- TODO confirm against the producing pipelines.
object_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Objectdetectie\detections_with_tracking_YOLOv12mv8_machine_cover_2.json"
har_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\mmaction2\final_segments_with_boxes_machine_cover_2.json"

# Object data: iterated below as a mapping of object id -> object record.
with open(object_path, "r", encoding="utf-8") as f:
    object_data = json.load(f)

# Action data: iterated below as a list of action records.
with open(har_path, "r", encoding="utf-8") as f:
    action_data = json.load(f)

# Sentence embedding model used for the object-label / action-label similarity.
model = SentenceTransformer("all-MiniLM-L6-v2")



In [None]:
def match_objects_to_chunk(chunk_start, chunk_end, object_data):
    """Return the tracked objects whose lifetime overlaps a chunk.

    An object matches when its [first_seen, last_seen] interval overlaps
    [chunk_start, chunk_end]; both boundaries are inclusive.

    Args:
        chunk_start: Start time of the chunk.
        chunk_end: End time of the chunk.
        object_data: Mapping of object id to an object record containing at
            least "first_seen" and "last_seen".

    Returns:
        A list with one shallow copy per matching object record, extended
        with an "object_id" key. Copies are returned so that the caller's
        ``object_data`` is not modified as a side effect of the lookup.
    """
    return [
        {**obj, "object_id": obj_id}
        for obj_id, obj in object_data.items()
        if obj["last_seen"] >= chunk_start and obj["first_seen"] <= chunk_end
    ]

def match_actions_to_chunk(chunk_start, chunk_end, actions):
    """Select the actions whose [start, end] interval overlaps the chunk.

    Both interval boundaries are inclusive. The returned list holds the
    original action records (no copies), in their original order.
    """
    overlapping = []
    for action in actions:
        # An action that ended before the chunk started cannot overlap.
        if not action["end"] >= chunk_start:
            continue
        if action["start"] <= chunk_end:
            overlapping.append(action)
    return overlapping

def calculate_iou(box1, box2):
    """Compute the intersection-over-union of two axis-aligned boxes.

    Each box is a dict with the keys "x", "y", "width" and "height".
    Returns 0.0 when the union area is zero.
    """
    # Corners of the overlapping rectangle (may be inverted when disjoint).
    x_low = max(box1["x"], box2["x"])
    y_low = max(box1["y"], box2["y"])
    x_high = min(box1["x"] + box1["width"], box2["x"] + box2["width"])
    y_high = min(box1["y"] + box1["height"], box2["y"] + box2["height"])

    overlap_width = max(0, x_high - x_low)
    overlap_height = max(0, y_high - y_low)
    intersection = overlap_width * overlap_height

    union = (
        box1["width"] * box1["height"] + box2["width"] * box2["height"] - intersection
    )
    if union != 0:
        return intersection / union
    return 0.0


def semantic_similarity(text1, text2, model):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded in a single call to ``model.encode`` and the
    similarity is returned as a plain Python number.
    """
    embeddings = model.encode([text1, text2], convert_to_tensor=True)
    score = util.pytorch_cos_sim(embeddings[0], embeddings[1])
    return score.item()


def convert_action_box_to_bbox(action_box):
    """Translate an action box (x, y, w, h) into the x/y/width/height layout.

    The result uses the same key names as the boxes consumed by
    ``calculate_iou``.
    """
    # (key in the result, key in the action box)
    key_pairs = (("x", "x"), ("y", "y"), ("width", "w"), ("height", "h"))
    return {target: action_box[source] for target, source in key_pairs}


In [None]:
# Enrich every transcript chunk with the objects and actions that overlap it in
# time, plus object/action links that pass a spatial and a semantic filter.
with open("processed_json_machine_cover_2/machine_cover_2_chunks.json", "r", encoding="utf-8") as f:
    chunks_json = json.load(f)

enriched_chunks = []
for chunk in chunks_json["chunks"]:
    start = chunk["start"]
    end = chunk["end"]

    # Objects and actions whose time interval overlaps this chunk
    objects = match_objects_to_chunk(start, end, object_data)
    actions = match_actions_to_chunk(start, end, action_data)

    links = []
    for obj in objects:
        for act in actions:
            action_bbox = convert_action_box_to_bbox(act["avg_bounding_box"])
            # IoU between the action box and every object position observed
            # while the action was taking place
            ious = [
                calculate_iou(f["bbox"], action_bbox)
                for f in obj["trajectory"]
                if act["start"] <= f["time"] <= act["end"]
            ]
            avg_iou = np.mean(ious) if ious else 0.0
            # Spatial filter: drop pairs that barely overlap
            if avg_iou < 0.05:
                continue
            sim = semantic_similarity(obj["class_name"], act["label"], model)
            # Alternative prompt formulations that were tried:
            # sim = semantic_similarity(f"object: {obj['class_name']}", f"action: {act['label']}", model)
            # sim = semantic_similarity(
            #     f"This is a tool or item called {obj['class_name']}", 
            #     f"This is a human action: {act['label']}", 
            #     model
            # )

            # Semantic filter: drop pairs whose labels are unrelated
            if sim < 0.05:
                continue
            links.append({
                "object_id": obj["object_id"],
                "class_name": obj["class_name"],
                "action_label": act["label"],
                "iou": avg_iou,
                "cosine_similarity": sim,
                "action_score": act["avg_score"]
            })

    # The chunk dict itself is extended in place and collected for output
    chunk["object_detections"] = objects
    chunk["actions"] = actions
    chunk["object_action_links"] = links
    enriched_chunks.append(chunk)

# Save
with open("chunks_with_objects_and_actions_machine_cover_2.json", "w", encoding="utf-8") as f:
    json.dump({"chunks": enriched_chunks}, f, indent=4)


In [None]:
import json

# Load the JSON written by the previous cell
with open("chunks_with_objects_and_actions_machine_cover_2.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Print the object/action links per chunk (chunks are numbered from 1) as a
# quick visual check of the result
for i, chunk in enumerate(data["chunks"]):
    print(f"Chunk {i+1} links:")
    for link in chunk.get("object_action_links", []):
        print(link)


## Hands

In [None]:
# import json

# # Pas dit pad aan
# hand_detection_json_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\handdetectie_resultaten_stabilizer pressure control.json"

# with open(hand_detection_json_path, 'r', encoding='utf-8') as f:
#     hand_data_raw = json.load(f)

# # Maak snelle lookup per frame-index
# hand_data = {item["frame"]: item for item in hand_data_raw}
# print(f"Handdata geladen voor {len(hand_data)} frames.")

In [None]:
# def point_in_bbox(x, y, bbox):
#     return bbox["x"] <= x <= bbox["x"] + bbox["width"] and bbox["y"] <= y <= bbox["y"] + bbox["height"]

# def hand_touches_object(hand_keypoints, object_bbox, frame_width, frame_height):
#     # converteer genormaliseerde coördinaten naar pixels
#     keypoint_indices = [4, 8]  # duimtop en wijsvingertop
#     for idx in keypoint_indices:
#         kp = hand_keypoints[idx]
#         x_px = int(kp["x"] * frame_width)
#         y_px = int(kp["y"] * frame_height)
#         if point_in_bbox(x_px, y_px, object_bbox):
#             return True
#     return False


In [None]:
# def find_closest_hand_data(time_sec, hand_data_dict, tolerance=0.05):
#     """
#     Zoek hand-data waarvan de tijd dicht bij 'time_sec' ligt (binnen tolerance).
#     Verwacht hand_data_dict als: {frame_idx: {"time": ..., "hands": [...]}}
#     """
#     closest = None
#     min_diff = float('inf')
#     for entry in hand_data_dict.values():  # <-- LET OP!
#         diff = abs(entry["time"] - time_sec)
#         if diff < min_diff and diff <= tolerance:
#             min_diff = diff
#             closest = entry
#     return closest


In [None]:
# def link_hand_to_object(chunk_start, chunk_end, object_data, hand_data_list, frame_width, frame_height):
#     links = []

#     for obj_id, obj in object_data.items():
#         for traj in obj["trajectory"]:
#             time_sec = traj["time"]
#             if not (chunk_start <= time_sec <= chunk_end):
#                 continue

#             bbox = traj["bbox"]
#             hand_entry = find_closest_hand_data(time_sec, hand_data_list)
#             if not hand_entry:
#                 continue

#             for hand in hand_entry.get("hands", []):
#                 if hand_touches_object(hand, bbox, frame_width, frame_height):
#                     links.append({
#                         "object_id": obj_id,
#                         "class_name": obj["class_name"],
#                         "time": time_sec,
#                         "contact_type": "touch"
#                     })
#                     break
#     return links


In [None]:
# # Laad bestaande chunks
# with open("chunks_with_objects_and_actions.json", "r", encoding="utf-8") as f:
#     chunks_data = json.load(f)

# # Laad object_data indien nog niet gedaan
# with open(r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Objectdetectie\detections_with_tracking_YOLOv12mv8_stabilizer_pressure_control.json", "r", encoding="utf-8") as f:
#     object_data = json.load(f)  # pas pad aan

# frame_width = 1920  # Pas aan naar jouw videoresolutie
# frame_height = 1080

# for chunk in chunks_data["chunks"]:
#     start = chunk["start"]
#     end = chunk["end"]
#     hand_links = link_hand_to_object(start, end, object_data, hand_data, frame_width, frame_height)
#     chunk["hand_object_links"] = hand_links

# # Opslaan
# with open("chunks_with_objects_actions_hands.json", "w", encoding="utf-8") as f:
#     json.dump(chunks_data, f, indent=4)
#     print("JSON met hand-object interacties opgeslagen als 'chunks_with_objects_actions_hands.json'")


## Hands v2

In [None]:
import json
import math
from shapely.geometry import LineString, box

# Path to the hand detection data
hand_detection_json_path = r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\handdetectie_resultaten_machine_cover_2.json"

with open(hand_detection_json_path, 'r', encoding='utf-8') as f:
    hand_data_raw = json.load(f)

# Lookup per frame: the "frame" value of every entry becomes the dict key.
# When several entries share a frame value, the last one wins.
hand_data = {item["frame"]: item for item in hand_data_raw}
print(f"Handdata geladen voor {len(hand_data)} frames.")

In [None]:
# Functies voor interacties
def point_in_bbox(x, y, bbox):
    """Tell whether the point (x, y) lies inside ``bbox``, edges included.

    ``bbox`` is a dict with the keys "x", "y", "width" and "height".
    """
    if not bbox["x"] <= x <= bbox["x"] + bbox["width"]:
        return False
    return bbox["y"] <= y <= bbox["y"] + bbox["height"]

def hand_touches_object(hand_keypoints, object_bbox, frame_width, frame_height):
    """Check whether a fingertip of the hand lies inside the object's box.

    Keypoints 4 and 8 (thumb tip and index fingertip) are scaled by the frame
    size and truncated to whole pixels before the test. Returns True as soon
    as one of them falls inside ``object_bbox``.
    """
    fingertip_indices = (4, 8)  # thumb tip and index fingertip
    return any(
        point_in_bbox(
            int(hand_keypoints[tip]["x"] * frame_width),
            int(hand_keypoints[tip]["y"] * frame_height),
            object_bbox,
        )
        for tip in fingertip_indices
    )

def angle_between(p1, p2, p3):
    """Return the interior angle at ``p2`` of the path p1 -> p2 -> p3, in degrees.

    Computed with the law of cosines. 180 means the three points lie on a
    straight line with ``p2`` in the middle; 0 means ``p1`` and ``p3`` lie in
    the same direction as seen from ``p2``. When ``p2`` coincides with ``p1``
    or ``p3`` the angle is undefined and 180 is returned.
    """
    a = math.hypot(p3[0] - p2[0], p3[1] - p2[1])
    b = math.hypot(p1[0] - p2[0], p1[1] - p2[1])
    c = math.hypot(p1[0] - p3[0], p1[1] - p3[1])
    if a == 0 or b == 0:
        return 180
    cos_angle = (b**2 + a**2 - c**2) / (2 * a * b)
    # Clamp to [-1, 1] so rounding errors cannot push acos out of its domain
    return math.degrees(math.acos(max(min(cos_angle, 1), -1)))

def is_finger_straight(kp_a, kp_b, kp_c, frame_width, frame_height, max_angle=35):
    """Tell whether three consecutive finger keypoints form a (nearly) straight line.

    Args:
        kp_a, kp_b, kp_c: Keypoint dicts with "x" and "y"; they are scaled by
            the frame size before the angle is measured.
        frame_width, frame_height: Scale factors for the x and y coordinates.
        max_angle: Largest allowed bend at ``kp_b`` in degrees, measured as
            the deviation from a straight line (0 = perfectly straight).

    Returns:
        True when the bend at ``kp_b`` is smaller than ``max_angle``. False
        when neighbouring keypoints coincide, because no direction can be
        derived from them.
    """
    pa = (kp_a["x"] * frame_width, kp_a["y"] * frame_height)
    pb = (kp_b["x"] * frame_width, kp_b["y"] * frame_height)
    pc = (kp_c["x"] * frame_width, kp_c["y"] * frame_height)
    if pa == pb or pb == pc:
        return False
    # angle_between yields the interior joint angle, which is 180 degrees for
    # a straight finger; the bend is how far the joint deviates from that.
    bend = 180 - angle_between(pa, pb, pc)
    return bend < max_angle

def is_index_finger_straight(hand_keypoints, frame_width, frame_height):
    """Check that both joint triples of the index finger are straight.

    The keypoint triples (5, 6, 7) and (6, 7, 8) are each tested with
    ``is_finger_straight`` using its default angle limit.
    """
    joint_triples = ((5, 6, 7), (6, 7, 8))
    return all(
        is_finger_straight(
            hand_keypoints[first],
            hand_keypoints[middle],
            hand_keypoints[last],
            frame_width,
            frame_height,
        )
        for first, middle, last in joint_triples
    )

def are_other_fingers_bent(hand_keypoints, frame_width, frame_height, min_angle=35):
    """Check that none of the remaining fingers is straight.

    Tests the keypoint triples (9, 10, 11), (13, 14, 15) and (17, 18, 19)
    -- presumably the middle, ring and little finger; TODO confirm against
    the hand landmark layout in use. ``min_angle`` is forwarded to
    ``is_finger_straight`` as its ``max_angle`` limit.
    """
    other_fingers = [(9, 10, 11), (13, 14, 15), (17, 18, 19)]
    return not any(
        is_finger_straight(
            hand_keypoints[first],
            hand_keypoints[middle],
            hand_keypoints[last],
            frame_width,
            frame_height,
            max_angle=min_angle,
        )
        for first, middle, last in other_fingers
    )

def hand_points_to_object(hand_keypoints, object_bbox, frame_width, frame_height):
    """Check whether the hand makes a pointing gesture aimed at the object.

    The gesture requires a straight index finger while the other fingers are
    bent. The pointing direction runs from keypoint 5 to keypoint 8; that
    vector is stretched to ten times its length and tested for intersection
    with the object's bounding box.
    """
    index_extended = is_index_finger_straight(hand_keypoints, frame_width, frame_height)
    if not index_extended or not are_other_fingers_bent(hand_keypoints, frame_width, frame_height):
        return False

    base, tip = hand_keypoints[5], hand_keypoints[8]
    origin = (base["x"] * frame_width, base["y"] * frame_height)
    fingertip = (tip["x"] * frame_width, tip["y"] * frame_height)

    direction = (fingertip[0] - origin[0], fingertip[1] - origin[1])
    if direction == (0, 0):
        # Base and tip coincide, so there is no direction to follow.
        return False

    # End of the pointing line: ten finger lengths away from the base.
    far_end = (origin[0] + direction[0] * 10, origin[1] + direction[1] * 10)

    ray = LineString([origin, far_end])
    target = box(
        object_bbox["x"],
        object_bbox["y"],
        object_bbox["x"] + object_bbox["width"],
        object_bbox["y"] + object_bbox["height"],
    )
    return ray.intersects(target)

def find_closest_hand_data(time_sec, hand_data_dict, tolerance=0.05):
    """Find the hand entry whose "time" is nearest to ``time_sec``.

    Only entries within ``tolerance`` of ``time_sec`` qualify. When several
    entries are equally close, the one that comes first in the dict is
    returned. Returns None when no entry qualifies.
    """
    best_entry = None
    smallest_gap = float('inf')
    for candidate in hand_data_dict.values():
        gap = abs(candidate["time"] - time_sec)
        if not (gap <= tolerance and gap < smallest_gap):
            continue
        best_entry = candidate
        smallest_gap = gap
    return best_entry

def link_hand_to_object(chunk_start, chunk_end, object_data, hand_data_list, frame_width, frame_height):
    """Collect hand/object interactions that happen inside a chunk.

    For every trajectory sample of every object that falls within
    [chunk_start, chunk_end], the hand entry closest in time is looked up.
    The first hand in that entry that touches the object ("touch") or points
    at it ("point") produces one link; at most one link is recorded per
    trajectory sample.

    Despite its name, ``hand_data_list`` must be a mapping: it is handed to
    ``find_closest_hand_data``, which iterates over its values.

    Returns a list of dicts with the keys "object_id", "class_name", "time"
    and "contact_type".
    """
    found_links = []
    for object_id, tracked in object_data.items():
        for sample in tracked["trajectory"]:
            timestamp = sample["time"]
            if not (chunk_start <= timestamp <= chunk_end):
                continue

            object_box = sample["bbox"]
            nearest_hands = find_closest_hand_data(timestamp, hand_data_list)
            if not nearest_hands:
                continue

            # Touching takes precedence over pointing; stop at the first hand
            # that interacts with the object.
            interaction = None
            for keypoints in nearest_hands.get("hands", []):
                if hand_touches_object(keypoints, object_box, frame_width, frame_height):
                    interaction = "touch"
                elif hand_points_to_object(keypoints, object_box, frame_width, frame_height):
                    interaction = "point"
                if interaction is not None:
                    break

            if interaction is None:
                continue

            found_links.append({
                "object_id": object_id,
                "class_name": tracked["class_name"],
                "time": timestamp,
                "contact_type": interaction
            })
    return found_links

In [None]:
# Video resolution; the hand keypoints are scaled by these values
frame_width = 1920
frame_height = 1080

# Load the object data
with open(r"C:\Users\maxim\OneDrive\01-Opleidingen\03-MAAI\Afstuderen\Objectdetectie\detections_with_tracking_YOLOv12mv8_machine_cover_2.json", "r", encoding="utf-8") as f:
    object_data = json.load(f)

# Load the chunks
with open("chunks_with_objects_and_actions_machine_cover_2.json", "r", encoding="utf-8") as f:
    chunks_data = json.load(f)

# Process all chunks: attach the hand/object interactions found in each one
for chunk in chunks_data["chunks"]:
    start = chunk["start"]
    end = chunk["end"]
    hand_links = link_hand_to_object(start, end, object_data, hand_data, frame_width, frame_height)
    chunk["hand_object_links"] = hand_links

# Save. The path is kept in one variable so the confirmation message always
# names the file that was actually written.
hands_output_path = "chunks_with_objects_actions_hands_machine_cover_2.json"
with open(hands_output_path, "w", encoding="utf-8") as f:
    json.dump(chunks_data, f, indent=4)
print(f"JSON met hand-object interacties opgeslagen als '{hands_output_path}'")

## Sentence transformer captions linking to chunks

In [None]:
import json
import os
from typing import List, Dict, Any
from sentence_transformers import SentenceTransformer, util
import torch
from tqdm import tqdm
import re

def load_json(file_path: str) -> Any:
    """Read the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

def save_json(data: Any, file_path: str):
    """Write ``data`` as JSON (4-space indent, UTF-8 file) and report the path."""
    with open(file_path, 'w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=4)
    confirmation = f"JSON opgeslagen als '{file_path}'."
    print(confirmation)

def preprocess_chunks(chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Reduce the chunks to the fields needed for caption linking.

    Chunks without text (missing or empty "text") are dropped. Every kept
    chunk gets a "chunk_id" built from its start and end time
    ("<start>_<end>"), which identifies it in the later steps.
    """
    return [
        {
            "chunk_id": f"{chunk['start']}_{chunk['end']}",
            "text": chunk['text'],
            "start": chunk['start'],
            "end": chunk['end']
        }
        for chunk in chunks
        if chunk.get('text', "")
    ]

def preprocess_captions(captions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Reduce the caption records to segment, frame file name and caption text.

    Records without caption text (missing or empty "caption") are dropped.
    A missing "video_segment" or "frame_filename" becomes an empty string.
    """
    cleaned = []
    for record in captions:
        caption_text = record.get('caption', "")
        if not caption_text:
            continue
        cleaned.append({
            "video_segment": record.get('video_segment', ""),
            "frame_filename": record.get('frame_filename', ""),
            "caption": caption_text
        })
    return cleaned

def compute_embeddings(model: SentenceTransformer, texts: List[str], batch_size: int = 32) -> torch.Tensor:
    """Encode ``texts`` with ``model`` and return the embeddings as a tensor.

    The texts are encoded in batches of ``batch_size`` and a progress bar is
    shown while encoding.
    """
    return model.encode(
        texts,
        batch_size=batch_size,
        convert_to_tensor=True,
        show_progress_bar=True,
    )

def parse_segment_times(segment_name: str) -> tuple[int, int]:
    """Extract the start and end time from a video segment name.

    Expects the pattern ``segment_<number>_<start>_<end>`` somewhere in the
    name. Returns (0, 0) when the name does not contain that pattern.
    """
    found = re.search(r"segment_\d+_(\d+)_(\d+)", segment_name)
    if found is None:
        return (0, 0)
    start_text, end_text = found.groups()
    return int(start_text), int(end_text)

def link_captions_to_chunks(
    chunks: List[Dict[str, Any]],
    captions: List[Dict[str, Any]],
    similarity_threshold: float = 0.3
) -> List[Dict[str, Any]]:
    """Link every caption to the most similar chunk of its video segment.

    For each caption the start/end time is parsed from its video segment
    name. Chunks that lie inside that time window (with a margin of 1 on
    both sides) are the candidates; the candidate whose text embedding has
    the highest cosine similarity with the caption embedding is linked when
    that similarity reaches ``similarity_threshold``.

    Args:
        chunks: Preprocessed chunks with "chunk_id", "text", "start", "end".
        captions: Caption records with "video_segment", "frame_filename"
            and "caption". Records without caption text or segment name are
            skipped.
        similarity_threshold: Minimum cosine similarity for a link.

    Returns:
        One record per processed caption. "linked_chunk_id" and
        "linked_chunk_text" are None when no chunk was linked;
        "similarity_score" is None when there was no candidate chunk at all.
    """
    model = SentenceTransformer('all-mpnet-base-v2')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    margin = 1
    links = []
    # Position of a chunk in `chunks` -> its embedding. Several captions
    # usually belong to the same video segment and therefore share their
    # candidate chunks; caching avoids encoding the same chunk text again
    # for every caption.
    embedding_cache = {}

    def make_link(idx, caption, segment_name, caption_text, linked_chunk, score):
        """Build the output record for one caption."""
        return {
            "caption_index": idx,
            "frame_filename": caption.get("frame_filename", ""),
            "video_segment": segment_name,
            "caption": caption_text,
            "linked_chunk_id": linked_chunk['chunk_id'] if linked_chunk is not None else None,
            "linked_chunk_text": linked_chunk['text'] if linked_chunk is not None else None,
            "similarity_score": score
        }

    for idx, caption in enumerate(tqdm(captions, desc="Linking Captions")):
        segment_name = caption.get("video_segment", "")
        caption_text = caption.get("caption", "").strip()

        if not caption_text or not segment_name:
            continue

        segment_start, segment_end = parse_segment_times(segment_name)

        # Chunks that fall completely inside the (widened) segment window
        candidate_positions = [
            pos for pos, chunk in enumerate(chunks)
            if chunk["start"] >= segment_start - margin and chunk["end"] <= segment_end + margin
        ]

        if not candidate_positions:
            links.append(make_link(idx, caption, segment_name, caption_text, None, None))
            continue

        # Encode only the candidate chunks that have not been encoded before
        uncached = [pos for pos in candidate_positions if pos not in embedding_cache]
        if uncached:
            fresh_embeddings = model.encode(
                [chunks[pos]["text"] for pos in uncached], convert_to_tensor=True
            ).to(device)
            embedding_cache.update(zip(uncached, fresh_embeddings))
        chunk_embeddings = torch.stack([embedding_cache[pos] for pos in candidate_positions])

        caption_embedding = model.encode([caption_text], convert_to_tensor=True).to(device)

        similarities = util.cos_sim(caption_embedding, chunk_embeddings)[0]  # shape: (num_candidates,)
        top_idx = torch.argmax(similarities).item()
        top_score = similarities[top_idx].item()

        print(f"⏱ Caption {idx}: top_score = {top_score:.4f}, threshold = {similarity_threshold}")

        # Below the threshold the score is still reported, but no chunk is linked
        linked_chunk = None
        if top_score >= similarity_threshold:
            linked_chunk = chunks[candidate_positions[top_idx]]
        links.append(make_link(idx, caption, segment_name, caption_text, linked_chunk, top_score))

    return links


def filter_linked_captions(links: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep only the links whose caption was matched to a chunk.

    A link counts as matched when its "linked_chunk_id" is not None. The
    number of kept links is printed.
    """
    linked_only = []
    for link in links:
        if link['linked_chunk_id'] is None:
            continue
        linked_only.append(link)
    print(f"Filtered captions: {len(linked_only)} out of {len(links)} were successfully linked.")
    return linked_only

def main():
    """Link the image captions to the transcript chunks and store the result.

    Writes two JSON files: the complete caption-to-chunk mapping and a
    filtered version that only holds the captions that were linked to a
    chunk. Prints a message and returns early when an input file is missing
    or contains no chunks or captions.
    """
    # Define paths
    chunks_json_path = "processed_json_machine_cover_2/machine_cover_2_chunks.json"  # Original JSON with chunks
    captions_json_path = "combined_frames_machine_cover_2.json"  # JSON with image captions
    output_mapping_path = "captions_to_chunks_mapping_machine_cover_2.json"  # New JSON output (all links)
    filtered_output_path = "filtered_captions_to_chunks_mapping_machine_cover_2.json"  # New JSON output (linked captions only)

    # Stop at the first required file that does not exist
    first_missing = next(
        (path for path in (chunks_json_path, captions_json_path) if not os.path.exists(path)),
        None,
    )
    if first_missing is not None:
        print(f"Error: Vereist bestand '{first_missing}' bestaat niet.")
        return

    # Load the JSON files
    print("Loading JSON files...")
    chunks_data = load_json(chunks_json_path)
    captions_data = load_json(captions_json_path)

    chunks = chunks_data.get('chunks', [])
    captions = captions_data  # The captions file is assumed to be a list

    if not chunks:
        print("Geen chunks gevonden in de hoofd JSON.")
        return
    if not captions:
        print("Geen captions gevonden in de captions JSON.")
        return

    print(f"Loaded {len(chunks)} chunks and {len(captions)} captions.")

    # Link the preprocessed captions to the preprocessed chunks via Sentence Transformers
    links = link_captions_to_chunks(
        preprocess_chunks(chunks),
        preprocess_captions(captions),
        similarity_threshold=0.1,
    )
    print(f"Generated {len(links)} caption links.")

    # Store the complete mapping as a new JSON
    save_json(links, output_mapping_path)
    print(f"Captions to chunks mapping saved to '{output_mapping_path}'.")

    # Keep only the captions that were linked, and store those as well
    filtered_links = filter_linked_captions(links)
    save_json(filtered_links, filtered_output_path)
    print(f"Filtered captions to chunks mapping saved to '{filtered_output_path}'.")

if __name__ == "__main__":
    main()

In [None]:
import json

# Paths to the input and output files
filtered_links_path = "filtered_captions_to_chunks_mapping_machine_cover_2.json"
enriched_chunks_path = "chunks_with_objects_actions_hands_machine_cover_2.json"  # NOTE: the version that includes hand-object links
output_path = "enriched_filtered_captions_to_chunks_mapping_hands_machine_cover_2.json"

# 1. Load both JSON files
with open(filtered_links_path, 'r', encoding='utf-8') as f:
    filtered_links = json.load(f)

with open(enriched_chunks_path, 'r', encoding='utf-8') as f:
    enriched_chunks = json.load(f)["chunks"]

# 2. Build an index from "start_end" to chunk; this is the same format as the
#    "chunk_id" produced by preprocess_chunks
chunk_index = {}
for chunk in enriched_chunks:
    key = f"{chunk['start']}_{chunk['end']}"
    chunk_index[key] = chunk

# 3. Process the mapping list
enriched_links = []
for item in filtered_links:
    chunk_id = item.get("linked_chunk_id")
    if not chunk_id:
        # Not linked to a chunk: keep the item unchanged
        enriched_links.append(item)
        continue

    matching_chunk = chunk_index.get(chunk_id)
    if not matching_chunk:
        # Linked chunk is not in the enriched file: warn and keep the item unchanged
        print(f"Waarschuwing: Geen match voor chunk ID {chunk_id}")
        enriched_links.append(item)
        continue

    # Add the objects, actions and hand-object links of the linked chunk
    item["object_detections"] = matching_chunk.get("object_detections", [])
    item["actions"] = matching_chunk.get("actions", [])
    item["object_action_links"] = matching_chunk.get("object_action_links", [])
    item["hand_object_links"] = matching_chunk.get("hand_object_links", [])  # newly added
    enriched_links.append(item)

# 4. Save the new list
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(enriched_links, f, indent=4)

print(f"Verrijkte mapping opgeslagen in '{output_path}' inclusief hand-object links.")


# Cake
## Download model

In [None]:
from llama_cpp import Llama

# Fetch the Q4_K_M GGUF file of Qwen2.5-7B-Instruct from the given repository
# and load it with llama.cpp; the file is stored in the local "models" directory.
llm = Llama.from_pretrained(
    repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
    filename="*Q4_K_M.gguf",
    verbose=False,
    local_dir="models",
)

## Knowledge extraction

In [None]:
# import json
# import os
# import pickle
# import threading
# from llama_cpp import Llama
# from sentence_transformers import SentenceTransformer
# import faiss
# import numpy as np
# from json import JSONDecodeError

# # llama singleton to ensure one model is used for ram usage efficiency
# class LlamaSingleton:
#     _instance = None
#     _lock = threading.Lock()

#     def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml"):
#         with cls._lock:
#             if cls._instance is None:
#                 cls._instance = super(LlamaSingleton, cls).__new__(cls)
#                 cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
#             return cls._instance

# # chatbot class to extract knowledge from chunks
# class Chatbot:
#     def __init__(self, 
#                  messages_file='messages_machine_cover_2.json', 
#                  knowledge_file='knowledge_machine_cover_2.json', 
#                  faiss_index_file='faiss_index_machine_cover_2.pkl',
#                  model_name='all-MiniLM-L6-v2'):
#         self.messages_file = messages_file
#         self.knowledge_file = knowledge_file
#         self.faiss_index_file = faiss_index_file
#         self.llm = LlamaSingleton().llm
#         self.model = SentenceTransformer(model_name)
#         self.index = None
#         self.knowledge_data = []
#         self.initialize_files()
#         self.load_faiss_index()

#     def initialize_files(self):
#         for file in [self.messages_file, self.knowledge_file]:
#             if not os.path.exists(file):
#                 with open(file, 'w') as f:
#                     json.dump([], f)

#     def load_json_data(self, file_path):
#         with open(file_path, 'r') as f:
#             return json.load(f)

#     def save_json_data(self, file_path, data):
#         with open(file_path, 'w') as f:
#             json.dump(data, f, indent=4)

#     # this function extracts the knowledge from the chunk text
#     def extract_valuable_knowledge(self, message):
#         """
#         Sends the chunk text to the model and asks it to return JSON with
#         subject/predicate/object. No timestamps are generated by the model.
#         """
#         response = self.llm.create_chat_completion(
#             messages=[
#                 {
#                     "role": "system",
#                     "content": (
#                         "You are a knowledge extractor. Extract subject-predicate-object knowledge triplets \n" # WEGHALEN
#                         "from all the provided information: transcript, linked object-action pairs and hand-object interactions.\n" # WEGHALEN
#                         """Correct example: { "subject": "person", "predicate": "holds", "object": "cup" }Incorrect example: { "subject": "he", "predicate": "does", "object": "it" }"""
#                         # "Return ONLY JSON with the following schema:\n"
#                         # "{\n"
#                         # "  \"valuable_knowledge\": [\n"
#                         # "    {\n"
#                         # "      \"subject\": \"...\",\n"
#                         # "      \"predicate\": \"...\",\n"
#                         # "      \"object\": \"...\"\n"
#                         # "    }\n"
#                         # "  ]\n"
#                         # "}\n"
#                         # "If no knowledge can be extracted, return:\n"
#                         # "{\"valuable_knowledge\": []}"
#                     )
#                 },
#                 {"role": "user", "content": message},
#             ],
#             response_format={
#                 "type": "json",
#                 "schema": {
#                     "type": "object",
#                     "properties": {
#                         "valuable_knowledge": {
#                             "type": "array",
#                             "items": {
#                                 "type": "object",
#                                 "properties": {
#                                     "subject": {"type": "string"},
#                                     "predicate": {"type": "string"},
#                                     "object": {"type": "string"}
#                                 },
#                                 "required": ["subject", "predicate", "object"]
#                             }
#                         }
#                     },
#                     "required": ["valuable_knowledge"],
#                 },
#             },
#             temperature=0.5,
#         )
#         try:
#             response_content = response['choices'][0]['message']['content']

#             # Strip ```json en ``` als ze aanwezig zijn
#             if response_content.strip().startswith("```json"):
#                 response_content = response_content.strip()[7:]  # strip '```json\n'
#             if response_content.strip().endswith("```"):
#                 response_content = response_content.strip()[:-3]  # strip trailing '```'

#             knowledge_data = json.loads(response_content) # Vervanging voor regel hier onder
#             #knowledge_data = json.loads(response['choices'][0]['message']['content'])
#             print("Extracted knowledge from a chunk:", knowledge_data)
#             if "valuable_knowledge" not in knowledge_data:
#                 knowledge_data["valuable_knowledge"] = []
#             return knowledge_data["valuable_knowledge"]
#         except (JSONDecodeError, KeyError) as e:
#             print(f"Parsing error: {e}")
#             print("Volledige model response:", response) # WEGHALEN WANNEER KLAAR
#             return []

#     def save_knowledge(self, triplets):
#         """
#         Persists triplets to `knowledge.json` and updates FAISS index if new triplets
#         are found. We do not add any timestamps here.
#         """
#         if not triplets:
#             return
#         knowledge = self.load_json_data(self.knowledge_file)
#         existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
#         new_triplets = []
#         for triplet in triplets:
#             key = (triplet['subject'], triplet['predicate'], triplet['object'])
#             if key not in existing_set:
#                 knowledge.append(triplet)
#                 new_triplets.append(triplet)
#                 existing_set.add(key)
#         self.save_json_data(self.knowledge_file, knowledge)
#         if new_triplets:
#             self.update_faiss_index(new_triplets)

#     def update_faiss_index(self, triplets):
#         texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
#         embeddings = self.model.encode(texts)
#         if self.index is None:
#             self.index = faiss.IndexFlatL2(embeddings.shape[1])
#         self.index.add(np.array(embeddings, dtype=np.float32))
#         self.knowledge_data.extend(triplets)
#         self.save_faiss_index()

#     def save_faiss_index(self):
#         with open(self.faiss_index_file, 'wb') as f:
#             pickle.dump((self.index, self.knowledge_data), f)

#     def load_faiss_index(self):
#         if os.path.exists(self.faiss_index_file):
#             with open(self.faiss_index_file, 'rb') as f:
#                 self.index, self.knowledge_data = pickle.load(f)
#         else:
#             self.index = None
#             self.knowledge_data = []


# # Chats idee + audio altijd meenemen
# def main():
#     # Paden naar inputbestanden
#     enriched_links_path = "enriched_filtered_captions_to_chunks_mapping_hands_machine_cover_2.json"
#     all_chunks_path = "processed_json_machine_cover_2/machine_cover_2_chunks.json"

#     # Bestandscontrole
#     if not os.path.exists(enriched_links_path):
#         print(f"Bestand '{enriched_links_path}' bestaat niet.")
#         return
#     if not os.path.exists(all_chunks_path):
#         print(f"Bestand '{all_chunks_path}' bestaat niet.")
#         return

#     # Laad verrijkte links en chunks
#     with open(enriched_links_path, 'r', encoding='utf-8') as f:
#         enriched_links = json.load(f)

#     with open(all_chunks_path, 'r', encoding='utf-8') as f:
#         chunk_data = json.load(f)
#     all_chunks = chunk_data.get("chunks", [])

#     print(f"{len(all_chunks)} chunks geladen.")
#     print(f"{len(enriched_links)} verrijkte caption-links geladen.")

#     # Bouw een mapping van chunk_id → enrichment info
#     from collections import defaultdict
#     enrichment_map = {}
#     for item in enriched_links:
#         chunk_id = item.get("linked_chunk_id", "")
#         if chunk_id:
#             enrichment_map[chunk_id] = item

#     chatbot = Chatbot()

#     for i, chunk in enumerate(all_chunks, start=1):
#         chunk_text = chunk.get("text", "").strip()
#         start_time = chunk.get("start")
#         end_time = chunk.get("end")
#         chunk_id = f"{start_time}_{end_time}"

#         if not chunk_text:
#             print(f"Skipping lege chunk {chunk_id}")
#             continue

#         # Kijk of er enrichment bestaat
#         enrichment = enrichment_map.get(chunk_id)

#         # Bouw de prompt op
#         if enrichment:
#             caption = enrichment.get("caption", "").strip()
#             object_classes = [o.get("class_name", "") for o in enrichment.get("object_detections", [])]
#             action_labels = [a.get("label", "") for a in enrichment.get("actions", [])]
#             relations = [
#                 f"{l['class_name']} → {l['action_label']}"
#                 for l in enrichment.get("object_action_links", [])
#             ]
            
#             # Hand-object interactions: alleen unieke objectnamen zonder (contact_type)
#             unique_hand_contacts = set(
#                 h.get("class_name", "").strip()
#                 for h in enrichment.get("hand_object_links", [])
#                 if h.get("class_name", "").strip()
#             )
#             hand_object_contacts = sorted(unique_hand_contacts)

#             combined_input = (
#                 f"Transcript: {chunk_text}\n"
#                 #f"Caption: {caption}\n"
#                 #f"Objects in scene: {', '.join(object_classes)}\n"
#                 #f"Detected actions: {', '.join(action_labels)}\n"
#                 f"Linked object-action pairs: {', '.join(relations)}\n"
#                 f"Hand-object interactions: {', '.join(hand_object_contacts)}"
#             )
#         else:
#             combined_input = f"Transcript: {chunk_text}"


#         print(f"\n[{i}] Extracting knowledge for chunk {chunk_id}")

#         print(combined_input)
#         extracted_knowledge = chatbot.extract_valuable_knowledge(combined_input)

#         if extracted_knowledge:
#             for triplet in extracted_knowledge:
#                 triplet["start"] = start_time
#                 triplet["end"] = end_time
#             chatbot.save_knowledge(extracted_knowledge)

#     print("\nKennisextractie afgerond. Bekijk 'knowledge.json' voor de resultaten.")

# if __name__ == "__main__":
#     main()

import json
import os
import pickle
import threading
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    """Holder for one shared ``Llama`` model per class definition.

    The first successful construction loads the GGUF model and caches the
    instance on the class; every later call returns that cached instance, so
    ``model_path`` and ``chat_format`` only take effect on the first call.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                # Load the model *before* publishing the instance: if loading
                # fails, ``_instance`` stays None and a later call can retry
                # instead of receiving a half-built object without ``llm``.
                instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
                cls._instance = instance
            return cls._instance

class Chatbot:
    """Extract subject-predicate-object triplets with a local LLM and persist them.

    New triplets are appended to ``knowledge_file`` (JSON) and embedded into a
    FAISS index that is pickled, together with the matching triplet list, to
    ``faiss_index_file``.
    """

    # Keys every triplet must carry (as strings) before it is accepted.
    _TRIPLET_KEYS = ("subject", "predicate", "object")

    def __init__(self,
                 messages_file='messages_machine_cover_2_info.json',
                 knowledge_file='knowledge_machine_cover_2_info.json',
                 faiss_index_file='faiss_index_machine_cover_2_info.pkl',
                 model_name='all-MiniLM-L6-v2'):
        """Set up the LLM, the embedding model and the on-disk state.

        Args:
            messages_file: JSON file created as an empty list when missing.
            knowledge_file: JSON file holding the list of stored triplets.
            faiss_index_file: Pickle file holding ``(index, knowledge_data)``.
            model_name: Name passed to ``SentenceTransformer``.
        """
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        """Create the messages and knowledge files as empty JSON lists if missing."""
        for path in (self.messages_file, self.knowledge_file):
            if not os.path.exists(path):
                self.save_json_data(path, [])

    def load_json_data(self, file_path):
        """Return the parsed JSON content of ``file_path``."""
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding (the rest of this file also reads JSON as UTF-8).
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        """Write ``data`` to ``file_path`` as indented JSON."""
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding Markdown code fence (``` or ```json) if present."""
        text = text.strip()
        if text.startswith("```json"):
            text = text[7:]
        elif text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    @classmethod
    def _is_valid_triplet(cls, candidate):
        """Return True when ``candidate`` is a dict with string subject/predicate/object."""
        return isinstance(candidate, dict) and all(
            isinstance(candidate.get(key), str) for key in cls._TRIPLET_KEYS
        )

    def extract_valuable_knowledge(self, message):
        """Ask the LLM for knowledge triplets contained in ``message``.

        Args:
            message: Prompt text sent as the user message.

        Returns:
            A list of triplet dicts, each with string ``subject``,
            ``predicate`` and ``object``. Items that do not have that shape
            are dropped. An empty list is returned when the answer cannot be
            parsed as JSON.
        """
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Extract subject-predicate-object knowledge triplets "
                        "from all the provided information: transcript, linked object-action pairs and hand-object interactions.\n"
                        "Correct example: { \"subject\": \"person\", \"predicate\": \"holds\", \"object\": \"cup\" }\n"
                        "Incorrect example: { \"subject\": \"he\", \"predicate\": \"does\", \"object\": \"it\" }\n"
                        "If no knowledge can be extracted, return: {\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                # llama-cpp-python documents "json_object" as the type that
                # enables schema-constrained output.
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"}
                                },
                                "required": ["subject", "predicate", "object"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )

        try:
            response_content = response['choices'][0]['message']['content']
            parsed = json.loads(self._strip_code_fence(response_content))
        except (JSONDecodeError, KeyError, IndexError, TypeError, AttributeError) as e:
            print(f"JSON parse error: {e}")
            print("Raw model response:\n", response)
            return []

        if isinstance(parsed, list):
            print("Model returned a list instead of expected object. Wrapping it.")
            parsed = {"valuable_knowledge": parsed}
        if not isinstance(parsed, dict):
            print(f"JSON parse error: expected a JSON object, got {type(parsed).__name__}")
            print("Raw model response:\n", response)
            return []

        parsed.setdefault("valuable_knowledge", [])
        print("Extracted knowledge from a chunk:", parsed)

        candidates = parsed["valuable_knowledge"]
        if not isinstance(candidates, list):
            return []
        # Drop malformed items here so save_knowledge never hits a KeyError
        # (or an unhashable value) halfway through a run.
        return [item for item in candidates if self._is_valid_triplet(item)]

    def save_knowledge(self, triplets):
        """Append triplets not yet stored to the knowledge file and the index.

        A triplet counts as already stored when its (subject, predicate,
        object) combination is present in the knowledge file or appeared
        earlier in ``triplets``.
        """
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        seen = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key in seen:
                continue
            seen.add(key)
            knowledge.append(triplet)
            new_triplets.append(triplet)
        if not new_triplets:
            # Nothing changed, so there is nothing to rewrite or embed.
            return
        self.save_json_data(self.knowledge_file, knowledge)
        self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        """Embed ``triplets`` as "subject predicate object" and add them to the index."""
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            # The index is created lazily with the embedding width of the first batch.
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        # knowledge_data is extended in the same order as the vectors are added.
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        """Pickle ``(index, knowledge_data)`` to ``faiss_index_file``."""
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        """Restore ``(index, knowledge_data)`` from disk, or start empty."""
        if os.path.exists(self.faiss_index_file):
            # NOTE(review): pickle.load can execute code embedded in the file;
            # only load index files written by this pipeline.
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

def main():
    """Extract knowledge triplets for every transcript chunk and store them.

    Loads the transcript chunks and the enriched caption links, then builds
    one prompt per non-empty chunk: the transcript plus, when an enrichment
    is linked to the chunk, its object-action pairs and hand-object contacts.
    Each prompt goes to ``Chatbot.extract_valuable_knowledge`` and the
    returned triplets are saved, tagged with the chunk's start and end.

    Returns early, after printing a message, when an input file is missing.
    """
    # Alternative dataset:
    # enriched_links_path = "enriched_filtered_captions_to_chunks_mapping_hands.json"
    # all_chunks_path = "processed_json/stabilizer pressure control_chunks.json"
    enriched_links_path = "enriched_filtered_captions_to_chunks_mapping_hands_machine_cover_2.json"
    all_chunks_path = "processed_json_machine_cover_2/machine_cover_2_chunks.json"

    for required_path in (enriched_links_path, all_chunks_path):
        if not os.path.exists(required_path):
            print(f"Bestand '{required_path}' bestaat niet.")
            return

    with open(enriched_links_path, 'r', encoding='utf-8') as f:
        enriched_links = json.load(f)

    with open(all_chunks_path, 'r', encoding='utf-8') as f:
        chunk_data = json.load(f)
    all_chunks = chunk_data.get("chunks", [])

    print(f"{len(all_chunks)} chunks geladen.")
    print(f"{len(enriched_links)} verrijkte caption-links geladen.")

    # chunk id -> enrichment; a later entry for the same id replaces an earlier one.
    enrichment_map = {
        item["linked_chunk_id"]: item
        for item in enriched_links
        if item.get("linked_chunk_id")
    }

    chatbot = Chatbot()

    for i, chunk in enumerate(all_chunks, start=1):
        chunk_text = chunk.get("text", "").strip()
        start_time = chunk.get("start")
        end_time = chunk.get("end")
        # Same "<start>_<end>" format as the linked_chunk_id keys looked up below.
        chunk_id = f"{start_time}_{end_time}"

        if not chunk_text:
            print(f"Skipping lege chunk {chunk_id}")
            continue

        enrichment = enrichment_map.get(chunk_id)

        if enrichment:
            # .get keeps one incomplete link from aborting the whole run,
            # matching how the hand-object links are read below.
            relations = [
                f"{link.get('class_name', '')} → {link.get('action_label', '')}"
                for link in enrichment.get("object_action_links", [])
            ]
            # Unique, non-empty object names, sorted for a stable prompt.
            hand_object_contacts = sorted({
                h.get("class_name", "").strip()
                for h in enrichment.get("hand_object_links", [])
                if h.get("class_name", "").strip()
            })

            combined_input = (
                f"Transcript: {chunk_text}\n"
                f"Linked object-action pairs: {', '.join(relations)}\n"
                f"Hand-object interactions: {', '.join(hand_object_contacts)}"
            )
        else:
            combined_input = f"Transcript: {chunk_text}"

        print(f"\n[{i}] Extracting knowledge for chunk {chunk_id}")
        print(combined_input)

        extracted_knowledge = chatbot.extract_valuable_knowledge(combined_input)

        if extracted_knowledge:
            for triplet in extracted_knowledge:
                triplet["start"] = start_time
                triplet["end"] = end_time
            chatbot.save_knowledge(extracted_knowledge)

    # Report the file that was actually written instead of a fixed name.
    print(f"\nKennisextractie afgerond. Bekijk '{chatbot.knowledge_file}' voor de resultaten.")


if __name__ == "__main__":
    main()


In [None]:
# Stop the notebook-wide timer; `start_time` is set with time.time() in the first cell.
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Totaal uitgevoerde tijd: {elapsed_time:.2f} seconden")

In [None]:
import torch

# Report the installed PyTorch version and, when CUDA is available, the CUDA version and first GPU.
print(f"PyTorch versie: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA beschikbaar: {cuda_available}")
if cuda_available:
    for report_line in (
        f"CUDA versie: {torch.version.cuda}",
        f"GPU device: {torch.cuda.get_device_name(0)}",
    ):
        print(report_line)


## Test the generated knowledge base with an LLM that uses it for retrieval

In [None]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    """Holder for one shared ``Llama`` model per class definition.

    The first successful construction loads the GGUF model and caches the
    instance on the class; every later call returns that cached instance, so
    ``model_path`` and ``chat_format`` only take effect on the first call.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                # Load the model *before* publishing the instance: if loading
                # fails, ``_instance`` stays None and a later call can retry
                # instead of receiving a half-built object without ``llm``.
                instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
                cls._instance = instance
            return cls._instance

class Chatbot:
    """Interactive chatbot that answers with help of the stored knowledge triplets.

    Triplets live in ``knowledge_file`` (JSON) and in a FAISS index pickled,
    together with the matching triplet list, to ``faiss_index_file``. Chat
    messages are logged to ``messages_file``.
    """

    def __init__(self,
                 messages_file='messages.json',
                 knowledge_file='knowledge.json',
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        """Set up the LLM, the embedding model and the on-disk state.

        Args:
            messages_file: JSON file holding the logged chat messages.
            knowledge_file: JSON file holding the list of stored triplets.
            faiss_index_file: Pickle file holding ``(index, knowledge_data)``.
            model_name: Name passed to ``SentenceTransformer``.
        """
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same format as the deprecated ``datetime.utcnow()``
        (no UTC offset suffix), so existing stored timestamps stay comparable.
        """
        from datetime import timezone  # local import: only `datetime` is imported at file level
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def initialize_files(self):
        """Create the messages and knowledge files as empty JSON lists if missing."""
        for path in (self.messages_file, self.knowledge_file):
            if not os.path.exists(path):
                self.save_json_data(path, [])

    def load_json_data(self, file_path):
        """Return the parsed JSON content of ``file_path``."""
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding (the rest of this file also reads JSON as UTF-8).
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        """Write ``data`` to ``file_path`` as indented JSON."""
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    def save_message(self, role, content):
        """Append one chat message with a UTC timestamp to the messages file."""
        messages = self.load_json_data(self.messages_file)
        messages.append({"role": role, "content": content, "timestamp": self._utc_timestamp()})
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        """Timestamp ``triplets`` and store the ones that are not yet known.

        Every passed triplet gets a ``timestamp`` key (the input dicts are
        mutated). A triplet counts as already stored when its (subject,
        predicate, object) combination is present in the knowledge file or
        appeared earlier in ``triplets``.
        """
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        seen = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = self._utc_timestamp()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key in seen:
                continue
            seen.add(key)
            knowledge.append(triplet)
            new_triplets.append(triplet)
        if not new_triplets:
            # Nothing changed, so there is nothing to rewrite or embed.
            return
        self.save_json_data(self.knowledge_file, knowledge)
        self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        """Embed ``triplets`` as "subject predicate object" and add them to the index."""
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            # The index is created lazily with the embedding width of the first batch.
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        # knowledge_data is extended in the same order as the vectors are added.
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        """Pickle ``(index, knowledge_data)`` to ``faiss_index_file``."""
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        """Restore ``(index, knowledge_data)`` from disk, or start empty."""
        if os.path.exists(self.faiss_index_file):
            # NOTE(review): pickle.load can execute code embedded in the file;
            # only load index files written by this pipeline.
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        """Return up to ``top_k`` stored triplets closest to ``query``.

        Returns an empty list when no index or no triplets are loaded.
        """
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        _distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        total = len(self.knowledge_data)
        # Skip -1 entries and any position without a matching triplet, so a
        # mismatch between index and triplet list cannot raise IndexError.
        return [self.knowledge_data[idx] for idx in indices[0] if 0 <= idx < total]

    def generate_response(self, conversation_history, user_message):
        """Answer ``user_message`` using the top-5 retrieved triplets as context.

        Args:
            conversation_history: Accepted for interface compatibility; it is
                currently not added to the prompt.
            user_message: The user's question.

        Returns:
            The assistant's reply text.
        """
        knowledge_matches = self.search_knowledge(user_message, top_k=5)
        current_time = self._utc_timestamp()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Answer based on retrieved knowledge:\n"
            for t in knowledge_matches:
                line = f"- {t['subject']} {t['predicate']} {t['object']}"
                # Triplets stored through save_knowledge() of this class carry
                # no video timestamps; only mention them when present.
                if 'start' in t and 'end' in t:
                    line += f" (Videotimestamps: start: {t['start']}, end: {t['end']})"
                system_message += line + "\n"
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}] #+ conversation_history
        enriched_history.append({"role": "user", "content": user_message})
        print(enriched_history)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        """Run a console chat loop until the user types 'exit' or 'quit'."""
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            self.save_message(role='user', content=user_message)
            # Last three logged messages, including the one just saved.
            conversation = self.load_json_data(self.messages_file)[-3:]
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")
            self.save_message(role='assistant', content=assistant_response)


if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

In [None]:
# # something
# if enrichment:
#             caption = enrichment.get("caption", "").strip()
#             object_classes = [o.get("class_name", "") for o in enrichment.get("object_detections", [])]
#             action_labels = [a.get("label", "") for a in enrichment.get("actions", [])]
            
#             # Pas de linked object-action pairs correct aan
#             relations = []
#             for link in enrichment.get("object_action_links", []):
#                 object_name = link.get("class_name", "").strip()
#                 action_label = link.get("action_label", "").strip()

#                 # Check of de action_label eindigt op "something" en pas dan aan
#                 if "something" in action_label.lower():
#                     # Verwijder "something" uit de action label en voeg objectnaam toe
#                     new_action = action_label.lower().replace("something", object_name)
#                     new_action = new_action.strip()
#                     # Maak eerste letter hoofdletter als origineel dat ook was
#                     if action_label and action_label[0].isupper():
#                         new_action = new_action.capitalize()
#                     relations.append(new_action)
#                 else:
#                     relations.append(f"{object_name} → {action_label}")

#             unique_hand_contacts = set(
#                 f"{h['class_name']} ({h['contact_type']})"
#                 for h in enrichment.get("hand_object_links", [])
#             )
#             hand_object_contacts = sorted(unique_hand_contacts)

#             combined_input = (
#                 f"Transcript: {chunk_text}\n"
#                 f"Caption: {caption}\n"
#                 #f"Objects in scene: {', '.join(object_classes)}\n"
#                 #f"Detected actions: {', '.join(action_labels)}\n"
#                 f"Linked object-action pairs: {', '.join(relations)}\n"
#                 f"Hand-object interactions: {', '.join(hand_object_contacts)}"
#             )
#         else:
#             combined_input = f"Transcript: {chunk_text}"

#         print(f"\n[{i}] Extracting knowledge for chunk {chunk_id}")

## Extra controles voor de triplets

In [None]:
import os
import json
import threading
from llama_cpp import Llama

class LlamaSingleton:
    """Holder for one shared ``Llama`` model per class definition.

    The first successful construction loads the GGUF model and caches the
    instance on the class; every later call returns that cached instance, so
    ``model_path`` and ``chat_format`` only take effect on the first call.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                # Load the model *before* publishing the instance: if loading
                # fails, ``_instance`` stays None and a later call can retry
                # instead of receiving a half-built object without ``llm``.
                instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
                cls._instance = instance
            return cls._instance


class VIDAR:
    """Hypothesis-and-doubt reasoning pass over transcript chunks.

    For each chunk the LLM is asked for a hypothesis, its reasoning, a list of
    doubts and knowledge triplets. Every successfully parsed answer is
    appended to ``self.memory`` and the whole memory is written to
    ``output_file`` as JSON.
    """

    # JSON schema passed to the model as the required answer shape.
    _RESPONSE_SCHEMA = {
        "type": "object",
        "properties": {
            "hypothesis": {"type": "string"},
            "reasoning": {"type": "string"},
            "doubts": {
                "type": "array",
                "items": {"type": "string"}
            },
            "knowledge": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "subject": {"type": "string"},
                        "predicate": {"type": "string"},
                        "object": {"type": "string"}
                    },
                    "required": ["subject", "predicate", "object"]
                }
            }
        },
        "required": ["hypothesis", "reasoning", "doubts", "knowledge"]
    }

    def __init__(self, output_file="vidar_memory_machine_cover_2_prompt.json"):
        """Attach the shared LLM and load any memory already stored in ``output_file``."""
        self.llm = LlamaSingleton().llm
        self.output_file = output_file
        self.memory = []
        self._load_memory()

    def _load_memory(self):
        """Replace ``self.memory`` with the content of ``output_file`` when it exists."""
        if os.path.exists(self.output_file):
            with open(self.output_file, "r", encoding="utf-8") as f:
                self.memory = json.load(f)

    def _save_memory(self):
        """Write ``self.memory`` to ``output_file`` as indented JSON."""
        with open(self.output_file, "w", encoding="utf-8") as f:
            json.dump(self.memory, f, indent=2)

    def run_vidar_cycle(self, chunk_id, transcript, relations, hand_contacts, caption_text):
        """Reason about one chunk and store the parsed result in memory.

        Args:
            chunk_id: Identifier stored with the result and used in log output.
            transcript: Transcript text of the chunk.
            relations: Object-action pairs; accepted but currently not part
                of the prompt.
            hand_contacts: Hand-object contacts; accepted but currently not
                part of the prompt.
            caption_text: Caption text added to the prompt.

        Returns:
            None. Failures are printed and the chunk is skipped.
        """
        # Parenthesised concatenation instead of backslash continuations, so
        # disabled prompt lines can be commented out without breaking the
        # expression.
        observations = (
            f"Transcript: {transcript}\n"
            f"Captions: {caption_text}"
            # f"Linked object-action pairs: {', '.join(relations)}\n"
            # f"Hand-object interactions: {', '.join(hand_contacts)}"
        )

        messages = [
            {"role": "system", "content": (
                "You are VIDAR, a reasoning entity. Analyze the following observations from a scene. "
                "First, hypothesize what is happening, then reflect on what is uncertain. "
                "Return a JSON with keys: hypothesis, reasoning, doubts (list), knowledge (triplets). "
                "Each triplet has subject, predicate, object."
            )},
            {"role": "user", "content": observations}
        ]

        print(f"\nReasoning on chunk {chunk_id}...")
        print(observations)

        # Bound before the try so the error handler can print it even when the
        # completion call itself raises.
        response = None
        try:
            response = self.llm.create_chat_completion(
                messages=messages,
                response_format={
                    # llama-cpp-python documents "json_object" as the type
                    # that enables schema-constrained output.
                    "type": "json_object",
                    "schema": self._RESPONSE_SCHEMA,
                },
                temperature=0.6
            )

            content = response['choices'][0]['message']['content'].strip()

            # Remove a Markdown code fence around the JSON, if the model added one.
            if content.startswith("```json"):
                content = content[7:].strip()
            if content.endswith("```"):
                content = content[:-3].strip()

            parsed = json.loads(content)

            self.memory.append({
                "chunk_id": chunk_id,
                "observations": observations,
                "hypothesis": parsed.get("hypothesis", ""),
                "reasoning": parsed.get("reasoning", ""),
                "doubts": parsed.get("doubts", []),
                "knowledge": parsed.get("knowledge", [])
            })
            self._save_memory()

        except (json.JSONDecodeError, KeyError) as e:
            print(f"JSON parsing error in chunk {chunk_id}: {e}")
            print("Model response:\n", response)
        except Exception as e:
            # Deliberate best-effort: one failing chunk must not stop the run.
            print(f"Failed to process chunk {chunk_id}: {e}")



def main():
    """Run a VIDAR reasoning cycle for every non-empty transcript chunk.

    Reads the transcript chunks from ``chunks_path`` and, when available, the
    enrichment records from ``enrichment_path``. For each chunk the linked
    object-action pairs, hand-object contacts and captions are collected and
    handed to ``VIDAR.run_vidar_cycle``.

    The enrichment file is optional: when it is missing, every chunk is
    processed with its transcript only. Returns early (after printing a
    message) when the chunks file does not exist.
    """
    chunks_path = "processed_json_machine_cover_2/machine_cover_2_chunks.json"
    enrichment_path = "enriched_filtered_captions_to_chunks_mapping_hands_machine_cover_2.json"

    if not os.path.exists(chunks_path):
        print(f"Bestand '{chunks_path}' niet gevonden.")
        return

    with open(chunks_path, "r", encoding="utf-8") as f:
        chunks_data = json.load(f).get("chunks", [])

    # Both lookups are built in a single pass over the enrichment file, and
    # only when that file exists, so a missing file no longer raises.
    enrichment_map = {}  # chunk_id -> first enrichment record for that chunk
    caption_map = {}     # chunk_id -> all captions linked to that chunk
    if os.path.exists(enrichment_path):
        with open(enrichment_path, "r", encoding="utf-8") as f:
            enriched = json.load(f)
        for item in enriched:
            chunk_id = item.get("linked_chunk_id")
            if not chunk_id:
                continue
            # Keep only the first match per chunk_id.
            enrichment_map.setdefault(chunk_id, item)
            caption = item.get("caption")
            if caption:
                caption_map.setdefault(chunk_id, []).append(caption)
    else:
        print(f"Bestand '{enrichment_path}' niet gevonden; ga verder zonder verrijking.")

    vidar = VIDAR()

    for chunk in chunks_data:
        start = chunk.get("start")
        end = chunk.get("end")
        transcript = chunk.get("text", "").strip()
        # Chunk ids follow the "<start>_<end>" convention used as
        # "linked_chunk_id" in the enrichment records.
        chunk_id = f"{start}_{end}"

        if not transcript:
            print(f"⏭ Skip: Lege transcript in chunk {chunk_id}")
            continue

        enrichment = enrichment_map.get(chunk_id, {})
        relations = [
            f"{l['class_name']} → {l['action_label']}"
            for l in enrichment.get("object_action_links", [])
        ]
        # Unique, alphabetically sorted names of objects touched by a hand.
        hand_contacts = sorted({
            h.get("class_name", "").strip()
            for h in enrichment.get("hand_object_links", [])
            if h.get("class_name", "").strip()
        })
        # Alternative that also reports the contact type (kept for reference):
        # hand_contacts = sorted({
        #     f"{h.get('class_name', '').strip()} ({h.get('contact_type')})"
        #     for h in enrichment.get("hand_object_links", [])
        #     if h.get("class_name") and h.get("contact_type")
        # })

        # One bullet per caption; empty string when the chunk has no captions.
        caption_text = "\n".join(f"- {c}" for c in caption_map.get(chunk_id, []))

        vidar.run_vidar_cycle(chunk_id, transcript, relations, hand_contacts, caption_text)

    print("\n Reasoning compleet. Resultaten in 'vidar_memory.json'.")


# Entry point: start the VIDAR reasoning run when this code is executed directly.
if __name__ == "__main__":
    main()


## test

In [None]:
# import os
# import json
# import threading
# from llama_cpp import Llama

# # Singleton for the local LLaMA model
# class LlamaSingleton:
#     _instance = None
#     _lock = threading.Lock()

#     def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml"):
#         with cls._lock:
#             if cls._instance is None:
#                 cls._instance = super().__new__(cls)
#                 cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
#             return cls._instance


# class VIDAR:
#     def __init__(self, output_file="vidar_memory.json"):
#         self.llm = LlamaSingleton().llm
#         self.output_file = output_file
#         self.memory = []
#         self._load_memory()

#     def _load_memory(self):
#         if os.path.exists(self.output_file):
#             with open(self.output_file, "r", encoding="utf-8") as f:
#                 self.memory = json.load(f)

#     def _save_memory(self):
#         with open(self.output_file, "w", encoding="utf-8") as f:
#             json.dump(self.memory, f, indent=2)

#     def run_vidar_cycle(self, chunk_id, transcript, relations, hand_contacts):
#         observations = f"Transcript: {transcript}\n" \
#                        f"Linked object-action pairs: {', '.join(relations)}\n" \
#                        f"Hand-object interactions: {', '.join(hand_contacts)}"

#         messages = [
#             {"role": "system", "content": (
#                 "You are VIDAR, a reasoning entity. Analyze the following observations from a scene. "
#                 "First, hypothesize what is happening, then reflect on what is uncertain. "
#                 "Return a JSON with keys: hypothesis, reasoning, doubts (list), knowledge (triplets). "
#                 "Each triplet has subject, predicate, object."
#             )},
#             {"role": "user", "content": observations}
#         ]

#         print(f"\nReasoning on chunk {chunk_id}...")
#         print(observations)

#         try:
#             response = self.llm.create_chat_completion(
#                 messages=messages,
#                 temperature=0.6
#             )
#             content = response['choices'][0]['message']['content'].strip()
#             if content.startswith("```json"):
#                 content = content[7:]
#             if content.endswith("```"):
#                 content = content[:-3]
#             parsed = json.loads(content)

#             self.memory.append({
#                 "chunk_id": chunk_id,
#                 "observations": observations,
#                 "hypothesis": parsed.get("hypothesis", ""),
#                 "reasoning": parsed.get("reasoning", ""),
#                 "doubts": parsed.get("doubts", []),
#                 "knowledge": parsed.get("knowledge", [])
#             })

#             self._save_memory()

#         except Exception as e:
#             print(f"Failed to process chunk {chunk_id}: {e}")


# def main():
#     input_file = "enriched_filtered_captions_to_chunks_mapping_hands.json"
#     if not os.path.exists(input_file):
#         print(f"Inputbestand '{input_file}' niet gevonden.")
#         return

#     with open(input_file, "r", encoding="utf-8") as f:
#         data = json.load(f)

#     vidar = VIDAR()

#     for i, item in enumerate(data, start=1):
#         chunk_id = item.get("linked_chunk_id")
#         if not chunk_id:
#             print(f"⏭ Skip: geen chunk ID in item {i}")
#             continue

#         transcript = item.get("linked_chunk_text", "").strip()
#         if not transcript:
#             print(f"⏭ Skip: lege transcript in chunk {chunk_id}")
#             continue

#         relations = [
#             f"{l['class_name']} → {l['action_label']}"
#             for l in item.get("object_action_links", [])
#         ]
#         hand_contacts = sorted({
#             h.get("class_name", "").strip()
#             for h in item.get("hand_object_links", [])
#             if h.get("class_name", "").strip()
#         })

#         vidar.run_vidar_cycle(chunk_id, transcript, relations, hand_contacts)

#     print("\nVIDAR reasoning voltooid. Resultaat opgeslagen in 'vidar_memory.json'.")


# if __name__ == "__main__":
#     main()


In [None]:
import json
import os
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

def load_vidar_memory(path="vidar_memory_machine_cover_2_prompt.json"):
    """Return the parsed JSON content of the VIDAR memory file at ``path``.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    if os.path.exists(path):
        with open(path, mode='r', encoding='utf-8') as handle:
            memory = json.load(handle)
        return memory
    raise FileNotFoundError(f"Bestand '{path}' bestaat niet.")

def convert_to_knowledge_format(vidar_chunks):
    """Flatten VIDAR memory entries into a list of time-stamped triplets.

    Args:
        vidar_chunks: iterable of dicts, each with a ``"chunk_id"`` of the
            form ``"<start>_<end>"`` and a ``"knowledge"`` value that is
            either a list of triplets or a dict holding them under
            ``"triplets"``. A triplet is a 3-item list or a dict with the
            keys ``subject``, ``predicate`` and ``object``.

    Returns:
        A list of dicts with the keys ``subject``, ``predicate``, ``object``,
        ``start`` and ``end``. ``start``/``end`` are floats, or ``None`` when
        the chunk id does not contain a parseable value at that position.
        Entries without triplets and malformed triplets are skipped.
    """

    def _to_float(text):
        # Ids such as "None_None" or "_5" must not abort the whole conversion.
        try:
            return float(text)
        except (TypeError, ValueError):
            return None

    knowledge_triplets = []
    empty_count = 0

    for entry in vidar_chunks:
        chunk_id = entry.get("chunk_id", "")
        # Only an id with exactly one underscore carries a start and an end;
        # anything else (missing, non-string, extra underscores) gets no span.
        parts = chunk_id.split("_") if isinstance(chunk_id, str) else []
        if len(parts) == 2:
            start, end = _to_float(parts[0]), _to_float(parts[1])
        else:
            start = end = None

        knowledge = entry.get("knowledge", [])

        # Case 1: dict that wraps the triplets under a "triplets" key
        if isinstance(knowledge, dict) and "triplets" in knowledge:
            triplets_raw = knowledge["triplets"]
        # Case 2: already a list of triplets
        elif isinstance(knowledge, list):
            triplets_raw = knowledge
        else:
            triplets_raw = []

        if not triplets_raw:
            empty_count += 1
            continue

        for triplet in triplets_raw:
            if isinstance(triplet, list) and len(triplet) == 3:
                subj, pred, obj = triplet
            elif isinstance(triplet, dict) and all(k in triplet for k in ["subject", "predicate", "object"]):
                subj, pred, obj = triplet["subject"], triplet["predicate"], triplet["object"]
            else:
                # Unknown triplet shape: skip it rather than guess.
                continue

            knowledge_triplets.append({
                "subject": subj,
                "predicate": pred,
                "object": obj,
                "start": start,
                "end": end
            })

    print(f"{len(vidar_chunks)} chunks gevonden.")
    print(f"{empty_count} chunks bevatten GEEN knowledge-triplets.")
    print(f"{len(knowledge_triplets)} triplets geëxtraheerd.")
    return knowledge_triplets



def save_json(data, path):
    """Write ``data`` to ``path`` as UTF-8 JSON with 4-space indentation and report it."""
    with open(path, mode='w', encoding='utf-8') as out_file:
        json.dump(data, fp=out_file, indent=4)
    print(f"JSON opgeslagen als '{path}'.")

def build_faiss_index(triplets, model_name='all-MiniLM-L6-v2', index_path='faiss_index.pkl'):
    """Embed the triplets, index them with FAISS and pickle the result.

    Each triplet is embedded as the sentence "<subject> <predicate> <object>".
    The pickle written to ``index_path`` holds the tuple ``(index, triplets)``.

    Args:
        triplets: list of dicts with ``subject``, ``predicate`` and ``object``.
        model_name: SentenceTransformer model used for the embeddings.
        index_path: destination of the pickled ``(index, triplets)`` tuple.

    Returns:
        None. When ``triplets`` is empty nothing is built or written.
    """
    print("FAISS-index bouwen...")
    if not triplets:
        # Without any text there is no embedding dimension to size the index
        # from (``embeddings.shape[1]``), so stop before loading the model.
        print("Geen triplets beschikbaar; FAISS-index niet gebouwd.")
        return

    model = SentenceTransformer(model_name)
    texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
    embeddings = model.encode(texts)

    # Flat L2 index sized to the embedding dimension; vectors passed as float32.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings, dtype=np.float32))

    with open(index_path, 'wb') as f:
        pickle.dump((index, triplets), f)
    print(f"FAISS index opgeslagen als '{index_path}'.")

def main():
    """Turn the VIDAR memory file into knowledge triplets, save them and index them."""
    source_file = "vidar_memory_machine_cover_2_prompt.json"
    triplet_file = "knowledge_vidar_machine_cover_2_prompt.json"
    index_file = "faiss_index_vidar_machine_cover_2_prompt.pkl"

    print("Converteer VIDAR-output naar knowledge-formaat...")
    extracted = convert_to_knowledge_format(load_vidar_memory(source_file))

    # Persist the flat triplet list first, then build the search index from it.
    save_json(extracted, triplet_file)
    build_faiss_index(extracted, index_path=index_file, model_name='all-MiniLM-L6-v2')

# Entry point: run the VIDAR-to-knowledge conversion when this code is executed directly.
if __name__ == "__main__":
    main()


In [None]:
import json
import os
import pickle
import threading
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    """Process-wide holder for a single ``Llama`` model, exposed as ``.llm``.

    The model is created on the first instantiation; every later call returns
    that same object and ignores its arguments.
    """

    _instance = None
    _lock = threading.Lock()  # serializes the first-time construction

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", chat_format="chatml", n_ctx=2048):
        """Return the shared instance, loading the model on first use.

        Args:
            model_path: path of the GGUF model file passed to ``Llama``.
            chat_format: chat template name passed to ``Llama``.
            n_ctx: context window size passed to ``Llama``.
        """
        with cls._lock:
            if cls._instance is None:
                # Build the object completely before publishing it: if loading
                # the model raises, ``_instance`` stays None so the next call
                # retries instead of returning an object without ``.llm``.
                instance = super().__new__(cls)
                instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=n_ctx)
                cls._instance = instance
            return cls._instance

class Chatbot:
    """Extract knowledge triplets with the local LLM and persist them.

    Triplets are stored twice: as a JSON list in ``knowledge_file`` and as
    sentence embeddings in a FAISS index that is pickled, together with the
    indexed triplets, to ``faiss_index_file``.
    """

    # Keys every triplet must provide to be stored and embedded.
    _TRIPLET_KEYS = ("subject", "predicate", "object")

    # System prompt for the extraction call.
    _EXTRACTION_PROMPT = (
        "You are a knowledge extractor. Extract subject-predicate-object knowledge triplets "
        "from all the provided information: transcript, linked object-action pairs and hand-object interactions.\n"
        "Correct example: { \"subject\": \"person\", \"predicate\": \"holds\", \"object\": \"cup\" }\n"
        "Incorrect example: { \"subject\": \"he\", \"predicate\": \"does\", \"object\": \"it\" }\n"
        # Optional stricter instruction, currently disabled:
        # "Return ONLY a JSON object like: {\"valuable_knowledge\": [ ... ]}\n"
        "If no knowledge can be extracted, return: {\"valuable_knowledge\": []}"
    )

    def __init__(self,
                 messages_file='messages.json',
                 knowledge_file='knowledge.json',
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        """Load the LLM and embedding model, create missing files, load the index.

        Args:
            messages_file: JSON file created as an empty list when missing.
            knowledge_file: JSON file holding the list of stored triplets.
            faiss_index_file: pickle holding ``(index, knowledge_data)``.
            model_name: SentenceTransformer model used for embeddings.
        """
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        """Create the messages and knowledge files as empty JSON lists if absent."""
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w', encoding='utf-8') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        """Return the parsed JSON content of ``file_path``."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        """Write ``data`` to ``file_path`` as JSON with 4-space indentation."""
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding markdown code fence (with or without ``json`` tag)."""
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    @classmethod
    def _is_valid_triplet(cls, triplet):
        """Return True for a dict whose subject/predicate/object are scalar values."""
        return isinstance(triplet, dict) and all(
            isinstance(triplet.get(key), (str, int, float)) for key in cls._TRIPLET_KEYS
        )

    @classmethod
    def _parse_knowledge_response(cls, content):
        """Turn the raw model reply into a list of valid triplets.

        Accepts either ``{"valuable_knowledge": [...]}`` or a bare list.
        Anything else yields an empty list; malformed items are dropped.

        Raises:
            json.JSONDecodeError: if the reply is not valid JSON.
        """
        parsed = json.loads(cls._strip_code_fence(content))

        if isinstance(parsed, list):
            print("Model returned a list instead of expected object. Wrapping it.")
            parsed = {"valuable_knowledge": parsed}

        candidates = parsed.get("valuable_knowledge", []) if isinstance(parsed, dict) else []
        if not isinstance(candidates, list):
            candidates = []

        valid = [t for t in candidates if cls._is_valid_triplet(t)]
        if len(valid) != len(candidates):
            print(f"Skipping {len(candidates) - len(valid)} malformed triplet(s).")
        return valid

    def extract_valuable_knowledge(self, message):
        """Ask the LLM for triplets describing ``message``.

        Returns:
            A list of triplet dicts; empty when the reply cannot be parsed.
        """
        response = self.llm.create_chat_completion(
            messages=[
                {"role": "system", "content": self._EXTRACTION_PROMPT},
                {"role": "user", "content": message},
            ],
            temperature=0.5,
        )

        try:
            response_content = response['choices'][0]['message']['content']
            triplets = self._parse_knowledge_response(response_content)
        except (JSONDecodeError, KeyError, IndexError, TypeError, AttributeError) as e:
            # Covers invalid JSON as well as an unexpected response shape
            # (no choices, missing keys, ``content`` being None).
            print(f"JSON parse error: {e}")
            print("🔍 Raw model response:\n", response)
            return []

        print("Extracted knowledge from a chunk:", {"valuable_knowledge": triplets})
        return triplets

    def save_knowledge(self, triplets):
        """Append triplets that are not stored yet and index the new ones.

        Duplicates are detected on (subject, predicate, object). Items that
        are not valid triplets are skipped instead of aborting the run.
        """
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {
            (t['subject'], t['predicate'], t['object'])
            for t in knowledge
            if self._is_valid_triplet(t)
        }
        new_triplets = []
        for triplet in triplets:
            if not self._is_valid_triplet(triplet):
                print(f"Skipping malformed triplet: {triplet!r}")
                continue
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        """Embed ``triplets``, add them to the index and persist it."""
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            # First insert: size the flat L2 index to the embedding dimension.
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        """Pickle ``(index, knowledge_data)`` to ``faiss_index_file``."""
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        """Load ``(index, knowledge_data)`` from disk, or start empty if absent."""
        if os.path.exists(self.faiss_index_file):
            # NOTE(security): pickle.load executes code embedded in the file;
            # only point faiss_index_file at files produced by this program.
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

def main():
    """Extract knowledge triplets for every non-empty transcript chunk.

    Loads the transcript chunks and the enriched caption links, builds one
    prompt per chunk (transcript plus, when an enrichment record exists, the
    linked object-action pairs and hand-object contacts), asks ``Chatbot`` to
    extract triplets, stamps them with the chunk's start/end and stores them.

    Returns early (after printing a message) when either input file is missing.
    """
    # Previous data set, kept for reference:
    # enriched_links_path = "enriched_filtered_captions_to_chunks_mapping_hands.json"
    # all_chunks_path = "processed_json/stabilizer pressure control_chunks.json"
    enriched_links_path = "enriched_filtered_captions_to_chunks_mapping_hands_stabilizer_pressure_control_2.json"
    all_chunks_path = "processed_json_stabilizer_pressure_control_2/stabilizer_pressure_control_2_chunks.json"

    for required_path in (enriched_links_path, all_chunks_path):
        if not os.path.exists(required_path):
            print(f"Bestand '{required_path}' bestaat niet.")
            return

    with open(enriched_links_path, 'r', encoding='utf-8') as f:
        enriched_links = json.load(f)

    with open(all_chunks_path, 'r', encoding='utf-8') as f:
        chunk_data = json.load(f)
    all_chunks = chunk_data.get("chunks", [])

    print(f"{len(all_chunks)} chunks geladen.")
    print(f"{len(enriched_links)} verrijkte caption-links geladen.")

    # chunk_id -> enrichment record; with several records per chunk the last one wins.
    enrichment_map = {item.get("linked_chunk_id", ""): item for item in enriched_links if item.get("linked_chunk_id")}

    chatbot = Chatbot()

    for i, chunk in enumerate(all_chunks, start=1):
        chunk_text = chunk.get("text", "").strip()
        start_time = chunk.get("start")
        end_time = chunk.get("end")
        # Same "<start>_<end>" convention as "linked_chunk_id" in the enrichment file.
        chunk_id = f"{start_time}_{end_time}"

        if not chunk_text:
            print(f"Skipping lege chunk {chunk_id}")
            continue

        enrichment = enrichment_map.get(chunk_id)

        if enrichment:
            relations = [
                f"{l['class_name']} → {l['action_label']}"
                for l in enrichment.get("object_action_links", [])
            ]
            # Unique, alphabetically sorted names of objects touched by a hand.
            hand_object_contacts = sorted({
                h.get("class_name", "").strip()
                for h in enrichment.get("hand_object_links", [])
                if h.get("class_name", "").strip()
            })

            combined_input = (
                f"Transcript: {chunk_text}\n"
                f"Linked object-action pairs: {', '.join(relations)}\n"
                f"Hand-object interactions: {', '.join(hand_object_contacts)}"
            )
        else:
            combined_input = f"Transcript: {chunk_text}"

        print(f"\n[{i}] Extracting knowledge for chunk {chunk_id}")
        print(combined_input)

        extracted_knowledge = chatbot.extract_valuable_knowledge(combined_input)

        # The model output is free-form: keep only dict entries so that
        # stamping start/end cannot fail on strings or other stray items.
        if isinstance(extracted_knowledge, list):
            triplets = [t for t in extracted_knowledge if isinstance(t, dict)]
        else:
            triplets = []

        if triplets:
            for triplet in triplets:
                triplet["start"] = start_time
                triplet["end"] = end_time
            chatbot.save_knowledge(triplets)

    print("\nKennisextractie afgerond. Bekijk 'knowledge.json' voor de resultaten.")

# Entry point: run the knowledge extraction when this code is executed directly.
if __name__ == "__main__":
    main()
