In [None]:
import os
import numpy as np
import torch
import open_clip
from qdrant_client import QdrantClient
from sklearn.metrics.pairwise import cosine_similarity
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip


# Qdrant collection queried below, plus the input and output directories.
COLLECTION_NAME = "Video_RAG"
RAW_VIDEO_DIR = "raw_data"
OUTPUT_DIR = "output_clips"

# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Create the clip output directory up front; an existing one is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)


# Build the "ViT-B-32" CLIP model with the "openai" pretrained weights.
# The factory returns three values; the middle one is not used here.
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32",
    pretrained="openai",
)

# Move the model to the selected device and switch it to evaluation mode.
model = model.to(device)
model.eval()

# Tokenizer matching the same architecture name as the model.
tokenizer = open_clip.get_tokenizer("ViT-B-32")


# Qdrant connection. The API key is read from the environment only: a
# credential must never be embedded in source as a fallback value.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be rotated.
_qdrant_api_key = os.getenv("QDRANT_API_KEY")
if not _qdrant_api_key:
    raise RuntimeError(
        "QDRANT_API_KEY is not set; export it before running this notebook."
    )

client = QdrantClient(
    url="https://0ed1052e-7f23-462b-ab8a-70aaa05c675f.us-west-2-0.aws.cloud.qdrant.io",
    api_key=_qdrant_api_key,
)


def encode_query_text(text):
    """Embed one text string with the module-level CLIP model.

    Args:
        text: The string to embed.

    Returns:
        The squeezed output of ``model.encode_text`` for ``text``, moved to
        the CPU and converted to a NumPy array.
    """
    # The tokenizer is given a one-element list, i.e. a batch of one.
    batch = tokenizer([text])
    batch = batch.to(device)

    # Inference only, so gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        embedding = model.encode_text(batch)

    return embedding.squeeze().cpu().numpy()


def retrieve_video_chunks_hybrid(query, top_k=3):
    """Return chunk payloads matching ``query`` by embedding and by keyword.

    Payloads are read from the Qdrant collection, and each payload's
    ``"text"`` is embedded with ``encode_query_text``. The result is the union
    of the ``top_k`` payloads by cosine similarity to the query embedding and
    the ``top_k`` payloads by keyword hit count.

    Args:
        query: Free-text query string.
        top_k: How many payloads to take from each of the two rankings.
            Defaults to 3.

    Returns:
        A list of payload dicts without duplicates: similarity matches first
        (best first), then keyword matches not already included. Empty when
        the collection yields no payloads.
    """
    query_vec = encode_query_text(query)

    # NOTE: a single scroll call is made, so at most ``limit`` points are
    # considered; anything beyond that in the collection is not ranked.
    results = client.scroll(
        collection_name=COLLECTION_NAME, scroll_filter=None, limit=1000
    )
    # Skip points that carry no payload; there is nothing to rank or return.
    all_chunks = [point.payload for point in results[0] if point.payload]
    if not all_chunks:
        return []

    # Read the text once and tolerate a missing/None "text" in both rankings,
    # instead of raising KeyError in one ranking and defaulting in the other.
    texts = [chunk.get("text") or "" for chunk in all_chunks]

    # Ranking 1: cosine similarity between query and chunk-text embeddings.
    vecs = [encode_query_text(text) for text in texts]
    sims = cosine_similarity([query_vec], vecs)[0]
    top_vec_idxs = np.argsort(sims)[::-1][:top_k].tolist()

    # Ranking 2: number of query words occurring as substrings of the text.
    query_keywords = query.lower().split()
    keyword_scores = []
    for i, text in enumerate(texts):
        lowered = text.lower()
        score = sum(1 for kw in query_keywords if kw in lowered)
        if score > 0:
            keyword_scores.append((i, score))
    # The sort is stable, so ties keep their collection order.
    keyword_scores.sort(key=lambda pair: pair[1], reverse=True)
    top_kw_idxs = [i for i, _ in keyword_scores[:top_k]]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    selected_idxs = dict.fromkeys(top_vec_idxs + top_kw_idxs)
    return [all_chunks[i] for i in selected_idxs]


def hms_to_seconds(hms):
    """Convert an ``H:M:S`` or ``M:S`` timestamp string to whole seconds.

    Args:
        hms: Colon-separated string of two or three integer fields.

    Returns:
        The total number of seconds as an int.

    Raises:
        ValueError: If a field is not an integer, or if the string does not
            have exactly two or three fields.
    """
    # Convert every field first so a non-integer field fails before the
    # field-count check, whatever the number of fields.
    fields = [int(piece) for piece in hms.split(":")]
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid time format: {hms}")

    # Base-60 accumulation: each step promotes the running total by one unit.
    total = 0
    for value in fields:
        total = total * 60 + value
    return total


def cut_clip(video_file, start_hms, end_hms, save_path):
    """Extract the span between two timestamps from a video into a new file.

    Args:
        video_file: Path of the source video.
        start_hms: Start timestamp string accepted by ``hms_to_seconds``.
        end_hms: End timestamp string accepted by ``hms_to_seconds``.
        save_path: Path the extracted clip is written to.
    """
    # Both timestamps are converted to seconds before being handed to ffmpeg.
    ffmpeg_extract_subclip(
        video_file,
        hms_to_seconds(start_hms),
        hms_to_seconds(end_hms),
        outputfile=save_path,
    )


def process_query(title, query):
    """Retrieve the chunks for ``query`` and cut one clip per chunk.

    Clips are written under ``OUTPUT_DIR/<title with spaces as underscores>``.
    Each chunk payload is expected to provide ``"video"``, ``"start"`` and
    ``"end"``. Chunks whose source file is missing from ``RAW_VIDEO_DIR`` are
    reported and skipped.

    Args:
        title: Short label for the query; printed and used as the output
            sub-directory name.
        query: Full query text passed to ``retrieve_video_chunks_hybrid``.
    """
    print(f"🔍 Query: {title}")
    chunks = retrieve_video_chunks_hybrid(query)

    sub_output_dir = os.path.join(OUTPUT_DIR, title.replace(" ", "_"))
    os.makedirs(sub_output_dir, exist_ok=True)

    for chunk in chunks:
        video_id = chunk["video"]
        start = chunk["start"]
        end = chunk["end"]

        video_path = os.path.join(RAW_VIDEO_DIR, f"{video_id}.mkv")
        if not os.path.exists(video_path):
            print(f"⚠️ Warning: {video_path} does not exist, skipping")
            continue

        # Second offsets make the clip filename unique per chunk of a video.
        start_sec = hms_to_seconds(start)
        end_sec = hms_to_seconds(end)
        output_filename = f"{video_id}_chunk_{start_sec}_{end_sec}.mkv"
        output_path = os.path.join(sub_output_dir, output_filename)

        print(f" {video_path} [{start} - {end}] ➔ {output_path}")
        cut_clip(video_path, start, end, output_path)

    # Visual separator between the output of consecutive queries.
    print("\n" + "-" * 60 + "\n")


# Maps a short title (also used as the output sub-directory name) to the full
# query text used for retrieval.
queries = {
    "ResNet explanation": "Using only the videos, explain how ResNets work.",
    "CNN vs FCN": "Using only the videos, explain the advantages of CNNs over fully connected networks.",
    "Binary cross entropy": "Using only the videos, explain the binary cross entropy loss function.",
}

# Run retrieval and clip extraction for every query in turn.
for title, query in queries.items():
    process_query(title, query)



🔍 Query: ResNet explanation
 raw_data/video_22.mkv [0:03:30 - 0:04:00] ➔ output_clips/ResNet_explanation/video_22_chunk_210_240.mkv
MoviePy - Running:
>>> /workspaces/eng-ai-agents/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-aarch64-v7.0.2 -y -ss 210.00 -i raw_data/video_22.mkv -t 30.00 -map 0 -vcodec copy -acodec copy -copyts output_clips/ResNet_explanation/video_22_chunk_210_240.mkv
MoviePy - Command successful
 raw_data/video_05.mkv [0:23:00 - 0:23:30] ➔ output_clips/ResNet_explanation/video_05_chunk_1380_1410.mkv
MoviePy - Running:
>>> /workspaces/eng-ai-agents/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-aarch64-v7.0.2 -y -ss 1380.00 -i raw_data/video_05.mkv -t 30.00 -map 0 -vcodec copy -acodec copy -copyts output_clips/ResNet_explanation/video_05_chunk_1380_1410.mkv
MoviePy - Command successful
 raw_data/video_22.mkv [0:02:30 - 0:03:00] ➔ output_clips/ResNet_explanation/video_22_chunk_150_180.mkv
MoviePy - Running:
>>> /works