In [None]:
import cv2
import torch
import numpy as np
import os
from sklearn.cluster import KMeans
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from google.colab import files
import chromadb # Yeni import
from sentence_transformers import SentenceTransformer # Yeni import

# --- SETTINGS ---
SKIP_FRAMES = 30        # Process one frame out of every 30 (roughly 1 FPS on 30 FPS footage)
BLUR_THRESHOLD = 100.0  # Blur cutoff on Laplacian variance (lower values count as blurrier)
MIN_CLUSTERS = 5        # Minimum number of keyframes to keep
MAX_CLUSTERS = 20       # Maximum number of keyframes to keep

# 1. Choose the compute device: CUDA when available, CPU otherwise.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
    # Speed-up settings for GPUs such as the H100: allow TF32 in matmul and cuDNN.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
print(f"ðŸš€ KULLANILAN CÄ°HAZ: {device}")

# 2. Model loading (DINOv2 - Large): fetch via torch.hub, move it to the
#    chosen device and switch it to evaluation mode.
print("DINOv2 (Large) Modeli YÃ¼kleniyor...")
dinov2 = torch.hub.load("facebookresearch/dinov2", "dinov2_vitl14")
dinov2 = dinov2.to(device).eval()

# 3. Video processing: locate the input video, asking for an upload when it
#    is missing, then open it with OpenCV.
video_path = "/content/video.mp4"
if not os.path.exists(video_path):
    print("Video bulunamadÄ±, lÃ¼tfen yÃ¼kleyiniz:")
    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError("No file was uploaded; cannot continue without a video.")

    # Only one file can become video_path. Renaming every uploaded file onto
    # the same target would silently let the last one win, so use the first.
    filename = next(iter(uploaded))
    if len(uploaded) > 1:
        print(f"Multiple files uploaded; using '{filename}' and ignoring the rest.")

    # Compare full paths so the check does not depend on the working directory.
    if os.path.abspath(filename) != os.path.abspath(video_path):
        os.rename(filename, video_path)
        print(f"Uploaded file '{filename}' renamed to '{video_path}'.")

cap = cv2.VideoCapture(video_path)
# Fail early with a clear message; otherwise an unreadable file only shows up
# later as "no suitable frames found".
if not cap.isOpened():
    raise IOError("Video dosyasÄ± aÃ§Ä±lamadÄ±!")

fps = cap.get(cv2.CAP_PROP_FPS)
# Fall back to 30 when the container reports no usable FPS (0, negative or NaN).
if not fps > 0:
    fps = 30

frames = []      # sampled frames that passed the blur check (BGR arrays from OpenCV)
frame_ids = []   # position of each kept frame within the video
frame_count = 0  # number of frames consumed from the capture so far

print("Video taranÄ±yor (Ã–rnekleme + BulanÄ±klÄ±k KontrolÃ¼)...")
try:
    while cap.isOpened():
        # grab() only advances to the next frame; retrieve() is called solely
        # for the frames we sample, so skipped frames are never materialised.
        if not cap.grab():
            break

        # Step 1: sampling (only take a frame at the configured interval).
        if frame_count % SKIP_FRAMES == 0:
            ret, frame = cap.retrieve()
            if not ret:
                break

            # Step 2: blur check using the variance of the Laplacian.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()

            if laplacian_var > BLUR_THRESHOLD:
                frames.append(frame)
                frame_ids.append(frame_count)

        frame_count += 1
finally:
    # Release the capture even if decoding or the blur check raises.
    cap.release()

print(f"Toplam Kare: {frame_count}")
print(f"SeÃ§ilen Net Kare SayÄ±sÄ±: {len(frames)}")

if not frames:
    raise ValueError("HiÃ§ uygun kare bulunamadÄ±! EÅŸik deÄŸerini dÃ¼ÅŸÃ¼rmeyi deneyin.")

# 4. Embedding extraction: one DINOv2 embedding per selected frame.
embeddings_dinov2 = []  # collected per-frame embeddings; converted to an array below
print("DINOv2 Embeddingler Ã§Ä±karÄ±lÄ±yor...")

# DINOv2's reference preprocessing normalizes inputs with the ImageNet channel
# statistics; feeding raw [0, 1] pixels shifts the input distribution.
_IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406], device=device).view(1, 3, 1, 1)
_IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225], device=device).view(1, 3, 1, 1)
_EMBED_BATCH_SIZE = 16  # frames per forward pass

for start in range(0, len(frames), _EMBED_BATCH_SIZE):
    batch = []
    for frame in frames[start:start + _EMBED_BATCH_SIZE]:
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # Resize for DINOv2 (224 is a multiple of the ViT-L/14 patch size).
        image = image.resize((224, 224))
        batch.append(np.array(image))

    # (B, H, W, C) uint8 -> (B, C, H, W) float in [0, 1], then normalize.
    image_tensor = torch.from_numpy(np.stack(batch)).permute(0, 3, 1, 2).float() / 255.0
    image_tensor = (image_tensor.to(device) - _IMAGENET_MEAN) / _IMAGENET_STD

    with torch.no_grad():
        embedding = dinov2(image_tensor)

    # One row per image in the batch.
    embeddings_dinov2.extend(embedding.cpu().numpy())

embeddings_dinov2 = np.array(embeddings_dinov2)

# 5. Dynamic clustering (keyframe selection)
# Target: one keyframe per 10 seconds of video, kept between MIN_CLUSTERS and
# MAX_CLUSTERS, and never more than the number of available frames.
duration_sec = frame_count / fps
dynamic_k = int(duration_sec / 10)
# The cap on len(frames) must be applied last: KMeans raises when n_clusters
# exceeds the number of samples, which happened whenever fewer than
# MIN_CLUSTERS sharp frames were found.
NUM_CLUSTERS = min(len(frames), max(MIN_CLUSTERS, min(dynamic_k, MAX_CLUSTERS)))

print(f"Video SÃ¼resi: {duration_sec:.1f}sn -> Hedeflenen Keyframe SayÄ±sÄ±: {NUM_CLUSTERS}")

kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=42, n_init=10)
labels = kmeans.fit_predict(embeddings_dinov2)
keyframe_indices = []

# For each cluster, keep the frame whose embedding is closest to the centroid.
for cluster_id in range(NUM_CLUSTERS):
    cluster_indices = np.where(labels == cluster_id)[0]
    if cluster_indices.size == 0:
        # Can happen when the data has fewer distinct points than clusters;
        # argmin on an empty array would raise.
        continue
    cluster_embeddings = embeddings_dinov2[cluster_indices]
    center = kmeans.cluster_centers_[cluster_id]
    distances = np.linalg.norm(cluster_embeddings - center, axis=1)
    best = cluster_indices[np.argmin(distances)]
    keyframe_indices.append(int(best))

# Chronological order (indices into `frames` follow the sampling order).
keyframe_indices = sorted(keyframe_indices)

# 6. Captioning (BLIP-2) and ChromaDB integration
print("BLIP-2 Modeli YÃ¼kleniyor...")
blip2_checkpoint = "Salesforce/blip2-opt-2.7b"
processor = Blip2Processor.from_pretrained(blip2_checkpoint)
blip_model = Blip2ForConditionalGeneration.from_pretrained(blip2_checkpoint)
blip_model = blip_model.to(device)

# Load the sentence-embedding model (also defined in an earlier step;
# repeated here so this cell is safe to run on its own).
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
sentence_model = sentence_model.to(device)

# Start the ChromaDB client and collection (likewise repeated for safety).
chroma_client = chromadb.Client()
keyframe_collection = chroma_client.get_or_create_collection(name="keyframe_captions")

# Empty the collection so every run starts from scratch.
existing_ids = keyframe_collection.get()['ids']
if not existing_ids:
    print("ChromaDB koleksiyonu zaten boÅŸ.")
else:
    keyframe_collection.delete(ids=existing_ids)  # drop every stored entry
    print("ChromaDB koleksiyonu temizlendi.")


# Caption every selected keyframe with BLIP-2, embed the caption with the
# sentence model and store caption + metadata in the ChromaDB collection.
print("\n--- Keyframe'ler Ä°ÅŸleniyor ve ChromaDB'ye Kaydediliyor ---")
caption_data = []  # human-readable summary line per keyframe

for i, idx in enumerate(keyframe_indices):
    frame = frames[idx]
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    inputs = processor(image, return_tensors="pt").to(device)

    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=30)

    # Strip surrounding whitespace/newlines so the stored document, its
    # embedding and the printed line are all based on the clean caption.
    caption = processor.decode(output[0], skip_special_tokens=True).strip()

    # Embed the caption for semantic search.
    caption_embedding = sentence_model.encode(caption).tolist()

    # Metadata: the frame position is stored as a string, the timestamp as a
    # float so it can be used in numeric `where` filters.
    frame_id = str(frame_ids[idx])
    timestamp_float = float(frame_ids[idx] / fps)
    timestamp_str = f"{timestamp_float:.1f}s"  # display form only
    unique_id = f"keyframe_{i}"

    # Add the entry to ChromaDB.
    keyframe_collection.add(
        embeddings=[caption_embedding],
        metadatas=[{"frame_id": frame_id, "timestamp": timestamp_float}],
        documents=[caption],
        ids=[unique_id]
    )

    summary_line = f"Frame {frame_id} ({timestamp_str}): {caption}"
    caption_data.append(summary_line)
    print(summary_line)

print("\nâœ… Keyframe'ler iÅŸlendi ve ChromaDB'ye kaydedildi.")

# 7. Sample semantic search query against the ChromaDB collection.
print("\n--- Ã–rnek Anlamsal Arama (ChromaDB) ---")
query_text = "they are fighting"
query_embedding = sentence_model.encode(query_text).tolist()

print(f"Sorgu: '{query_text}' iÃ§in arama yapÄ±lÄ±yor...")

results = keyframe_collection.query(
    query_embeddings=[query_embedding],
    n_results=3,  # fetch the 3 nearest entries
    include=['documents', 'metadatas', 'distances']
)

print("\n--- Arama SonuÃ§larÄ± ---")
# query() returns one inner list per query embedding, so "no matches" is
# [[]] - which is truthy. Inspect the inner list for our single query.
matched_docs = results['documents'][0] if results['documents'] else []
if matched_docs:
    for i, doc in enumerate(matched_docs):
        metadata = results['metadatas'][0][i]
        distance = results['distances'][0][i]
        # Timestamps are stored as floats; fall back to the raw value otherwise.
        display_timestamp = f"{metadata['timestamp']:.1f}s" if isinstance(metadata['timestamp'], float) else metadata['timestamp']
        print(f"  SonuÃ§ {i+1} (Mesafe: {distance:.2f}):")
        print(f"    Frame ID: {metadata['frame_id']}, Zaman: {display_timestamp}")
        print(f"    AÃ§Ä±klama: {doc}")
else:
    print("HiÃ§ sonuÃ§ bulunamadÄ±.")

print("âœ… Anlamsal arama tamamlandÄ±.")

print("\n--- GeliÅŸmiÅŸ Anlamsal Arama (ChromaDB) ---")




In [None]:
# 1. Pick a different query text.
query_text_advanced = "they are celebrating"
print(f"GeliÅŸmiÅŸ Sorgu: '{query_text_advanced}' iÃ§in arama yapÄ±lÄ±yor...")

# 2. Embed the new query text.
query_embedding_advanced = sentence_model.encode(query_text_advanced).tolist()

# 3. and 4. Query ChromaDB for more results, with a metadata filter.
# Example: only return keyframes later than 100 seconds into the video.
results_advanced = keyframe_collection.query(
    query_embeddings=[query_embedding_advanced],
    n_results=5,  # fetch the 5 nearest entries
    where={
        "timestamp": {"$gt": 100.0}  # numeric comparison against the float timestamp
    },
    include=['documents', 'metadatas', 'distances']
)

print("\n--- GeliÅŸmiÅŸ Arama SonuÃ§larÄ± ---")
# query() returns one inner list per query embedding, so an empty match set is
# [[]] (truthy). Check the inner list, otherwise the "no results" branch is
# never reached when the filter excludes everything.
matched_docs_advanced = results_advanced['documents'][0] if results_advanced['documents'] else []
if matched_docs_advanced:
    for i, doc in enumerate(matched_docs_advanced):
        metadata = results_advanced['metadatas'][0][i]
        distance = results_advanced['distances'][0][i]
        # Timestamps are stored as floats; fall back to the raw value otherwise.
        display_timestamp_advanced = f"{metadata['timestamp']:.1f}s" if isinstance(metadata['timestamp'], float) else metadata['timestamp']
        print(f"  SonuÃ§ {i+1} (Mesafe: {distance:.2f}):")
        print(f"    Frame ID: {metadata['frame_id']}, Zaman: {display_timestamp_advanced}")
        print(f"    AÃ§Ä±klama: {doc}")
else:
    print("HiÃ§ sonuÃ§ bulunamadÄ±.")

print("âœ… GeliÅŸmiÅŸ anlamsal arama tamamlandÄ±.")

In [None]:
# 1. Pick a different query text.
query_text_advanced = "Security forces or police operations"
print(f"GeliÅŸmiÅŸ Sorgu: '{query_text_advanced}' iÃ§in arama yapÄ±lÄ±yor...")

# 2. Embed the new query text.
query_embedding_advanced = sentence_model.encode(query_text_advanced).tolist()

# 3. and 4. Query ChromaDB for more results, with a metadata filter.
# Example: only return keyframes later than 100 seconds into the video.
results_advanced = keyframe_collection.query(
    query_embeddings=[query_embedding_advanced],
    n_results=5,  # fetch the 5 nearest entries
    where={
        "timestamp": {"$gt": 100.0}  # numeric comparison against the float timestamp
    },
    include=['documents', 'metadatas', 'distances']
)

print("\n--- GeliÅŸmiÅŸ Arama SonuÃ§larÄ± ---")
# query() returns one inner list per query embedding, so an empty match set is
# [[]] (truthy). Check the inner list, otherwise the "no results" branch is
# never reached when the filter excludes everything.
matched_docs_advanced = results_advanced['documents'][0] if results_advanced['documents'] else []
if matched_docs_advanced:
    for i, doc in enumerate(matched_docs_advanced):
        metadata = results_advanced['metadatas'][0][i]
        distance = results_advanced['distances'][0][i]
        # Timestamps are stored as floats; fall back to the raw value otherwise.
        display_timestamp_advanced = f"{metadata['timestamp']:.1f}s" if isinstance(metadata['timestamp'], float) else metadata['timestamp']
        print(f"  SonuÃ§ {i+1} (Mesafe: {distance:.2f}):")
        print(f"    Frame ID: {metadata['frame_id']}, Zaman: {display_timestamp_advanced}")
        print(f"    AÃ§Ä±klama: {doc}")
else:
    print("HiÃ§ sonuÃ§ bulunamadÄ±.")

print("âœ… GeliÅŸmiÅŸ anlamsal arama tamamlandÄ±.")

In [None]:
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt # Ä°steÄŸe baÄŸlÄ± olarak Ä±zgara dÃ¼zeni iÃ§in kullanÄ±labilir

# Re-open the video so the frames behind the search results can be displayed.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError("Video dosyasÄ± aÃ§Ä±lamadÄ±!")

try:
    # Re-read the source FPS (defaulting to 30 when unavailable). It is not
    # used in this cell; the assignment is kept so `fps` stays defined as before.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0: fps = 30

    print("\n--- ChromaDB Arama SonuÃ§larÄ±nÄ±n GÃ¶rselleÅŸtirilmesi ---")

    # Text overlay style, shared by every annotated frame.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    font_thickness = 2
    text_color = (0, 255, 255)  # yellow (BGR)
    bg_color = (0, 0, 0)        # black backdrop behind the text
    y0, dy = 30, 30             # first baseline and line spacing, in pixels

    # Uses results_advanced from the previous cell. query() returns one inner
    # list per query embedding, so an empty match set is [[]] (truthy) and the
    # inner list has to be checked.
    matched_docs_advanced = results_advanced['documents'][0] if results_advanced['documents'] else []
    if matched_docs_advanced:
        for i, doc in enumerate(matched_docs_advanced):
            metadata = results_advanced['metadatas'][0][i]
            frame_id = int(metadata['frame_id'])
            timestamp_float = metadata['timestamp']
            distance = results_advanced['distances'][0][i]

            # Seek to the frame and decode it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
            ret, frame = cap.read()

            if ret:
                # Build the label lines for this frame.
                caption = doc.strip()
                text_frame_id = f"Frame ID: {frame_id}"
                text_timestamp = f"Time: {timestamp_float:.1f}s"
                text_caption = f"Caption: {caption}"
                text_distance = f"Distance: {distance:.2f}"

                for k, line in enumerate([text_frame_id, text_timestamp, text_distance, text_caption]):
                    y = y0 + k * dy
                    (text_w, text_h), _ = cv2.getTextSize(line, font, font_scale, font_thickness)
                    # Draw a black rectangle behind the text for readability.
                    cv2.rectangle(frame, (5, y - text_h - 5), (5 + text_w + 5, y + 5), bg_color, -1)
                    cv2.putText(frame, line, (10, y), font, font_scale, text_color, font_thickness, cv2.LINE_AA)

                print(f"\nGÃ¶rselleÅŸtirilen SonuÃ§ {i+1}:")
                print(f"  {text_frame_id}, {text_timestamp}, {text_distance}")
                print(f"  {text_caption}")
                cv2_imshow(frame)
            else:
                print(f"Frame {frame_id} okunamadÄ±.")
    else:
        print("GÃ¶rselleÅŸtirilecek arama sonucu bulunamadÄ±.")
finally:
    # Release the capture even if drawing or display raises.
    cap.release()

print("\nâœ… Keyframe gÃ¶rselleÅŸtirme tamamlandÄ±.")