In [41]:
import cv2
import sqlite3
import math
import numpy as np
from datetime import timedelta

# YOLOv8 from Ultralytics
from ultralytics import YOLO

# BLIP for object captioning
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image


In [42]:

DB_NAME = "drone_security.db"

# 1) DATABASE INITIALIZATION
def init_db(db_name=DB_NAME):
    """
    Create the SQLite tables used by this notebook if they do not exist yet.

    Three tables are created:
    - ``detections``: one row per logged detection
      (timestamp, yolo_label, color, confidence, blip_caption).
    - ``alerts``: one row per generated alert (timestamp, alert_message).
    - ``telemetry``: one row per telemetry sample
      (timestamp, latitude, longitude, altitude).

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function again on an existing database leaves existing tables in place.

    Args:
        db_name: Path of the SQLite database file to open.
    """
    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()

        # Table for storing each detection
        c.execute('''
            CREATE TABLE IF NOT EXISTS detections (
                detection_id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                yolo_label TEXT,
                color TEXT,
                confidence REAL,
                blip_caption TEXT
            )
        ''')

        # Table for alerts
        c.execute('''
            CREATE TABLE IF NOT EXISTS alerts (
                alert_id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                alert_message TEXT
            )
        ''')

        # Table for telemetry
        c.execute('''
            CREATE TABLE IF NOT EXISTS telemetry (
                telemetry_id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                latitude REAL,
                longitude REAL,
                altitude REAL
            )
        ''')

        conn.commit()
    finally:
        # Close the connection even when one of the statements above fails,
        # so a failed initialization does not leave the database file open.
        conn.close()

# Create the schema in the default database (DB_NAME) before any cell
# tries to insert into or query the tables.
init_db()
print("Database initialized.")


Database initialized.


In [None]:

# YOLOv8 detector built from the "yolov8n.pt" weights. Kept as a module-level
# global because analyze_video() calls yolo_model.predict() on it.
yolo_model = YOLO("yolov8n.pt")  
print("YOLOv8 model loaded.")

# BLIP processor and captioning model, both created from the
# "Salesforce/blip-image-captioning-base" checkpoint. Kept as module-level
# globals because caption_object_with_blip() uses both of them.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
print("BLIP model loaded.")


YOLOv8 model loaded.
BLIP model loaded.


In [44]:


def approximate_color_name(r, g, b):
    """
    Map an (r, g, b) triple to the nearest named reference color.

    "Nearest" means the smallest Euclidean distance in RGB space to one of
    eight fixed reference colors. On a tie the color listed first in the
    palette wins. This is a naive approach and can be improved if needed.

    Args:
        r, g, b: Channel values on the 0-255 scale of the reference palette.
            Python numbers and NumPy scalars (including fixed-width integer
            types such as uint8) are accepted.

    Returns:
        The name of the closest reference color, or "unknown" when no
        distance is comparable (for example when an input is NaN).
    """
    color_map = {
        "red": (255, 0, 0),
        "green": (0, 255, 0),
        "blue": (0, 0, 255),
        "yellow": (255, 255, 0),
        "white": (255, 255, 255),
        "black": (0, 0, 0),
        "gray": (128, 128, 128),
        "silver": (192, 192, 192)
    }

    # Do the arithmetic in Python floats. Fixed-width NumPy integers (e.g. raw
    # uint8 pixel values) would otherwise wrap around in the subtraction and
    # squaring below and produce meaningless distances.
    r, g, b = float(r), float(g), float(b)

    min_dist = float("inf")
    chosen_color = "unknown"
    for cname, (cr, cg, cb) in color_map.items():
        dist = math.sqrt((r - cr)**2 + (g - cg)**2 + (b - cb)**2)
        # Strict '<' keeps the first palette entry on ties.
        if dist < min_dist:
            min_dist = dist
            chosen_color = cname
    return chosen_color

def get_box_color(frame, box):
    """
    Name the average color of the frame region inside a bounding box.

    Args:
        frame: Image array with shape (height, width, 3); the three channels
            are read as blue, green, red in that order.
        box: Four values (x1, y1, x2, y2). Each is converted with int(), and
            (x2, y2) is treated as the exclusive end of the region.

    Returns:
        The color name produced by approximate_color_name() for the mean
        pixel of the region, or "unknown" when the box has no area left
        after being clamped to the frame.
    """
    x1, y1, x2, y2 = map(int, box)
    h, w, _ = frame.shape

    # Clamp the box to the frame. Slice ends are exclusive, so the upper
    # limits are w and h; clamping to w-1 / h-1 would silently drop the last
    # column/row and reject 1-pixel boxes touching the right/bottom edge.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)

    if x2 <= x1 or y2 <= y1:
        return "unknown"

    crop = frame[y1:y2, x1:x2]
    # Flatten to a list of pixels and average each channel independently.
    mean_bgr = crop.reshape(-1, 3).mean(axis=0)
    b, g, r = mean_bgr
    return approximate_color_name(r, g, b)

def caption_object_with_blip(frame, box):
    """
    Caption the frame region inside a bounding box with the BLIP model.

    Uses the module-level ``blip_processor`` and ``blip_model``.

    Args:
        frame: Image array with shape (height, width, 3); it is converted
            with cv2.COLOR_BGR2RGB before being handed to BLIP.
        box: Four values (x1, y1, x2, y2). Each is converted with int(), and
            (x2, y2) is treated as the exclusive end of the region.

    Returns:
        The decoded caption string (generation is limited to 30 new tokens),
        or "N/A" when the box has no area left after being clamped to the
        frame.
    """
    x1, y1, x2, y2 = map(int, box)
    h, w, _ = frame.shape

    # Clamp the box to the frame. Slice ends are exclusive, so the upper
    # limits are w and h; clamping to w-1 / h-1 would cut off the last
    # column/row of objects that touch the right/bottom edge.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)

    if x2 <= x1 or y2 <= y1:
        return "N/A"

    crop = frame[y1:y2, x1:x2]
    pil_image = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))

    inputs = blip_processor(images=pil_image, return_tensors="pt")
    output_ids = blip_model.generate(**inputs, max_new_tokens=30)
    caption = blip_processor.decode(output_ids[0], skip_special_tokens=True)
    return caption

def simulate_telemetry_data(frame_seconds):
    """
    Produce a synthetic (latitude, longitude, altitude) sample for a given
    elapsed time. Demonstration only: swap in real telemetry or a richer
    simulation when available.

    The sample starts from a fixed origin and drifts linearly with
    ``frame_seconds``: latitude and longitude each grow by 0.0001 per second
    and altitude by 0.01 per second.

    Returns:
        A (lat, lon, alt) tuple.
    """
    # Fixed starting point of the simulated flight.
    origin_lat, origin_lon, origin_alt = 37.7749, -122.4194, 100.0

    # Linear offsets accumulated since the start of the video.
    horizontal_drift = frame_seconds * 0.0001
    climb = frame_seconds * 0.01

    return (
        origin_lat + horizontal_drift,
        origin_lon + horizontal_drift,
        origin_alt + climb,
    )

# 4) VIDEO ANALYSIS & LOGGING
def analyze_video(video_path, db_name=DB_NAME, skip_frames=30):
    """
    Run detection, captioning and logging over a video file.

    For every frame read from ``video_path``:
    - a simulated telemetry sample is stored in the 'telemetry' table;
    - on every ``skip_frames``-th frame, YOLOv8 runs with a confidence
      threshold of 0.3, and each box labelled car, truck, bus or motorcycle
      gets an approximate color and a BLIP caption, is stored in the
      'detections' table and is printed;
    - a detection labelled "car" whose color is "silver" additionally stores
      a row in the 'alerts' table and prints the alert.

    Timestamps are the elapsed video time truncated to whole seconds, so all
    frames within the same second share one timestamp.

    All rows are committed once, after the last frame. If processing fails
    part-way, the connection is closed without committing. The video capture
    and the database connection are always released.

    Args:
        video_path: Path of the video file to analyse.
        db_name: Path of the SQLite database to write to; the tables are
            expected to exist already (see init_db).
        skip_frames: Detection runs on frames whose index is a multiple of
            this value. Must be non-zero.

    Returns:
        None. If the video cannot be opened, an error is printed and nothing
        is processed.

    Raises:
        ValueError: If ``skip_frames`` is 0.
    """
    # Fail before opening any resource instead of dividing by zero mid-run.
    if skip_frames == 0:
        raise ValueError("skip_frames must be non-zero")

    # Used only when the capture does not report a usable frame rate.
    fallback_fps = 30.0

    # List of YOLO classes we want to analyze in detail
    relevant_labels = {"car", "truck", "bus", "motorcycle"}

    conn = sqlite3.connect(db_name)
    cap = None
    try:
        c = conn.cursor()

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        # 'not fps > 0' also catches NaN; either would break the division below.
        if not fps > 0:
            print(f"Warning: video reports FPS={fps}; assuming {fallback_fps} for timestamps.")
            fps = fallback_fps

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Compute the time in seconds for the current frame
            frame_time_seconds = int(frame_index / fps)
            video_timestamp = str(timedelta(seconds=frame_time_seconds))

            # -- TELEMETRY SIMULATION & STORAGE --
            lat, lon, alt = simulate_telemetry_data(frame_time_seconds)
            c.execute('''
                INSERT INTO telemetry (timestamp, latitude, longitude, altitude)
                VALUES (?, ?, ?, ?)
            ''', (video_timestamp, lat, lon, alt))

            # -- ONLY DETECT EVERY 'skip_frames' FRAMES --
            if frame_index % skip_frames == 0:
                results = yolo_model.predict(frame, conf=0.3)
                if len(results) > 0:
                    for box in results[0].boxes:
                        cls_id = int(box.cls[0])
                        label = results[0].names[cls_id]  # e.g. "car", "truck"
                        conf = float(box.conf[0])
                        xyxy = box.xyxy[0]

                        if label not in relevant_labels:
                            continue

                        # 1) Approx color
                        color_name = get_box_color(frame, xyxy)
                        # 2) BLIP caption
                        obj_caption = caption_object_with_blip(frame, xyxy)

                        # Insert detection into DB
                        c.execute('''
                            INSERT INTO detections (timestamp, yolo_label, color, confidence, blip_caption)
                            VALUES (?, ?, ?, ?, ?)
                        ''', (video_timestamp, label, color_name, conf, obj_caption))

                        # Print logs
                        print(f"Logs: {color_name} {label} (conf={conf:.2f}) "
                              f"| BLIP: {obj_caption} | Time: {video_timestamp} "
                              f"| Telemetry: lat={lat:.5f}, lon={lon:.5f}, alt={alt:.2f}")

                        # -- ALERT GENERATION: SILVER CAR --
                        if label == "car" and color_name == "silver":
                            alert_message = (f"ALERT: Silver car detected at {video_timestamp} "
                                             f"(lat={lat:.5f}, lon={lon:.5f}, alt={alt:.2f})")
                            c.execute('''
                                INSERT INTO alerts (timestamp, alert_message)
                                VALUES (?, ?)
                            ''', (video_timestamp, alert_message))
                            print(alert_message)

            frame_index += 1

        conn.commit()
    finally:
        # Release both resources on every exit path: normal completion,
        # the "could not open" early return, and any exception.
        if cap is not None:
            cap.release()
        conn.close()

    print("Video analysis complete.")

# 5) RUN THE ANALYSIS


In [45]:
# NOTE(review): this is an absolute path on the author's machine, so the cell
# only runs elsewhere after the path is changed.
VIDEO_PATH = "/Users/adarshpandey/Desktop/Project FlytBase/data/Test Video.mp4"  # Replace with your own video path
# Run the full pipeline, with detection on every 30th frame.
analyze_video(VIDEO_PATH, skip_frames=30)



0: 384x640 3 cars, 80.7ms
Speed: 12.1ms preprocess, 80.7ms inference, 14.7ms postprocess per image at shape (1, 3, 384, 640)
Logs: gray car (conf=0.72) | BLIP: a car driving down a road with a sign on it | Time: 0:00:00 | Telemetry: lat=37.77490, lon=-122.41940, alt=100.00
Logs: black car (conf=0.54) | BLIP: a blue car driving down a road | Time: 0:00:00 | Telemetry: lat=37.77490, lon=-122.41940, alt=100.00
Logs: gray car (conf=0.40) | BLIP: a truck driving down a highway with a red light | Time: 0:00:00 | Telemetry: lat=37.77490, lon=-122.41940, alt=100.00

0: 384x640 3 cars, 264.1ms
Speed: 10.9ms preprocess, 264.1ms inference, 7.2ms postprocess per image at shape (1, 3, 384, 640)
Logs: black car (conf=0.82) | BLIP: a car driving down a road with a sign on the side | Time: 0:00:00 | Telemetry: lat=37.77490, lon=-122.41940, alt=100.00
Logs: gray car (conf=0.43) | BLIP: a bus is driving down the road | Time: 0:00:00 | Telemetry: lat=37.77490, lon=-122.41940, alt=100.00
Logs: silver car

In [46]:

# 6) QUERY FUNCTIONS
def query_detections_by_label(label, db_name=DB_NAME):
    """
    Fetch every logged detection whose YOLO label equals ``label``.

    Args:
        label: Value to match against the ``yolo_label`` column, e.g. "car".
        db_name: Path of the SQLite database to read from.

    Returns:
        A list of tuples
        (detection_id, timestamp, yolo_label, color, confidence, blip_caption).
        The list is empty when nothing matches.
    """
    query = """
        SELECT detection_id, timestamp, yolo_label, color, confidence, blip_caption
        FROM detections
        WHERE yolo_label = ?
    """
    conn = sqlite3.connect(db_name)
    try:
        return conn.execute(query, (label,)).fetchall()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

def query_detections_by_color(color, db_name=DB_NAME):
    """
    Fetch every logged detection whose approximate color equals ``color``.

    Args:
        color: Value to match against the ``color`` column, e.g. "silver".
        db_name: Path of the SQLite database to read from.

    Returns:
        A list of tuples
        (detection_id, timestamp, yolo_label, color, confidence, blip_caption).
        The list is empty when nothing matches.
    """
    query = """
        SELECT detection_id, timestamp, yolo_label, color, confidence, blip_caption
        FROM detections
        WHERE color = ?
    """
    conn = sqlite3.connect(db_name)
    try:
        return conn.execute(query, (color,)).fetchall()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

def query_alerts(db_name=DB_NAME):
    """
    Fetch all generated alerts.

    Args:
        db_name: Path of the SQLite database to read from.

    Returns:
        A list of (alert_id, timestamp, alert_message) tuples.
    """
    conn = sqlite3.connect(db_name)
    try:
        return conn.execute(
            "SELECT alert_id, timestamp, alert_message FROM alerts"
        ).fetchall()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

def query_telemetry(db_name=DB_NAME):
    """
    Fetch all telemetry records.

    Args:
        db_name: Path of the SQLite database to read from.

    Returns:
        A list of (telemetry_id, timestamp, latitude, longitude, altitude)
        tuples.
    """
    conn = sqlite3.connect(db_name)
    try:
        return conn.execute(
            "SELECT telemetry_id, timestamp, latitude, longitude, altitude FROM telemetry"
        ).fetchall()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

# Example usage:
# Example usage: read back what analyze_video() logged to the default database.

# Every detection that YOLO labelled "car".
print("\nAll cars detected:")
car_rows = query_detections_by_label("car")
for row in car_rows:
    print(row)

# Every detection whose approximate color is "silver", whatever its label.
print("\nAll silver vehicles detected:")
silver_rows = query_detections_by_color("silver")
for row in silver_rows:
    print(row)

# Every row in the alerts table.
print("\nAll alerts generated:")
alert_rows = query_alerts()
for row in alert_rows:
    print(row)

# All telemetry rows are fetched, but only the first five are printed.
print("\nAll telemetry records (first 5 shown):")
telemetry_rows = query_telemetry()
for row in telemetry_rows[:5]:
    print(row)

# 7) QA & TEST CASES
def test_detections_logged(db_name=DB_NAME):
    """
    Smoke check: report whether the detections table contains any rows.

    Prints "Test Passed" with the row count when at least one detection is
    stored and "Test Failed" otherwise. The outcome is only printed; nothing
    is returned and no exception is raised for an empty table.

    Args:
        db_name: Path of the SQLite database to inspect.
    """
    conn = sqlite3.connect(db_name)
    try:
        count = conn.execute("SELECT COUNT(*) FROM detections").fetchone()[0]
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

    if count == 0:
        print("Test Failed: No detections found in DB.")
    else:
        print(f"Test Passed: {count} detections found in DB.")

# Run QA: prints a pass/fail line based on the row count of the detections
# table in the default database.
test_detections_logged()



All cars detected:
(1, '0', 'car', 'gray', 0.7188957929611206, 'a car driving down a road with a sign on it')
(2, '0', 'car', 'black', 0.5435592532157898, 'a blue car driving down a road')
(3, '0', 'car', 'gray', 0.4004042446613312, 'a truck driving down a highway with a red light')
(4, '0', 'car', 'black', 0.8175540566444397, 'a car driving down a road with a sign on the side')
(5, '0', 'car', 'gray', 0.42921680212020874, 'a bus is driving down the road')
(6, '0', 'car', 'silver', 0.31140992045402527, 'a white van parked in a parking lot')
(7, '1', 'car', 'black', 0.6088934540748596, 'a car driving down a road with a person in the back')
(8, '1', 'car', 'gray', 0.3537733852863312, 'a white van is parked in the dark')
(10, '2', 'car', 'gray', 0.40786299109458923, 'a white van is parked in the dark')
(11, '2', 'car', 'gray', 0.3873065114021301, 'a truck with a yellow cab and a black cab')
(13, '3', 'car', 'gray', 0.6460999250411987, 'the bmw x6 is seen in this aerial image')
(14, '3', 

In [47]:
import os
from getpass import getpass

# SECURITY: never store an API key in the notebook source. The key is taken
# from the GOOGLE_API_KEY environment variable; only when that is missing is
# it prompted for (without echo) and kept in this process's environment.
# Any key that was ever committed in this cell must be treated as leaked and
# revoked in the Google console.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")


from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini 1.5 Pro model setup. `llm` is a module-level global used by
# summarize_video_with_gemini() and ask_question_on_video().
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.4)

def summarize_video_with_gemini(frame_descriptions: list):
    """
    Ask the module-level ``llm`` for a one-sentence summary of a video.

    The descriptions are joined one per line, wrapped in a fixed prompt and
    sent in a single ``llm.invoke`` call.

    Args:
        frame_descriptions: List of description strings.

    Returns:
        The ``content`` of the model response with surrounding whitespace
        stripped.
    """
    joined_frames = "\n".join(frame_descriptions)

    # Context first, instruction last.
    header = f"Here is a video described frame by frame:\n{joined_frames}\n\n"
    instruction = "Generate a 1-sentence summary of the video."

    return llm.invoke(header + instruction).content.strip()

def ask_question_on_video(frames: list, user_question: str):
    """
    Ask the module-level ``llm`` a question about a described video.

    The descriptions are joined one per line and placed ahead of the
    question in a fixed prompt, which is sent in a single ``llm.invoke``
    call.

    Args:
        frames: List of description strings.
        user_question: The question to append to the prompt.

    Returns:
        The ``content`` of the model response with surrounding whitespace
        stripped.
    """
    context = "\n".join(frames)

    # Context first, question last.
    description_part = f"Here is the frame-by-frame description of a video:\n{context}\n\n"
    question_part = f"Now answer the following question based on the video: {user_question}"

    answer = llm.invoke(description_part + question_part)
    return answer.content.strip()

import sqlite3

# Connect to the DB
# NOTE(review): the file name is repeated here as a literal; it matches the
# DB_NAME constant defined earlier in the notebook.
conn = sqlite3.connect("drone_security.db")
cursor = conn.cursor()

# Fetch the BLIP captions in insertion order. There is one caption per logged
# detection (per bounding box), not one per video frame.
cursor.execute("SELECT blip_caption FROM detections ORDER BY detection_id ASC")
rows = cursor.fetchall()

# Each row is a 1-tuple; keep the caption text only.
frames = [row[0] for row in rows if row[0]]  # skips NULL and empty captions

conn.close()

# One model call for the summary of all captions.
video_summary = summarize_video_with_gemini(frames)
print("📽️ Video Summary:", video_summary)

# Follow-up Q&A: each question is a separate model call that re-sends all captions.
q1 = ask_question_on_video(frames, "What objects were seen in the video?")
q2 = ask_question_on_video(frames, "Was there any suspicious activity?")

print("🤖 Q1 Response:", q1)
print("🤖 Q2 Response:", q2)

📽️ Video Summary: This video shows a variety of vehicles, including cars, trucks, vans, buses, and police cars, driving and parked in various locations and conditions.
🤖 Q1 Response: The video primarily features various vehicles in motion or parked.  The most common objects are:

* **Cars:**  Many different cars, described by color (white, red, blue), type (van, truck, police car, school bus), and sometimes make and model (BMW X1, BMW X6, Chevrolet Camaro, Chevrolet Cruz, Ford Edge).
* **Trucks:** Including semi-trucks with trailers and trucks with different colored cabs.
* **Vans:**  Often white vans, sometimes with additional color details.
* **Buses:** Including school buses.
* **Police Cars:** Both in motion and parked.

Additionally, there are a few less common objects/scenes:

* **Road signs:**  Seen alongside the roads the vehicles are driving on.
* **Traffic lights:** At least one red light is mentioned.
* **Buildings:**  Some vehicles are seen driving next to buildings.
* **Pa