## Downloading and Storing the test Videos

In [2]:
video_urls = [
    "https://www.youtube.com/watch?v=DhmZ6W1UAv4",
    "https://www.youtube.com/watch?v=YrydHPwRelI"
]

local_paths = [
    "C:/Users/algba/AI_Assignments/A3/video1.mp4",
    "C:/Users/algba/AI_Assignments/A3/video2.mp4"
]


### Function to download videos 

In [3]:
import os
from yt_dlp import YoutubeDL

def download_video(url, local_path):
    ydl_opts = {
        'outtmpl': local_path,
        'format': 'mp4'
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

for url, path in zip(video_urls, local_paths):
    download_video(url, path)


In [4]:
for path in local_paths:
    if os.path.exists(path):
        print(f"Video downloaded successfully: {path}")
    else:
        print(f"Failed to download video: {path}")


### Split Videos into Frames

In [6]:
import cv2
import os
import numpy as np

In [7]:
def split_video_to_frames(video_path, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_path = os.path.join(output_dir, f"frame_{frame_count:04d}.png")
        cv2.imwrite(frame_path, frame)
        frame_count += 1

    cap.release()
    print(f"Extracted {frame_count} frames from {video_path}")

video_paths = [
    "C:/Users/algba/AI_Assignments/A3/video1.mp4",
    "C:/Users/algba/AI_Assignments/A3/video2.mp4"
]

for video_path in video_paths:
    output_dir = os.path.splitext(video_path)[0] + "_frames"
    split_video_to_frames(video_path, output_dir)

### Preprocess Frames
##### Video 2 had a lot of frames to preprocess and I ran out of memory to continue. So just focusing on Video 1 but code will take any other video

In [8]:
def preprocess_frames(input_dir, output_file):
    frame_files = [f for f in os.listdir(input_dir) if f.endswith('.png')]
    frame_files.sort()

    processed_frames = []
    for frame_file in frame_files:
        frame_path = os.path.join(input_dir, frame_file)
        frame = cv2.imread(frame_path)

        # Resize frame
        resized_frame = cv2.resize(frame, (224, 224))

        # Convert color from BGR to RGB
        rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)

        # Normalize the frame
        normalized_frame = rgb_frame / 255.0

        processed_frames.append(normalized_frame)

    # Save processed frames to a .npy file
    np.save(output_file, np.array(processed_frames))
    print(f"Saved processed frames to {output_file}")

for video_path in video_paths:
    input_dir = os.path.splitext(video_path)[0] + "_frames"
    output_file = os.path.splitext(video_path)[0] + "_frames.npy"
    preprocess_frames(input_dir, output_file)


## Detect Drone in Frames

In [9]:
# Importing pre-trained YOLO model
import torch
import torchvision.transforms as T
from PIL import Image
import numpy as np
import os
import cv2

### Creating virtual environment (I was having issues with some modules that I had to update for other stuff) 

In [12]:
cd C:/Users/algba/AI_Assignments/A3

C:\Users\algba\AI_Assignments\A3


In [14]:
!virtualenv venv

created virtual environment CPython3.9.13.final.0-64 in 2415ms
  creator CPython3Windows(dest=C:\Users\algba\AI_Assignments\A3\venv, clear=False, no_vcs_ignore=False, global=False)
  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=C:\Users\algba\AppData\Local\pypa\virtualenv)
    added seed packages: pip==24.1, setuptools==70.1.0, wheel==0.43.0
  activators BashActivator,BatchActivator,FishActivator,NushellActivator,PowerShellActivator,PythonActivator


In [19]:
!git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...


In [33]:
import torch

model = torch.hub.load('./yolov5', 'yolov5s', source='local')  # Load model from local clone

YOLOv5  v7.0-338-gff063284 Python-3.9.13 torch-2.2.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


### Detecting objects in frames

In [31]:
import os
import numpy as np
import cv2
from PIL import Image
from torchvision import transforms as T

# Initialize YOLO model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).to(device)

def detect_objects_in_frames(input_file, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    frames = np.load(input_file)
    transform = T.ToTensor()
    total_frames = len(frames)
    print(f"Total frames to process: {total_frames}")

    for i, frame in enumerate(frames):
        print(f"Processing frame {i+1}/{total_frames}")
        frame = (frame * 255).astype(np.uint8)
        frame_pil = Image.fromarray(frame)
        frame_tensor = transform(frame_pil).unsqueeze(0).to(device)

        # object detection
        results = model(frame_tensor)

        # Numpy array 
        results_np = results[0].detach().cpu().numpy()

        # Checking for drone detections
        detections_found = False
        for det in results_np:
            x1, y1, x2, y2, confidence, class_id = det[:6]
            if confidence > 0.3 and int(class_id) == 0:  # Assuming class 0 is for drone
                detections_found = True
                detection_path = os.path.join(output_dir, f"detection_{i:04d}.png")
                frame_with_detections = frame.copy()

                cv2.rectangle(frame_with_detections, 
                              (int(x1), int(y1)), 
                              (int(x2), int(y2)), 
                              (0, 255, 0), 2)

                cv2.imwrite(detection_path, cv2.cvtColor(frame_with_detections, cv2.COLOR_RGB2BGR))
                print(f"Detected drone in frame {i+1} with confidence {confidence}, bbox: {int(x1), int(y1), int(x2), int(y2)}")

        if not detections_found:
            print(f"No detections in frame {i+1}")

    print("Processing completed.")

# Process the frames from the first video
input_file = "C:/Users/algba/AI_Assignments/A3/video1_frames.npy"
output_dir = "C:/Users/algba/AI_Assignments/A3/video1_detections"
detect_objects_in_frames(input_file, output_dir)


Using cache found in C:\Users\algba/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-7-10 Python-3.9.13 torch-2.2.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Total frames to process: 4941
Processing frame 1/4941
Detected drone in frame 1 with confidence 0.46178361773490906, bbox: (102, 42, 7, 9)
Detected drone in frame 1 with confidence 0.5888111591339111, bbox: (102, 41, 8, 10)
Detected drone in frame 1 with confidence 0.608321487903595, bbox: (102, 41, 7, 10)
Detected drone in frame 1 with confidence 0.382612019777298, bbox: (102, 42, 8, 10)
Detected drone in frame 1 with confidence 0.5307198762893677, bbox: (103, 41, 8, 10)
Detected drone in frame 1 with confidence 0.5322533249855042, bbox: (102, 42, 8, 10)
Detected drone in frame 1 with confidence 0.34522736072540283, bbox: (103, 41, 9, 10)
Detected drone in frame 1 with confidence 0.3086092472076416, bbox: (102, 42, 9, 10)
Processing frame 2/4941
Detected drone in frame 2 with confidence 0.46178361773490906, bbox: (102, 42, 7, 9)
Detected drone in frame 2 with confidence 0.5888111591339111, bbox: (102, 41, 8, 10)
Detected drone in frame 2 with confidence 0.608321487903595, bbox: (102, 

### kalman Fiter for Object Tracking

In [26]:
import cv2
from filterpy.kalman import KalmanFilter
import numpy as np
import torch

# Initialize Kalman Filter
kf = KalmanFilter(dim_x=4, dim_z=2)  # 2D state (x, y, dx, dy)

# Initial coordinates based on detection
initial_x = 100
initial_y = 100

kf.x = np.array([initial_x, initial_y, 0., 0.])  # initial state: position and velocity
kf.F = np.array([[1, 0, 1, 0],
                 [0, 1, 0, 1],
                 [0, 0, 1, 0],
                 [0, 0, 0, 1]])  # state transition matrix

# Measurement matrix
kf.H = np.array([[1, 0, 0, 0],
                 [0, 1, 0, 0]])

# Measurement noise covariance
kf.R = np.eye(2) * 0.5

# Noise covariance
kf.Q = np.eye(4) * 0.1

# Loading YOLO model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Loading video
video_path = "C:/Users/algba/AI_Assignments/A3/video1.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print(f"Error opening video file at {video_path}")
    exit()

# Output video settings
output_path = "C:/Users/algba/AI_Assignments/A3/tracked_video1.mp4"
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

if not out.isOpened():
    print(f"Error opening output video file at {output_path}")
    exit()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    # Performing detection of drone position using YOLO
    results = model(frame)
    detections = results.xyxy[0].cpu().numpy()
    
    # Finding the largest detection (drone)
    if len(detections) > 0:
        detection = max(detections, key=lambda x: x[4])  # x[4] is the confidence score
        x1, y1, x2, y2, confidence, class_id = detection
        detected_x = (x1 + x2) / 2
        detected_y = (y1 + y2) / 2
        
        # Updating Kalman filter with detected position
        kf.predict()
        kf.update([detected_x, detected_y])
        
        # Estimating state from Kalman filter
        estimated_x = int(kf.x[0])
        estimated_y = int(kf.x[1])
        estimated_dx = int(kf.x[2])
        estimated_dy = int(kf.x[3])
        
        # Calculating predicted next position based on estimated velocity
        predicted_x = estimated_x + estimated_dx
        predicted_y = estimated_y + estimated_dy
        
        # Updating bounding box coordinates based on predicted position
        bbox_x1 = predicted_x - 50
        bbox_y1 = predicted_y - 50
        bbox_x2 = predicted_x + 50
        bbox_y2 = predicted_y + 50
        
        # Drawing green rectangle based on updated bounding box coordinates
        cv2.rectangle(frame, (bbox_x1, bbox_y1), (bbox_x2, bbox_y2), (0, 255, 0), 2)
        
    # Writing the frame into the output video
    out.write(frame)
    
    cv2.imshow('Tracked Video Frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Model summary
cap.release()
out.release()
cv2.destroyAllWindows()


Using cache found in C:\Users\algba/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-7-10 Python-3.9.13 torch-2.2.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
