In [1]:
import copy
import os
import shutil

import cv2
import torch
import yaml

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pytube import YouTube
from filterpy.kalman import KalmanFilter

In [2]:
# Working directories. Each one is created on demand by the cell that first writes to it.
VIDEO_DIR = './videos'          # downloaded source videos
IMAGE_DIR = './images'          # one PNG per extracted video frame
DETECT_DIR = './detections'     # frames with the detection box drawn on them
KF_DIR = './kfs'                # detection frames with the Kalman-filtered track overlaid
OUTPUT_DIR = './outputs'        # rendered .mp4 clips

# DATASET_SRC is scanned for label/image pairs, which are copied into
# DATASET_DST/labels and DATASET_DST/images.
# NOTE(review): presumably DATASET_SRC is where the unzipped Kaggle archive lands — confirm.
DATASET_SRC = './Database1/Database1'
DATASET_DST = './datasets'

# Parallel lists: VIDEO_URLS[i] is saved as VIDEO_DIR/VIDEO_FILENAMES[i].
VIDEO_URLS = ['https://www.youtube.com/watch?v=DhmZ6W1UAv4', 'https://www.youtube.com/watch?v=YrydHPwRelI']
VIDEO_FILENAMES = ['video1.mp4', 'video2.mp4']

# 1080p 30fps
# Stream identifier passed to pytube's `streams.get_by_itag`.
ITAG = 137

# Class name written into the fine-tuning config and used to filter detections by name.
TARGET = 'drone'
# NOTE(review): TARGET_INDEX is not referenced by any cell visible in this notebook.
TARGET_INDEX = '0'
# Number of image paths handed to the detector per call.
BATCH_SIZE = 128

# Where the dataset description for yolov5/train.py is written.
FINE_TUNE_CONFIG = './custom.yaml'

# Used by mk_video both as the frame gap that splits two clips and as the
# minimum number of frames a clip must exceed to be rendered.
WINDOW_SIZE = 30

# os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [3]:
# Download video
# Fetch every source video that is not already present in VIDEO_DIR.
os.makedirs(VIDEO_DIR, exist_ok=True)

# The two lists are paired by position, so a length mismatch is a configuration error.
assert len(VIDEO_URLS) == len(VIDEO_FILENAMES), 'VIDEO_URLS and VIDEO_FILENAMES must have the same length'

for url, video_filename in zip(VIDEO_URLS, VIDEO_FILENAMES):
    video_path = f'{VIDEO_DIR}/{video_filename}'
    if os.path.exists(video_path):
        print(f'File {video_filename} existed.')
        continue
    stream = YouTube(url).streams.get_by_itag(ITAG)
    # get_by_itag returns None when the video does not offer the requested stream;
    # fail with a clear message instead of an AttributeError on None.
    if stream is None:
        raise RuntimeError(f'No stream with itag {ITAG} is available for {url}')
    stream.download(filename=video_path)

File video1.mp4 existed.
File video2.mp4 existed.


In [4]:
# Slice image
# Decode every downloaded video and store each frame as IMAGE_DIR/<video>_<index>.png.
if not os.path.exists(IMAGE_DIR):
    os.mkdir(IMAGE_DIR)

for video_file in VIDEO_FILENAMES:
    # Text before the first dot, e.g. video1
    stem = video_file.partition('.')[0]
    capture = cv2.VideoCapture(f'{VIDEO_DIR}/{video_file}')
    n_frames = 0
    ok, image = capture.read()
    # read() reports failure once the stream is exhausted, which ends the loop
    while ok:
        cv2.imwrite(f'{IMAGE_DIR}/{stem}_{n_frames}.png', image)
        n_frames += 1
        ok, image = capture.read()
    capture.release()
    print(f'Video file {video_file} is divided into {n_frames} frames.')

Video file video1.mp4 is divided into 4941 frames.
Video file video2.mp4 is divided into 15409 frames.


In [5]:
# Download dataset and unzip
# Both lines are shell escapes: the Kaggle CLI downloads the archive into the current
# working directory, then `unzip -q` extracts it there without listing the files.
# NOTE(review): presumably needs Kaggle API credentials to be configured, and `unzip`
# without -o/-n may stop to ask about overwriting on a re-run — confirm.
!kaggle datasets download -d sshikamaru/drone-yolo-detection
!unzip -q drone-yolo-detection.zip

Downloading drone-yolo-detection.zip to /Resource/fisher_file/summer/acs/ai/hw3
100%|████████████████████████████████████████| 157M/157M [57:45<00:00, 52.3kB/s]
100%|████████████████████████████████████████| 157M/157M [57:45<00:00, 47.6kB/s]


In [6]:
# Preprocess dataset: copy every label file together with its image from DATASET_SRC
# into the DATASET_DST/labels and DATASET_DST/images folders.
# Create both sub-folders unconditionally: if DATASET_DST already existed without them,
# shutil.copy would otherwise create plain files named "labels" and "images".
os.makedirs(f'{DATASET_DST}/images', exist_ok=True)
os.makedirs(f'{DATASET_DST}/labels', exist_ok=True)

labels_without_image = []
for filename in os.listdir(DATASET_SRC):
    # splitext keeps dots inside the name intact (split('.')[0] would truncate "a.b.txt" to "a")
    prefix, extension = os.path.splitext(filename)
    if extension != '.txt':
        continue
    image_path = f'{DATASET_SRC}/{prefix}.JPEG'
    # A text file with no matching image (for example a class list) is not a sample;
    # skip it instead of aborting the whole copy with FileNotFoundError.
    if not os.path.exists(image_path):
        labels_without_image.append(filename)
        continue
    shutil.copy(f'{DATASET_SRC}/{prefix}.txt', f'{DATASET_DST}/labels')
    shutil.copy(image_path, f'{DATASET_DST}/images')

if labels_without_image:
    print(f'Skipped {len(labels_without_image)} .txt file(s) without a matching .JPEG: {labels_without_image}')

In [7]:
# Origin model
# Stock YOLOv5s with pretrained weights, loaded through torch.hub (not fine-tuned).
# NOTE(review): `model_origin` is not referenced by any later cell visible here —
# presumably kept as a baseline for comparison; confirm or remove.
model_origin = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Using cache found in /home/fisher/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-11-8 Python-3.9.16 torch-2.0.1+cu117 CUDA:0 (Tesla V100-SXM2-32GB, 32511MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:07<00:00, 2.00MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [8]:
# Clone yolov5 repo and install requirements.txt
# The local checkout provides yolov5/train.py, which the fine-tuning cell runs.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# NOTE(review): `git clone` will presumably report an error on a re-run once ./yolov5
# already exists, and the requirements are installed unpinned — confirm this is acceptable.
!git clone https://github.com/ultralytics/yolov5
%pip install -r yolov5/requirements.txt

Cloning into 'yolov5'...
remote: Enumerating objects: 16057, done.[K
remote: Total 16057 (delta 0), reused 0 (delta 0), pack-reused 16057[K
Receiving objects: 100% (16057/16057), 14.66 MiB | 1.89 MiB/s, done.
Resolving deltas: 100% (11028/11028), done.
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Note: you may need to restart the kernel to use updated packages.


In [9]:
# Add custom.yaml
# Dataset description handed to yolov5/train.py: one class, and the same image folder
# for both the training and the validation split.
# NOTE(review): 'path' is DATASET_DST with an extra leading '.', i.e. '../datasets' —
# presumably because train.py resolves it relative to the yolov5 checkout; confirm.
custom_data = dict(
    path='.' + DATASET_DST,
    train='images',
    val='images',
    names={0: str(TARGET)},
)

with open(FINE_TUNE_CONFIG, 'w') as config_file:
    yaml.dump(custom_data, config_file)


In [16]:
# Fine tune
# Runs YOLOv5's training script as a shell command: image size 640, 5 epochs, starting
# from the yolov5s.pt checkpoint, on the dataset described by custom.yaml.
# NOTE(review): the config file name is typed literally here rather than taken from
# FINE_TUNE_CONFIG — keep the two in sync.
!python yolov5/train.py --img 640 --epochs 5 --data custom.yaml --weights yolov5s.pt

2023-11-11 15:26:45.722520: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-11 15:26:45.722571: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-11 15:26:45.722600: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=custom.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=5, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, 

In [17]:
# Load the fine-tuned detector produced by the training cell.
# A hard-coded run folder such as exp4 only exists after a specific number of training
# runs, so a fresh Restart & Run All would fail; use the most recently written best.pt.
TRAIN_RUNS_DIR = './yolov5/runs/train'
weight_candidates = [
    weights_path
    for weights_path in (
        f'{TRAIN_RUNS_DIR}/{run_name}/weights/best.pt' for run_name in os.listdir(TRAIN_RUNS_DIR)
    )
    if os.path.isfile(weights_path)
]
if not weight_candidates:
    raise FileNotFoundError(f'No weights/best.pt found under {TRAIN_RUNS_DIR}; run the fine-tuning cell first')
best_weights = max(weight_candidates, key=os.path.getmtime)
# Show which run is used so the choice is visible in the notebook output
print(f'Loading fine-tuned weights from {best_weights}')
model = torch.hub.load('ultralytics/yolov5', 'custom', path=best_weights)

Using cache found in /home/fisher/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-11-8 Python-3.9.16 torch-2.0.1+cu117 CUDA:0 (Tesla V100-SXM2-32GB, 32511MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


In [18]:
def concat(prefix: str):
    """Return a one-argument function that maps a filename to '<prefix>/<filename>'."""
    return lambda filename: f'{prefix}/{filename}'

# Path of every entry in IMAGE_DIR, in the order os.listdir returns them.
to_image_path = concat(IMAGE_DIR)
imgs = [to_image_path(entry) for entry in os.listdir(IMAGE_DIR)]

In [19]:
# Run the fine-tuned detector over all frames, BATCH_SIZE image paths per call;
# `results` holds one result object per batch.
results = [
    model(imgs[start:start + BATCH_SIZE])
    for start in range(0, len(imgs), BATCH_SIZE)
]

In [20]:
# Flatten the batched detection results into one table with a row per detected box,
# then persist it as detection_result.csv.
df_data = []

for result in results:
    frames = result.files
    assert len(frames) == len(result)
    # result.pandas() converts the whole batch on every call, so fetch the per-image
    # tables once per batch instead of once per image.
    records = result.pandas().xyxy
    for frame_file, record in zip(frames, records):
        prefix = frame_file.split('.')[0]    # e.g: video1_123
        # Split on the last underscore so video names of any length parse correctly
        # (fixed slices only work for six-character names such as "video1").
        video_name, frame_number = prefix.rsplit('_', 1)
        for _, row in record.iterrows():
            # Append
            df_data.append([prefix, video_name, frame_number, *row.to_list()])

# Column name -> dtype; the key order matches the order of the values appended above.
table = {'filename': str,
         'video': str,
         'frame': int,
         'xmin': float,
         'ymin': float,
         'xmax': float,
         'ymax': float,
         'confidence': float,
         'class': int,
         'name': str}

df = pd.DataFrame(columns=table.keys(), data=df_data).astype(table)
df.to_csv('detection_result.csv', index=False)

In [21]:
# Detections of the target class for each video, ordered by frame number.
is_target = df['name'] == TARGET
video1_df = df.loc[is_target & (df['video'] == 'video1')].sort_values('frame')
video2_df = df.loc[is_target & (df['video'] == 'video2')].sort_values('frame')

In [22]:
def draw_retangle(file_path: str, position: tuple, label: str, save_path: str):
    """Draw a labelled bounding box on an image and save the result.

    Args:
        file_path: image to read.
        position: (xmin, ymin, xmax, ymax) of the box; values are truncated to int.
        label: text drawn at the top-left corner of the box.
        save_path: where the annotated image is written.

    Raises:
        FileNotFoundError: if `file_path` cannot be read as an image.
        OSError: if the annotated image cannot be written to `save_path`.
    """
    img = cv2.imread(file_path)
    # cv2.imread signals failure by returning None rather than raising
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {file_path}')
    xmin, ymin, xmax, ymax = (int(value) for value in position[:4])
    cv2.rectangle(img, pt1=(xmin, ymin), pt2=(xmax, ymax), color=(0, 255, 0), thickness=2)
    cv2.putText(img=img, text=label, org=(xmin, ymin), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 255), thickness=1)
    # cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(filename=save_path, img=img):
        raise OSError(f'Cannot write image: {save_path}')

In [23]:
# Draw every target detection onto its frame and save the result in DETECT_DIR.
os.makedirs(DETECT_DIR, exist_ok=True)

# Frames already annotated during this run. A frame can hold several detections; the
# second and later boxes must be drawn on top of the annotated copy, otherwise each
# draw would start again from the raw frame and only the last box would survive.
annotated_frames = set()
for _, row in df[df['name'] == TARGET].iterrows():
    output_path = f'./{DETECT_DIR}/{row["filename"]}.png'
    if row['filename'] in annotated_frames:
        source_path = output_path
    else:
        source_path = f'{IMAGE_DIR}/{row["filename"]}.png'
    draw_retangle(source_path, (row['xmin'], row['ymin'], row['xmax'], row['ymax']), label=row['name'], save_path=output_path)
    annotated_frames.add(row['filename'])

In [24]:
def kalman_filter(df: pd.DataFrame, fps: float = 30.0) -> list:
    """Smooth the centres of the detection boxes with a constant-velocity Kalman filter.

    The state is [x, y, vx, vy]; each row of `df` contributes one measurement, the
    centre of its (xmin, ymin, xmax, ymax) box.

    Args:
        df: detections with 'xmin', 'ymin', 'xmax' and 'ymax' columns, in track order.
        fps: frame rate; the filter advances by 1 / fps per row.

    Returns:
        One filtered [x, y] array per row of `df`, in the same order. An empty `df`
        yields an empty list.

    NOTE(review): the time step is the same for every row, so gaps between detected
    frames are not reflected in the prediction step.
    """
    # Nothing to track: avoid indexing the first row of an empty frame
    if len(df) == 0:
        return []

    # parameters
    first = df.iloc[0]
    initial_x = (first['xmin'] + first['xmax']) / 2
    initial_y = (first['ymin'] + first['ymax']) / 2
    initial_velocity_x = 0.1
    initial_velocity_y = 0.1
    dt = 1.0 / fps
    # kalman filter
    kf = KalmanFilter(dim_x=4, dim_z=2)
    kf.x = np.array([initial_x, initial_y, initial_velocity_x, initial_velocity_y])
    # Only the position part of the state is observed
    kf.H = np.array([[1, 0, 0, 0],
                     [0, 1, 0, 0]])
    # Constant-velocity transition: position += velocity * dt
    kf.F = np.array([[1, 0, dt, 0],
                     [0, 1, 0, dt],
                     [0, 0, 1, 0],
                     [0, 0, 0, 1]])
    # Scale the filter's default process and measurement noise matrices
    kf.Q *= 0.01
    kf.R *= 0.1

    trajectory = []
    for _, row in df.iterrows():
        x = (row['xmin'] + row['xmax']) / 2
        y = (row['ymin'] + row['ymax']) / 2
        measurement = np.array([x, y])
        kf.predict()
        kf.update(measurement)
        # Copy so that stored points never alias the filter's own state array
        trajectory.append(np.array(kf.x[:2], copy=True))
    return trajectory


def plot_trajectory(trajectory: list) -> None:
    """Plot a sequence of (x, y) points as a connected line with a marker on each point."""
    points = np.array(trajectory)
    xs, ys = points[:, 0], points[:, 1]
    plt.plot(xs, ys, marker='o', label='Trajectory')
    plt.xlabel('X Coordinate')
    plt.ylabel('Y Coordinate')
    plt.legend()
    plt.show()


In [25]:
# Filtered track for each video, one point per target detection.
trajectory1, trajectory2 = map(kalman_filter, (video1_df, video2_df))

In [26]:
def draw_point(file_path: str, positions: list, save_path: str):
    """Draw a small filled red dot at every position on an image and save the result.

    Args:
        file_path: image to read.
        positions: iterable of (x, y) points; coordinates are truncated to int.
        save_path: where the annotated image is written.

    Raises:
        FileNotFoundError: if `file_path` cannot be read as an image.
        OSError: if the annotated image cannot be written to `save_path`.
    """
    img = cv2.imread(file_path)
    # cv2.imread signals failure by returning None rather than raising
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {file_path}')
    for position in positions:
        # thickness=-1 fills the circle
        cv2.circle(img, (int(position[0]), int(position[1])), 2, (0, 0, 255), -1)
    # cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(filename=save_path, img=img):
        raise OSError(f'Cannot write image: {save_path}')

# Overlay the growing filtered track on each annotated frame: the n-th detection of a
# video gets the first n trajectory points drawn on it, and the result goes to KF_DIR.
if not os.path.exists(KF_DIR):
    os.mkdir(KF_DIR)

kf1_counter = 0
for kf1_counter, (_, row) in enumerate(video1_df.iterrows(), start=1):
    frame_name = row["filename"]
    draw_point(f'{DETECT_DIR}/{frame_name}.png', trajectory1[:kf1_counter], f'./{KF_DIR}/{frame_name}.png')

kf2_counter = 0
for kf2_counter, (_, row) in enumerate(video2_df.iterrows(), start=1):
    frame_name = row["filename"]
    draw_point(f'{DETECT_DIR}/{frame_name}.png', trajectory2[:kf2_counter], f'./{KF_DIR}/{frame_name}.png')


In [28]:
def _group_frames(df: pd.DataFrame, max_gap: int, min_length: int) -> list:
    """Split the filenames of `df` into runs whose consecutive frame numbers differ by less than `max_gap`, keeping runs longer than `min_length`."""
    runs = []
    current_run = []
    previous_frame = None
    for frame_number, filename in zip(df['frame'], df['filename']):
        # A large jump in frame number closes the current run and starts a new one
        if previous_frame is not None and frame_number - previous_frame >= max_gap:
            runs.append(current_run)
            current_run = []
        current_run.append(filename)
        previous_frame = frame_number
    # The final run has no following gap to close it, so flush it explicitly
    runs.append(current_run)
    return [run for run in runs if len(run) > min_length]


def mk_video(df: pd.DataFrame, fps: float = 30) -> None:
    """Render the frames listed in `df` as one .mp4 clip per continuous run of detections.

    Rows are grouped into runs in which consecutive 'frame' values differ by less than
    WINDOW_SIZE; runs with more than WINDOW_SIZE frames are written to
    OUTPUT_DIR/<video>_<index>.mp4 from the images KF_DIR/<filename>.png.

    Args:
        df: detections of one video, sorted by 'frame', with 'frame', 'filename' and
            'video' columns.
        fps: frame rate of the written clips.

    Raises:
        FileNotFoundError: if a frame image listed in `df` cannot be read.
    """
    # group images
    pending_videos = _group_frames(df, max_gap=WINDOW_SIZE, min_length=WINDOW_SIZE)
    # make video
    for i, pending_video in enumerate(pending_videos):
        first_path = f'{KF_DIR}/{pending_video[0]}.png'
        first_frame = cv2.imread(first_path)
        # cv2.imread signals failure by returning None rather than raising
        if first_frame is None:
            raise FileNotFoundError(f'Cannot read image: {first_path}')
        # The first frame of the run fixes the frame size of the clip
        h, w = first_frame.shape[:2]
        video = cv2.VideoWriter(f'./{OUTPUT_DIR}/{df.iloc[0]["video"]}_{i}.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        try:
            for f in pending_video:
                frame_path = f'{KF_DIR}/{f}.png'
                frame = cv2.imread(frame_path)
                if frame is None:
                    raise FileNotFoundError(f'Cannot read image: {frame_path}')
                video.write(frame)
        finally:
            # Always finalise the file, even when a frame is missing
            video.release()


# Render the clips of both videos into OUTPUT_DIR.
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

for detections in (video1_df, video2_df):
    mk_video(detections)
