## Libraries and Directories

In [1]:
import csv
import os
import subprocess
import cv2
from roboflow import Roboflow
from pathlib import Path
#os.chdir("C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/1. Data")

In [None]:
# All inputs and outputs live below one data folder inside the user's home directory,
# so DATA_ROOT is the only path that has to change when the data moves.
DATA_ROOT = Path.home().joinpath(
    "OneDrive", "Documents", "MSc Data Science", "Year 3", "1. Data"
)

VIDEO_DIR = DATA_ROOT.joinpath("0_Hornet")                 # folder containing full videos
CLIP_DIR = DATA_ROOT.joinpath("2_hornet_sighting_clips")   # where to save extracted intervals
CSV_PATH = DATA_ROOT.joinpath("1_hornet_sightings.csv")    # CSV file with intervals

In [None]:
# Echo the resolved locations so a wrong DATA_ROOT is noticed before any processing starts.
for configured_path in (VIDEO_DIR, CLIP_DIR, CSV_PATH):
    print(configured_path)

C:\Users\User\OneDrive\Documents\MSc Data Science\Year 3\1. Data\0_Hornet
C:\Users\User\OneDrive\Documents\MSc Data Science\Year 3\1. Data\2_hornet_sighting_clips
C:\Users\User\OneDrive\Documents\MSc Data Science\Year 3\1. Data\1_hornet_sightings.csv


## Extract clips with hornets

In [None]:
# Cut one clip per CSV row (video file name, start time, end time) out of the full videos.
#
# ffmpeg does not create missing output folders, so make sure the clip folder exists
# before the first run; otherwise every extraction fails.
os.makedirs(CLIP_DIR, exist_ok=True)

extracted = 0
skipped_rows = 0
failed_clips = []  # (clip_name, reason) for every ffmpeg run that did not succeed

# Read CSV
with open(CSV_PATH, newline='') as csv_file:
    reader = csv.reader(csv_file)
    for row in reader:
        if not row or row[0].strip().lower() == "video":
            continue  # skip headers or empty rows

        # Tolerate stray whitespace and extra trailing columns (e.g. a notes column),
        # but refuse rows that do not carry all three required values. An empty
        # video name would otherwise resolve to VIDEO_DIR itself and pass the
        # existence check below.
        fields = [field.strip() for field in row[:3]]
        if len(fields) < 3 or not all(fields):
            print(f"Skipping malformed row (expected video,start,end): {row}")
            skipped_rows += 1
            continue
        video_name, start, end = fields

        video_path = os.path.join(VIDEO_DIR, video_name)
        if not os.path.isfile(video_path):
            print(f"Video not found: {video_path}")
            skipped_rows += 1
            continue

        # Construct output filename (':' is not allowed in Windows file names)
        base, _ = os.path.splitext(video_name)
        clip_name = f"{base}_{start.replace(':','-')}_{end.replace(':','-')}.mp4"
        out_path = os.path.join(CLIP_DIR, clip_name)

        # ffmpeg command: copy (no re-encode, lossless & fast)
        cmd = [
            "ffmpeg",
            "-hide_banner",
            "-loglevel", "error",  # keep stderr limited to real problems
            "-y",  # overwrite
            "-ss", start,  # start time
            "-to", end,    # end time
            "-i", video_path,
            "-c", "copy",  # copy codec to avoid recompression
            out_path
        ]

        print(f"Extracting {clip_name} ...")
        result = subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,   # never let ffmpeg wait for keyboard input
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,     # keep the error text instead of discarding it
            text=True,
            errors="replace",
        )
        if result.returncode == 0:
            extracted += 1
        else:
            error_lines = result.stderr.strip().splitlines()
            reason = error_lines[-1] if error_lines else f"exit code {result.returncode}"
            failed_clips.append((clip_name, reason))
            print(f"  ffmpeg failed for {clip_name}: {reason}")

# Only claim success when every row actually produced a clip.
if failed_clips or skipped_rows:
    print(
        f"Finished with problems: {extracted} clips extracted, "
        f"{len(failed_clips)} failed, {skipped_rows} rows skipped."
    )
else:
    print("All clips extracted.")


Extracting GX010019_00-01-05_00-01-08.mp4 ...
Extracting GX010019_00-02-01_00-02-04.mp4 ...
Extracting GX010019_00-02-55_00-02-58.mp4 ...
Extracting GX010019_00-03-05_00-03-07.mp4 ...
Extracting GX010019_00-11-08_00-11-09.mp4 ...
Extracting GX010019_00-11-14_00-11-15.mp4 ...
Extracting GX010019_00-18-54_00-18-56.mp4 ...
Extracting GX010019_00-19-17_00-19-18.mp4 ...
Extracting GX010019_00-21-29_00-21-33.mp4 ...
Extracting GX010019_00-22-55_00-22-56.mp4 ...
Extracting GX010019_00-23-03_00-23-07.mp4 ...
Extracting GX010019_00-23-36_00-23-37.mp4 ...
Extracting GX010019_00-23-52_00-23-58.mp4 ...
Extracting GX010019_00-24-08_00-24-15.mp4 ...
Extracting GX010019_00-25-21_00-25-22.mp4 ...
Extracting GX010019_00-25-27_00-25-29.mp4 ...
Extracting GX010020_00-02-53_00-02-54.mp4 ...
Extracting GX010020_00-03-51_00-03-53.mp4 ...
Extracting GX010020_00-08-04_00-08-06.mp4 ...
Extracting GX010020_00-08-33_00-08-37.mp4 ...
Extracting GX010020_00-10-05_00-10-09.mp4 ...
Extracting GX010020_00-18-39_00-18

## Extract frames with hornets

In [4]:
# ---- CONFIG ----
# Sampling step for frame extraction: 1 = save every frame, 5 = every 5th frame, etc.
SAVE_EVERY_N = 1
# Root folder where frames will go (one sub-folder per clip is created later).
FRAME_DIR = DATA_ROOT.joinpath("hornet_sighting_frames")
# -----------------

# Create the frame root up front (including missing parents); a no-op when it already exists.
FRAME_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Decode every extracted clip and write its frames as JPEGs, one sub-folder per clip.
if SAVE_EVERY_N < 1:
    # A step of 0 would raise ZeroDivisionError inside the loop; fail early and clearly.
    raise ValueError(f"SAVE_EVERY_N must be >= 1, got {SAVE_EVERY_N}")

# sorted() makes the processing order (and therefore the log) reproducible between runs.
for video_name in sorted(os.listdir(CLIP_DIR)):
    if not video_name.lower().endswith(".mp4"):
        continue  # skip non-video files

    video_path = os.path.join(CLIP_DIR, video_name)
    base = os.path.splitext(video_name)[0]

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Cannot open {video_path}")
        cap.release()
        continue

    # each clip gets its own folder: frames/GX020171_00-27-21_00-27-32/
    # (created only after the clip opened, so unreadable clips leave no empty folder)
    frame_folder = os.path.join(FRAME_DIR, base)
    os.makedirs(frame_folder, exist_ok=True)

    frame_idx = 0       # index of the frame just decoded
    saved = 0           # frames successfully written; also numbers the output files
    write_failures = 0  # frames cv2.imwrite reported it could not write

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream (or decode error)

            # save every Nth frame
            if frame_idx % SAVE_EVERY_N == 0:
                out_name = f"frame_{saved:06d}.jpg"
                out_path = os.path.join(frame_folder, out_name)
                # cv2.imwrite reports failure through its return value, not an exception
                if cv2.imwrite(out_path, frame):
                    saved += 1
                else:
                    write_failures += 1

            frame_idx += 1
    finally:
        # release the decoder even if reading or writing raised
        cap.release()

    print(f"{video_name}: saved {saved} frames to {frame_folder}")
    if write_failures:
        print(f"  WARNING: {write_failures} frames could not be written to {frame_folder}")
    if frame_idx == 0:
        print(f"  WARNING: no frames could be decoded from {video_path}")

print("Done.")


GX010019_00-01-05_00-01-08.mp4: saved 89 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-01-05_00-01-08
GX010019_00-02-01_00-02-04.mp4: saved 90 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-02-01_00-02-04
GX010019_00-02-55_00-02-58.mp4: saved 90 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-02-55_00-02-58
GX010019_00-03-05_00-03-07.mp4: saved 60 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-03-05_00-03-07
GX010019_00-11-08_00-11-09.mp4: saved 30 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-11-08_00-11-09
GX010019_00-11-14_00-11-15.mp4: saved 30 frames to C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010019_00-11-14_00-11-15
GX010019_00-18-54_00-18-56.mp4: saved 59 frames to C

## Upload frames to Roboflow

In [16]:
"""

import os
from roboflow import Roboflow

# -----------------------------
# CONFIG
# -----------------------------
API_KEY = "DP7wL8JTZ4ZGgEmcVxjB"
WORKSPACE = "hornet-detection-1wa7m"        # e.g. hornet-detection-1wa7m
PROJECT = "hornet-detection-deo0z"           # e.g. hornet-detection-de0oz
ROOT_FOLDER = "C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames"

"""

In [17]:
# -----------------------------
# CONNECT TO ROBOFLOW
# -----------------------------
# Disabled: both statements below are commented out, so this cell defines neither
# `rf` nor `project`. To re-enable the upload, uncomment them; they need API_KEY,
# WORKSPACE and PROJECT to be defined first.
#rf = Roboflow(api_key=API_KEY)
#project = rf.workspace(WORKSPACE).project(PROJECT)

loading Roboflow workspace...
loading Roboflow project...


In [19]:
# -----------------------------
# BULK UPLOAD
# -----------------------------
# Disabled: the whole upload loop is wrapped in a string literal, so nothing here runs.
# When enabled, it walks ROOT_FOLDER, uses each sub-folder's name as the Roboflow batch
# name, and uploads every file found in that sub-folder through `project.upload`,
# counting the successful uploads and printing any failure without stopping.
# It relies on `project` and ROOT_FOLDER being defined beforehand.
# NOTE(review): files are not filtered by extension, so non-image files inside the
# frame folders would be submitted as well -- confirm this is intended.

"""
count = 0

for folder, subfolders, files in os.walk(ROOT_FOLDER):
    
    # Get the folder name to use as batch name
    batch_name = os.path.basename(folder)
    
    # Skip the root folder (which is just 'hornet_sighting_frames')
    if batch_name == os.path.basename(ROOT_FOLDER):
        continue

    print(f"\n=== Uploading batch: {batch_name} ===")
    
    for file in files:
            image_path = os.path.join(folder, file)
            
            try:
                project.upload(
                    image_path=image_path,
                    batch_name=batch_name,
                    num_retry_uploads=3
                )
                count += 1
                print(f"Uploaded: {image_path}")
            except Exception as e:
                print(f"Failed to upload {image_path}: {e}")

print(f"\n Uploaded {count} images to Roboflow.")

"""



=== Uploading batch: GX010026_00-00-27_00-00-53 ===
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000092.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000093.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000094.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000095.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000096.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000097.jpg
Uploaded: C:/Users/User/OneDrive/Documents/MSc Data Science/Year 3/hornet_sighting_frames\GX010026_00-00-27_00-00-53\frame_000098.jpg
Uploaded: