In [1]:
import shutil
import os

import cv2
import torch
import numpy as np
import pandas as pd

In [2]:
# Directories used by the notebook: where the source videos are read from,
# where the extracted frames are written, and where the pickled frame table
# is stored. All three live under the same thesis directory.
THESIS_DIR = "/media/luchocode/Extra vol/thesis"
WALKING_TERRAIN_VIDEOS = [f"{THESIS_DIR}/videos/records"]
OUTPUT_VIDEO_FRAMES = f"{THESIS_DIR}/data/walking_envs"
DIR_SAVE_DF = f"{THESIS_DIR}/pickle"

In [23]:
def save_frame(
    frame: cv2.typing.MatLike,
    video_name: str,
    num_frame: int,
    _class: str,
    output_dir: str
):
    """Write one video frame as a JPEG inside its class sub-directory.

    The image is stored at
    ``<output_dir>/<_class>/<video_name>_frame_<num_frame>.jpg``; the class
    directory is created when it does not exist yet.

    Args:
        frame: Image handed to ``cv2.imwrite``.
        video_name: Name of the source video, used as the file-name prefix.
        num_frame: Frame number embedded in the file name.
        _class: Label of the frame; also the name of the sub-directory.
        output_dir: Root directory under which the class directories live.

    Returns:
        The image path relative to ``output_dir`` (``<_class>/<file name>``).
        The path is returned even when the write fails; a warning is printed
        in that case.
    """
    dir_to_save_frame = os.path.join(output_dir, _class)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dir_to_save_frame, exist_ok=True)

    frame_name = f"{video_name}_frame_{num_frame}.jpg"
    name = os.path.join(dir_to_save_frame, frame_name)

    # cv2.imwrite can signal a failed write by returning False instead of
    # raising, so report it rather than dropping the result silently.
    if not cv2.imwrite(name, frame):
        print(f"Warning: could not write frame to '{name}'")

    return os.path.join(_class, frame_name)


def show_initial_frames_of_video(
    cap: cv2.VideoCapture,
    num_frames: int = 120
) -> str:
    """Preview the beginning of a video and ask the user for its label.

    Frames are read from ``cap``, resized to 500x700 and shown in an OpenCV
    window. Once ``num_frames`` frames have been shown (or the video ends
    earlier), the label options are printed and the function blocks until
    one of the label keys is pressed in the window.

    Args:
        cap: Capture to read the preview frames from. Reading consumes
            frames, so the capture is not positioned at the start afterwards.
        num_frames: Number of frames to show before asking for the label.
            At least one frame is always shown.

    Returns:
        ``"LG"`` (key ``l``), ``"SU"`` (key ``u``) or ``"SD"`` (key ``d``).
        An empty string is returned when ``q`` is pressed during the preview
        or when no frame could be read at all.
    """
    # Key code -> (label, description announced to the user).
    labels_by_key = {
        ord('u'): ("SU", "STAIRS UP"),
        ord('l'): ("LG", "LEVEL GROUND"),
        ord('d'): ("SD", "STAIRS DOWN"),
    }

    frames_shown = 0
    while frames_shown < max(num_frames, 1):
        ret, frame = cap.read()
        if not ret:
            # The video is shorter than the preview (or unreadable): stop
            # here instead of handing a missing frame to cv2.resize.
            break

        resized = cv2.resize(frame, (500, 700), interpolation=cv2.INTER_AREA)
        cv2.imshow("Video frame", resized)
        frames_shown += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            return ""

    if frames_shown == 0:
        # Nothing was displayed, so there is nothing the user could label.
        print("Error: Could not read the frame.")
        return ""

    # Choose the video's label
    print("Choose a label:")
    print("Level Ground: l")
    print("Stairs up: u")
    print("Stairs down: d")

    while True:
        key = cv2.waitKey(0) & 0xFF

        if key in labels_by_key:
            label, description = labels_by_key[key]
            print(f"The video is {description}!")
            return label

        print("A label must be provided!")


def process_videos(
    video_path: str,
    output_dir: str,
    df: pd.DataFrame
) -> pd.DataFrame:
    """Label every video of a directory and extract all of its frames.

    For each entry of ``video_path`` (visited in sorted order) the video is
    previewed with ``show_initial_frames_of_video`` to obtain its label, then
    reopened and every frame is written with ``save_frame``. One row per
    saved frame is appended to ``df``.

    Args:
        video_path: Directory that contains the video files.
        output_dir: Root directory passed to ``save_frame``.
        df: Frame table to extend; rows get the keys ``video``, ``frame``,
            ``class`` and ``path``.

    Returns:
        A DataFrame holding the rows of ``df`` followed by the new rows.
        ``df`` itself is returned when no frame was saved.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"The directory '{video_path}' doesn't exist")

    # List the directory once and sort it so the order of the labeling
    # session (and of the resulting rows) is the same on every run.
    video_names = sorted(os.listdir(video_path))
    n_videos = len(video_names)

    # Rows are collected here and concatenated once at the end; concatenating
    # per frame copies the whole table for every single frame.
    records = []

    for i, video_name in enumerate(video_names):
        video = os.path.join(video_path, video_name)

        preview_cap = cv2.VideoCapture(video)
        try:
            if not preview_cap.isOpened():
                print("Error: Could not open video file.")
                continue
            print(f"Video file '{video}' opened successfully!")

            frame_count = int(preview_cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Get total number of frames in the video
            fps = preview_cap.get(cv2.CAP_PROP_FPS)  # Get frames per second (FPS)
            print(f"Total frames: {frame_count}, FPS: {fps}")
            print(f"Video {i+1}/{n_videos}")

            video_label = show_initial_frames_of_video(preview_cap)
        finally:
            # The preview consumed frames; release this capture before the
            # video is reopened from the start for extraction.
            preview_cap.release()

        if video_label == "":
            # No label was chosen: do not write unlabeled frames into the
            # root of the output directory.
            cv2.destroyAllWindows()
            print(f"No label was chosen for '{video}'; skipping it.")
            continue

        cap = cv2.VideoCapture(video)
        counter = 1
        try:
            while True:
                ret, frame = cap.read()

                if not ret:
                    print("Error: Could not read the frame.")
                    break

                frame_name = save_frame(
                    frame,
                    video_name,
                    num_frame=counter,
                    _class=video_label,
                    output_dir=output_dir
                )

                records.append({
                    'video': video_name,
                    'frame': counter,
                    'class': video_label,
                    'path': frame_name
                })

                if counter % 10 == 0:
                    print(f"Frames processed: {counter}/{frame_count}")

                counter += 1
        finally:
            cap.release()
            cv2.destroyAllWindows()

    if records:
        df = pd.concat([df, pd.DataFrame(records)], ignore_index=True)

    return df

In [24]:
# Start from an empty frame table and let every directory listed in
# WALKING_TERRAIN_VIDEOS append the rows of its videos to it.
df = pd.DataFrame({column: [] for column in ("video", "frame", "class", "path")})
for video in WALKING_TERRAIN_VIDEOS:
    df = process_videos(video, output_dir=OUTPUT_VIDEO_FRAMES, df=df)

Video file '/media/luchocode/Extra vol/thesis/videos/records/IMG_4002.MOV' opened successfully!
Total frames: 728, FPS: 60.008242890506935
Video 1/52
Choose a label:
Level Ground: l
Stairs up: u
Stairs down: d
The video is STAIRS UP!
Frames processed: 10/728
Frames processed: 20/728
Frames processed: 30/728
Frames processed: 40/728
Frames processed: 50/728
Frames processed: 60/728
Frames processed: 70/728
Frames processed: 80/728
Frames processed: 90/728
Frames processed: 100/728
Frames processed: 110/728
Frames processed: 120/728
Frames processed: 130/728
Frames processed: 140/728
Frames processed: 150/728
Frames processed: 160/728
Frames processed: 170/728
Frames processed: 180/728
Frames processed: 190/728
Frames processed: 200/728
Frames processed: 210/728
Frames processed: 220/728
Frames processed: 230/728
Frames processed: 240/728
Frames processed: 250/728
Frames processed: 260/728
Frames processed: 270/728
Frames processed: 280/728
Frames processed: 290/728
Frames processed: 300

In [25]:
# Persist the frame table so it can be reloaded without extracting the
# videos again.
df_pickle_path = os.path.join(DIR_SAVE_DF, "df_stairs_ground.pkl")
df.to_pickle(df_pickle_path)

In [17]:
# Reload the frame table from the pickle written by this notebook.
# Pickle files should only be loaded when they come from a trusted source.
df_pickle_path = os.path.join(DIR_SAVE_DF, "df_stairs_ground.pkl")
df = pd.read_pickle(df_pickle_path)

In [20]:
# Relabel frames 280 - 569 of this video from "SD" to "SU".
video_name = "WhatsApp Video 2026-01-20 at 19.52.54.mp4"

# Select by frame number rather than by position inside the video's rows, so
# the intended frames are hit even if some rows of this video are missing.
relabel_mask = (df["video"] == video_name) & df["frame"].between(280, 569)

df.loc[relabel_mask, "class"] = "SU"
# Rewrite only the leading class directory of the relative path; a plain
# replace("SD", "SU") would also change an "SD" inside the video name.
df.loc[relabel_mask, "path"] = df.loc[relabel_mask, "path"].str.replace(
    r"^SD(?=[\\/])", "SU", regex=True
)
# NOTE(review): only the DataFrame is updated here; nothing in this cell moves
# the image files from SD/ to SU/ - confirm that they are moved elsewhere.

# Take the per-video view after the update so the displayed rows are current.
df_video = df[df["video"] == video_name]
df_video.tail()

Unnamed: 0,video,frame,class,path
13489,WhatsApp Video 2026-01-20 at 19.52.54.mp4,565.0,SU,SU/WhatsApp Video 2026-01-20 at 19.52.54.mp4_f...
13490,WhatsApp Video 2026-01-20 at 19.52.54.mp4,566.0,SU,SU/WhatsApp Video 2026-01-20 at 19.52.54.mp4_f...
13491,WhatsApp Video 2026-01-20 at 19.52.54.mp4,567.0,SU,SU/WhatsApp Video 2026-01-20 at 19.52.54.mp4_f...
13492,WhatsApp Video 2026-01-20 at 19.52.54.mp4,568.0,SU,SU/WhatsApp Video 2026-01-20 at 19.52.54.mp4_f...
13493,WhatsApp Video 2026-01-20 at 19.52.54.mp4,569.0,SU,SU/WhatsApp Video 2026-01-20 at 19.52.54.mp4_f...


In [22]:
# Write the frame table back to the same pickle file, replacing the
# previously saved version.
df_pickle_path = os.path.join(DIR_SAVE_DF, "df_stairs_ground.pkl")
df.to_pickle(df_pickle_path)

In [31]:
# Verify that the image files exist.
# Every row is flagged by whether its image is present under
# OUTPUT_VIDEO_FRAMES; only the rows whose file was found are kept. The
# helper column "exist" stays in the table.
image_exists = pd.Series(
    [
        os.path.exists(os.path.join(OUTPUT_VIDEO_FRAMES, relative_path))
        for relative_path in df["path"]
    ],
    index=df.index,
    dtype=bool,
)
df["exist"] = image_exists

df = df[df["exist"]]

In [32]:
# Overwrite the pickle with the current frame table.
df_pickle_path = os.path.join(DIR_SAVE_DF, "df_stairs_ground.pkl")
df.to_pickle(df_pickle_path)