In [20]:
import random
import json
import re
import os, sys, pickle
from pathlib import Path
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import subprocess

# Project root. Assumes the kernel's working directory is a direct child of
# the repository root (e.g. a `notebooks/` folder) -- TODO confirm if the
# notebook is moved.
WORK_DIR = Path.cwd().parent

# Make the project root importable so that `from src import ...` resolves.
# Guarded so that re-running this cell does not keep appending duplicates.
if str(WORK_DIR) not in sys.path:
    sys.path.append(str(WORK_DIR))

from src import video_tools


In [21]:
# --- STAR annotation files (one JSON per split) ---
STAR_TRAIN = WORK_DIR / "data/datasets/STAR/STAR_annotations/STAR_train.json"
STAR_VAL = WORK_DIR / "data/datasets/STAR/STAR_annotations/STAR_val.json"
STAR_TEST = WORK_DIR / "data/datasets/STAR/STAR_annotations/STAR_test.json"
STAR_SMALL = WORK_DIR / "data/datasets/STAR/STAR_annotations/STAR_val_small_1000.json"

# --- Frame-level inputs ---
RAW_FRAMES_DIR = WORK_DIR / "data/datasets/action-genome/frames"
KEYFRAMES_INFO_PATH = WORK_DIR / "data/datasets/STAR/Video_Keyframe_IDs.csv"

# --- Video-level inputs / outputs ---
# `WORK_DIR / "..."` already yields a Path, so no extra Path(...) call is needed.
RAW_VIDEO_DIR = WORK_DIR / 'data/datasets/action-genome/Charades_v1_480/'
SAVE_VIDEO_DIR = WORK_DIR / "experiments/video_dump"
ANNOTATION_DIR = WORK_DIR / 'src/STAR_utils/annotations/STAR_classes/'

# Pickled fps information shipped under ANNOTATION_DIR.
# NOTE: unpickling executes arbitrary code; only load this file from a trusted checkout.
with open(ANNOTATION_DIR / 'video_fps', 'rb') as fps_file:
    fps_info = pickle.load(fps_file)


In [22]:
def _read_star_split(annotation_path):
    """Read one STAR annotation JSON file and return it as a DataFrame."""
    with open(annotation_path) as annotation_file:
        return pd.read_json(annotation_file)


# Individual splits.
star_train_df = _read_star_split(STAR_TRAIN)
star_val_df = _read_star_split(STAR_VAL)
star_test_df = _read_star_split(STAR_TEST)

# Merged views; each concatenation renumbers the index from 0.
star_train_val_df = pd.concat([star_train_df, star_val_df], ignore_index=True)
star_all_df = pd.concat([star_train_val_df, star_test_df], ignore_index=True)


In [23]:
# Unique video ids of every split and of the merged frames.
train_ids     = set(star_train_df["video_id"])
val_ids       = set(star_val_df["video_id"])
test_ids      = set(star_test_df["video_id"])
train_val_ids = set(star_train_val_df["video_id"])
all_ids       = set(star_all_df["video_id"])

# Videos shared between splits.
train_val_intersection      = train_ids.intersection(val_ids)
train_test_intersection     = train_ids.intersection(test_ids)
val_test_intersection       = val_ids.intersection(test_ids)
train_val_test_intersection = train_val_ids.intersection(test_ids)

# Videos present in one split but absent from the other.
train_exclusive_vs_val      = train_ids.difference(val_ids)
val_exclusive_vs_train      = val_ids.difference(train_ids)
test_exclusive_vs_train     = test_ids.difference(train_ids)
test_exclusive_vs_val       = test_ids.difference(val_ids)
test_exclusive_vs_train_val = test_ids.difference(train_val_ids)

# Report: how many distinct videos each split holds ...
print("Unique video ID counts:")
print(f"  Train         : {len(train_ids)}")
print(f"  Train + Val   : {len(train_val_ids)}")
print(f"  Test          : {len(test_ids)}")
print(f"  All           : {len(all_ids)}")

# ... and how many videos are shared across splits.
print("\nIntersection metrics:")
print(f"  Train ∩ Val        : {len(train_val_intersection)}")
print(f"  Train+Val ∩ Test   : {len(train_val_test_intersection)}")


Unique video ID counts:
  Train         : 3032
  Train + Val   : 3946
  Test          : 955
  All           : 4901

Intersection metrics:
  Train ∩ Val        : 0
  Train+Val ∩ Test   : 0


STAR uses 4901 videos, but the fps file has info for 9848 videos, which is the number of files contained in the raw videos directory:

In [24]:
# Wrap the loaded fps information in a Series (same name is rebound) and
# print a summary of its index and dtype.
fps_info = pd.Series(data=fps_info)
fps_info.info()


<class 'pandas.core.series.Series'>
Index: 9848 entries, BE43L.mp4 to S8PVE.mp4
Series name: None
Non-Null Count  Dtype  
--------------  -----  
9848 non-null   float64
dtypes: float64(1)
memory usage: 153.9+ KB


In [25]:
# Count the entries in the raw video directory without materialising a list.
sum(1 for entry in RAW_VIDEO_DIR.iterdir())


9848

In [26]:
# One entry per item in the frames directory, with the last suffix dropped
# from its name. The loop variable is not called `dir`, which would shadow
# the builtin of the same name.
raw_frames = [frame_dir.stem for frame_dir in RAW_FRAMES_DIR.iterdir()]
len(raw_frames)


9601

Not all of the videos have had their frames extracted by the Action Genome toolkit (9601 frame directories vs. 9848 videos).  
Let's check that the frames of every video used in STAR have been extracted:

In [27]:
# True only if every STAR video id appears among the extracted frame directories.
bool(star_all_df["video_id"].isin(raw_frames).all())


True

Now, let's review the frame dumping tool from Action Genome used in STAR:

```python
import os
import argparse
import warnings
from tqdm import tqdm


def dump_frames(args):
    """Extract frames with ffmpeg for every video listed in ``frame_list.txt``.

    Args:
        args: Parsed command-line namespace providing ``video_dir``,
            ``frame_dir``, ``annotation_dir`` and ``all_frames``.

    For each video named in ``<annotation_dir>/frame_list.txt`` a directory
    ``<frame_dir>/<video>`` is created and ffmpeg writes the frames into it as
    ``%06d.png``. Unless ``all_frames`` is set, every file in that directory
    that is not listed for the video is deleted afterwards. Videos whose frame
    directory already exists are skipped with a ``RuntimeWarning``.
    """
    video_dir = args.video_dir
    frame_dir = args.frame_dir
    annotation_dir = args.annotation_dir
    all_frames = args.all_frames

    # Load the list of annotated frames
    frame_list = []
    with open(os.path.join(annotation_dir, 'frame_list.txt'), 'r') as f:
        for frame in f:
            frame_list.append(frame.rstrip('\n'))

    # Create video to frames mapping
    video2frames = {}
    for path in frame_list:
        # Each entry must contain exactly one '/' ("<video>/<frame>");
        # otherwise this unpacking raises ValueError.
        video, frame = path.split('/')
        if video not in video2frames:
            video2frames[video] = []
        video2frames[video].append(frame)

    # For each video, dump frames.
    for v in tqdm(video2frames):
        curr_frame_dir = os.path.join(frame_dir, v)
        # An existing directory is treated as "already dumped" (see the else branch).
        if not os.path.exists(curr_frame_dir):
            os.makedirs(curr_frame_dir)
            # Use ffmpeg to extract frames. Different versions of ffmpeg may generate slightly different frames.
            # We used ffmpeg 2.8.15 to dump our frames.
            # Note that the frames are extracted according to their original video FPS, which is not always 24.
            # Therefore, our frame indices are different from Charades extracted frames' indices.
            # NOTE: the exit status of ffmpeg is discarded, and the directory has already been
            # created, so a failed extraction leaves a directory that later runs will skip.
            os.system('ffmpeg -loglevel panic -i %s/%s %s/%%06d.png' % (video_dir, v, curr_frame_dir))

            # if not keeping all frames, only keep the annotated frames included in frame_list.txt
            if not all_frames:
                keep_frames = video2frames[v]
                frames_to_delete = set(os.listdir(curr_frame_dir)) - set(keep_frames)
                for frame in frames_to_delete:
                    os.remove(os.path.join(curr_frame_dir, frame))
        else:
            warnings.warn('Frame directory %s already exists. Skipping dumping into this directory.' % curr_frame_dir,
                          RuntimeWarning)


# Command-line entry point: parse the directory options and the --all_frames
# flag, then run the frame dump with them.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Dump frames")
    parser.add_argument("--video_dir", default="dataset/ag/videos",
                        help="Folder containing Charades videos.")
    parser.add_argument("--frame_dir", default="dataset/ag/frames",
                        help="Root folder containing frames to be dumped.")
    parser.add_argument("--annotation_dir", default="dataset/ag/annotations",
                        help=("Folder containing annotation files, including object_bbox_and_relationship.pkl, "
                              "person_bbox.pkl and frame_list.txt."))
    # store_true: defaults to False, i.e. only the annotated frames are kept.
    parser.add_argument("--all_frames", action="store_true",
                        help="Set if you want to dump all frames, rather than the frames listed in frame_list.txt")
    args = parser.parse_args()
    dump_frames(args)
```

**The ffmpeg command**

```bash
ffmpeg -loglevel panic -i {video_dir}/{v} {curr_frame_dir}/%06d.png
```

- `-loglevel panic`   : Only show fatal errors which could lead the process to crash, such as an assertion failure.
- `-i {video_dir}/{v}`: specifies the input video file.
- `%06d.png`          : output filename pattern; the frames are extracted at the video's native fps (no `-r` option is given) and saved as six-digit, zero-padded PNG files (`000001.png`, `000002.png`, …)

In [28]:
# Display the fps Series built earlier (pandas truncates the long output).
fps_info


BE43L.mp4    29.970030
LHPQS.mp4    59.940060
OTL44.mp4    15.076000
D548M.mp4    30.000000
4ZNNP.mp4    30.000000
               ...    
GIIMN.mp4    30.000000
2J4MA.mp4    14.995002
4GWNV.mp4    15.000000
AGWQA.mp4    29.970030
S8PVE.mp4    29.970030
Length: 9848, dtype: float64

In [29]:
# Sample video used for the ffprobe checks; kept as a plain string path.
video_path = os.fspath(RAW_VIDEO_DIR / "D548M.mp4")
video_path


'/home/lusha/star_code/data/datasets/action-genome/Charades_v1_480/D548M.mp4'

In [31]:
# Look up the stream info of the sample video and show its duration entry.
stream_info = video_tools.get_video_stream_info(video_path)
stream_info['duration']


'30.733333'

In [53]:
# Ask ffprobe for selected fields of the first video stream of `video_path`.
ffprobe_cmd = [
    "ffprobe",
    "-v", "error",                # only report errors
    "-select_streams", "v:0",     # first video stream
    "-show_entries", "stream=codec_name,width,height,r_frame_rate,avg_frame_rate,duration",
    "-of", "default=noprint_wrappers=1",
    "-i", video_path,
]

result = subprocess.run(
    ffprobe_cmd,
    capture_output=True,  # collect stdout and stderr instead of inheriting them
    text=True,            # decode the captured output to str
    check=True,           # raise CalledProcessError on a non-zero exit status
)

print(result.stdout)


codec_name=h264
width=480
height=360
r_frame_rate=30/1
avg_frame_rate=30/1
duration=30.733333

