In [6]:
# Install yt-dlp
%pip install -U yt-dlp --quiet


Note: you may need to restart the kernel to use updated packages.


In [7]:
# Download video using command-line version with higher resolution options
# Choose one of the following format options:

# Option 1: Best quality video + best audio (may result in separate files that get merged)
# !yt-dlp -f "bestvideo+bestaudio" -o "%(title)s.%(ext)s" https://www.youtube.com/watch?v=RB_bNAHj0Kg

# Option 2: Specific resolution (uncomment to use)
# !yt-dlp -f "best[height<=1080]" -o "%(title)s.%(ext)s" https://www.youtube.com/watch?v=RB_bNAHj0Kg

# Option 3: Best available up to 4K (uncomment to use)
# !yt-dlp -f "best[height<=2160]" -o "%(title)s.%(ext)s" https://www.youtube.com/watch?v=RB_bNAHj0Kg

# Option 4: Prefer higher resolution with fallback (uncomment to use)
# !yt-dlp -f "bestvideo[height>=720]+bestaudio/best" -o "%(title)s.%(ext)s" https://www.youtube.com/watch?v=RB_bNAHj0Kg


In [8]:
import yt_dlp

url = "https://www.youtube.com/watch?v=RB_bNAHj0Kg"

# Option 1: Highest quality (recommended)
ydl_opts = {
    "format": "bestvideo+bestaudio",
    "outtmpl": "%(title)s.%(ext)s",
    "ignoreerrors": True,           # Don't abort on errors
    "no_warnings": False,           # Show warnings but continue
    "keepvideo": True,              # Keep video file even if merge fails
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([url])

# Alternative options (uncomment to use instead):

# Option 2: Specific resolution up to 1080p
# ydl_opts = {
#     "format": "best[height<=1080]",
#     "outtmpl": "%(title)s.%(ext)s",
# }

# Option 3: Specific resolution up to 4K
# ydl_opts = {
#     "format": "best[height<=2160]",
#     "outtmpl": "%(title)s.%(ext)s",
# }

# Option 4: Prefer 720p+ with fallback
# ydl_opts = {
#     "format": "bestvideo[height>=720]+bestaudio/best",
#     "outtmpl": "%(title)s.%(ext)s",
# }


[youtube] Extracting URL: https://www.youtube.com/watch?v=RB_bNAHj0Kg
[youtube] RB_bNAHj0Kg: Downloading webpage
[youtube] RB_bNAHj0Kg: Downloading tv client config
[youtube] RB_bNAHj0Kg: Downloading tv player API JSON
[youtube] RB_bNAHj0Kg: Downloading ios player API JSON
[youtube] RB_bNAHj0Kg: Downloading m3u8 information
[info] RB_bNAHj0Kg: Downloading 1 format(s): 137+251
[download] Destination: ‚ÄúFrom Skin to Skeleton ÔºöTowards Biomechanically Accurate 3D Digital Humans.f137.mp4
[download] 100% of   42.70MiB in 00:00:14 at 3.02MiB/s     
[download] Destination: ‚ÄúFrom Skin to Skeleton ÔºöTowards Biomechanically Accurate 3D Digital Humans.f251.webm
[download] 100% of    7.01MiB in 00:00:01 at 3.83MiB/s   
[Merger] Merging formats into "‚ÄúFrom Skin to Skeleton ÔºöTowards Biomechanically Accurate 3D Digital Humans.mkv"


In [9]:
# Load GAVD dataset to get video URLs
import pandas as pd
import os

# Load first dataset part to get URLs
data_dir = 'GAVD/data/'
first_csv = os.path.join(data_dir, 'GAVD_Clinical_Annotations_1.csv')
df = pd.read_csv(first_csv)

# Display available columns
print("Available columns:", list(df.columns))
print(f"Dataset shape: {df.shape}")

# Check if URL column exists and show sample URLs
if 'url' in df.columns:
    unique_urls = df['url'].dropna().unique()
    print(f"\nFound {len(unique_urls)} unique video URLs")
    print("Sample URLs:")
    for i, url in enumerate(unique_urls[:5]):
        print(f"  {i+1}. {url}")
else:
    print("No 'url' column found in the dataset")


Available columns: ['seq', 'frame_num', 'cam_view', 'gait_event', 'dataset', 'gait_pat', 'bbox', 'vid_info', 'id', 'url']
Dataset shape: (91624, 10)

Found 69 unique video URLs
Sample URLs:
  1. https://www.youtube.com/watch?v=B5hrxKe2nP8
  2. https://www.youtube.com/watch?v=TgkxrrhnvlM
  3. https://www.youtube.com/watch?v=5SBtTbfELUU
  4. https://www.youtube.com/watch?v=0gAHlm79Bjo
  5. https://www.youtube.com/watch?v=jzkn287X-84


  df = pd.read_csv(first_csv)


In [10]:
# Download a single video from GAVD dataset (highest quality)
import yt_dlp

# Select first available URL from the dataset
if 'url' in df.columns and len(df['url'].dropna()) > 0:
    sample_url = df['url'].dropna().iloc[0]
    print(f"Downloading video from: {sample_url}")
    
    ydl_opts = {
        "format": "bestvideo+bestaudio",  # Highest quality video + audio
        "outtmpl": "GAVD_%(title)s.%(ext)s",   # prefix with GAVD_
    }
    
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([sample_url])
        print("Download completed successfully!")
    except Exception as e:
        print(f"Error downloading video: {e}")
else:
    print("No valid URLs found in the dataset")


Downloading video from: https://www.youtube.com/watch?v=B5hrxKe2nP8
[youtube] Extracting URL: https://www.youtube.com/watch?v=B5hrxKe2nP8
[youtube] B5hrxKe2nP8: Downloading webpage
[youtube] B5hrxKe2nP8: Downloading tv client config
[youtube] B5hrxKe2nP8: Downloading tv player API JSON
[youtube] B5hrxKe2nP8: Downloading ios player API JSON
[youtube] B5hrxKe2nP8: Downloading m3u8 information
[info] B5hrxKe2nP8: Downloading 1 format(s): 136+251
[download] GAVD_Parkinsonian Gait Video.mkv has already been downloaded
Download completed successfully!


In [1]:
# BATCH DOWNLOAD: Process all GAVD annotation files and download unique videos
import yt_dlp
import os
import pandas as pd
from collections import defaultdict
import time

os.makedirs("GAVD-videos", exist_ok=True)

# Process all 5 annotation files
data_dir = 'GAVD/data/'
annotation_files = [
    'GAVD_Clinical_Annotations_1.csv',
    'GAVD_Clinical_Annotations_2.csv', 
    'GAVD_Clinical_Annotations_3.csv',
    'GAVD_Clinical_Annotations_4.csv',
    'GAVD_Clinical_Annotations_5.csv'
]

print("üîç Processing all GAVD annotation files...")
all_videos = {}  # Dictionary to store unique videos: {video_id: url}
video_info = defaultdict(dict)  # Store additional info for each video

for i, filename in enumerate(annotation_files, 1):
    filepath = os.path.join(data_dir, filename)
    print(f"üìÅ Loading {filename}...")
    
    try:
        # Load with low_memory=False to avoid dtype warnings
        df_temp = pd.read_csv(filepath, low_memory=False)
        
        # Extract unique video URLs from this file
        valid_rows = df_temp[df_temp['url'].notna() & df_temp['id'].notna()]
        
        for _, row in valid_rows.iterrows():
            video_id = row['id']
            video_url = row['url']
            
            if video_id not in all_videos:
                all_videos[video_id] = video_url
                # Store additional metadata
                video_info[video_id] = {
                    'gait_pat': row.get('gait_pat', 'unknown'),
                    'dataset': row.get('dataset', 'unknown'),
                    'vid_info': row.get('vid_info', {}),
                    'source_file': filename
                }
        
        unique_in_file = len(valid_rows['id'].unique())
        print(f"   ‚úì Found {unique_in_file} unique videos in {filename}")
        
    except Exception as e:
        print(f"   ‚ùå Error processing {filename}: {e}")

print(f"\nüìä SUMMARY:")
print(f"Total unique videos found: {len(all_videos)}")
print(f"Ready to download {len(all_videos)} videos")

# Show sample of what we found
print(f"\nüìã Sample videos to download:")
sample_videos = list(all_videos.items())[:5]
for video_id, url in sample_videos:
    gait_type = video_info[video_id]['gait_pat']
    print(f"  ‚Ä¢ {video_id} ({gait_type}): {url}")

if len(all_videos) > 5:
    print(f"  ... and {len(all_videos) - 5} more videos")

# Ask user confirmation before starting batch download
print(f"\n‚ö†Ô∏è  This will download {len(all_videos)} videos. This may take a while and use significant bandwidth.")
print("üí° TIP: You can interrupt the process anytime with Ctrl+C")

# Download configuration
ydl_opts = {
    "format": (
        "bestvideo[ext=mp4]+bestaudio[ext=m4a]/"
        "best[ext=mp4]/best"
    ),
    "merge_output_format": "mp4",     # final file suffix
    "outtmpl": "GAVD-videos/%(id)s.%(ext)s",

    "keepvideo": False,               # üö´ throw away inter-mediates
    "writeinfojson": False,           # üö´ skip the .info.json
}



# Start batch download
print(f"\nüöÄ Starting batch download of {len(all_videos)} videos...")
downloaded_count = 0
failed_count = 0
failed_videos = []

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    for i, (video_id, video_url) in enumerate(all_videos.items(), 1):
        gait_type = video_info[video_id]['gait_pat']
        
        print(f"\n[{i}/{len(all_videos)}] Downloading: {video_id} ({gait_type})")
        print(f"URL: {video_url}")
        
        try:
            ydl.download([video_url])
            downloaded_count += 1
            print(f"‚úÖ Success! ({downloaded_count} completed)")
            
            # Small delay to be respectful to YouTube
            time.sleep(1)
            
        except Exception as e:
            failed_count += 1
            failed_videos.append((video_id, str(e)))
            print(f"‚ùå Failed: {e}")
            continue

# Final summary
print(f"\nüéâ BATCH DOWNLOAD COMPLETE!")
print(f"‚úÖ Successfully downloaded: {downloaded_count} videos")
print(f"‚ùå Failed downloads: {failed_count} videos")

if failed_videos:
    print(f"\nüìã Failed videos:")
    for video_id, error in failed_videos[:10]:  # Show first 10 failures
        print(f"  ‚Ä¢ {video_id}: {error}")
    if len(failed_videos) > 10:
        print(f"  ... and {len(failed_videos) - 10} more failures")

print(f"\nüìÅ All downloaded videos are saved in: GAVD-videos/")
print("üîç Each video is saved with its GAVD ID as filename")
print("üí° Note: You may have separate video (.mp4) and audio (.webm) files for highest quality")


üîç Processing all GAVD annotation files...
üìÅ Loading GAVD_Clinical_Annotations_1.csv...
   ‚úì Found 69 unique videos in GAVD_Clinical_Annotations_1.csv
üìÅ Loading GAVD_Clinical_Annotations_2.csv...
   ‚úì Found 80 unique videos in GAVD_Clinical_Annotations_2.csv
üìÅ Loading GAVD_Clinical_Annotations_3.csv...
   ‚úì Found 72 unique videos in GAVD_Clinical_Annotations_3.csv
üìÅ Loading GAVD_Clinical_Annotations_4.csv...
   ‚úì Found 69 unique videos in GAVD_Clinical_Annotations_4.csv
üìÅ Loading GAVD_Clinical_Annotations_5.csv...
   ‚úì Found 73 unique videos in GAVD_Clinical_Annotations_5.csv

üìä SUMMARY:
Total unique videos found: 348
Ready to download 348 videos

üìã Sample videos to download:
  ‚Ä¢ B5hrxKe2nP8 (parkinsons): https://www.youtube.com/watch?v=B5hrxKe2nP8
  ‚Ä¢ TgkxrrhnvlM (abnormal): https://www.youtube.com/watch?v=TgkxrrhnvlM
  ‚Ä¢ 5SBtTbfELUU (abnormal): https://www.youtube.com/watch?v=5SBtTbfELUU
  ‚Ä¢ 0gAHlm79Bjo (abnormal): https://www.youtube.com/watc

ERROR: [youtube] jzkn287X-84: Video unavailable


‚ùå Failed: ERROR: [youtube] jzkn287X-84: Video unavailable

[6/348] Downloading: pu5Vwf1CBO0 (abnormal)
URL: https://www.youtube.com/watch?v=pu5Vwf1CBO0
[youtube] Extracting URL: https://www.youtube.com/watch?v=pu5Vwf1CBO0
[youtube] pu5Vwf1CBO0: Downloading webpage
[youtube] pu5Vwf1CBO0: Downloading tv client config
[youtube] pu5Vwf1CBO0: Downloading tv player API JSON
[youtube] pu5Vwf1CBO0: Downloading ios player API JSON
[youtube] pu5Vwf1CBO0: Downloading m3u8 information
[info] pu5Vwf1CBO0: Downloading 1 format(s): 299+140
[download] GAVD-videos\pu5Vwf1CBO0.mp4 has already been downloaded
‚úÖ Success! (5 completed)

[7/348] Downloading: N9tJ7I4ls6I (abnormal)
URL: https://www.youtube.com/watch?v=N9tJ7I4ls6I
[youtube] Extracting URL: https://www.youtube.com/watch?v=N9tJ7I4ls6I
[youtube] N9tJ7I4ls6I: Downloading webpage
[youtube] N9tJ7I4ls6I: Downloading tv client config
[youtube] N9tJ7I4ls6I: Downloading tv player API JSON
[youtube] N9tJ7I4ls6I: Downloading ios player API JSON
[yout

ERROR: [youtube] gpNLTB58kK0: Video unavailable


‚ùå Failed: ERROR: [youtube] gpNLTB58kK0: Video unavailable

[143/348] Downloading: gRSNMxt9ZmM (abnormal)
URL: https://www.youtube.com/watch?v=gRSNMxt9ZmM
[youtube] Extracting URL: https://www.youtube.com/watch?v=gRSNMxt9ZmM
[youtube] gRSNMxt9ZmM: Downloading webpage
[youtube] gRSNMxt9ZmM: Downloading tv client config
[youtube] gRSNMxt9ZmM: Downloading tv player API JSON
[youtube] gRSNMxt9ZmM: Downloading ios player API JSON
[youtube] gRSNMxt9ZmM: Downloading m3u8 information
[info] Testing format 628
[info] gRSNMxt9ZmM: Downloading 1 format(s): 628+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 7
[download] Destination: GAVD-videos\gRSNMxt9ZmM.f628.mp4
[download] 100% of   27.76MiB in 00:00:08 at 3.08MiB/s                 
[download] Destination: GAVD-videos\gRSNMxt9ZmM.f140.m4a
[download] 100% of  552.91KiB in 00:00:02 at 210.55KiB/s 
[Merger] Merging formats into "GAVD-videos\gRSNMxt9ZmM.mp4"
Deleting original file GAVD-videos\gRSNMxt9ZmM.f628.mp4 (pass -k t

ERROR: [youtube] gXws-A4op-E: Video unavailable


‚ùå Failed: ERROR: [youtube] gXws-A4op-E: Video unavailable

[145/348] Downloading: h2d2sPwD_mU (abnormal)
URL: https://www.youtube.com/watch?v=h2d2sPwD_mU
[youtube] Extracting URL: https://www.youtube.com/watch?v=h2d2sPwD_mU
[youtube] h2d2sPwD_mU: Downloading webpage
[youtube] h2d2sPwD_mU: Downloading tv client config
[youtube] h2d2sPwD_mU: Downloading tv player API JSON
[youtube] h2d2sPwD_mU: Downloading ios player API JSON
[youtube] h2d2sPwD_mU: Downloading m3u8 information
[info] h2d2sPwD_mU: Downloading 1 format(s): 299+140
[download] Destination: GAVD-videos\h2d2sPwD_mU.f299.mp4
[download] 100% of   34.57MiB in 00:00:06 at 5.13MiB/s      
[download] Destination: GAVD-videos\h2d2sPwD_mU.f140.m4a
[download] 100% of    2.23MiB in 00:00:00 at 3.35MiB/s     
[Merger] Merging formats into "GAVD-videos\h2d2sPwD_mU.mp4"
Deleting original file GAVD-videos\h2d2sPwD_mU.f299.mp4 (pass -k to keep)
Deleting original file GAVD-videos\h2d2sPwD_mU.f140.m4a (pass -k to keep)
‚úÖ Success! (142 comp

ERROR: [youtube] JSyLnt3rLxs: Video unavailable


‚ùå Failed: ERROR: [youtube] JSyLnt3rLxs: Video unavailable

[169/348] Downloading: JUMhhwFANKE (parkinsons)
URL: https://www.youtube.com/watch?v=JUMhhwFANKE
[youtube] Extracting URL: https://www.youtube.com/watch?v=JUMhhwFANKE
[youtube] JUMhhwFANKE: Downloading webpage
[youtube] JUMhhwFANKE: Downloading tv client config
[youtube] JUMhhwFANKE: Downloading tv player API JSON
[youtube] JUMhhwFANKE: Downloading ios player API JSON
[youtube] JUMhhwFANKE: Downloading m3u8 information
[info] JUMhhwFANKE: Downloading 1 format(s): 137+140
[download] Destination: GAVD-videos\JUMhhwFANKE.f137.mp4
[download] 100% of   22.68MiB in 00:00:05 at 4.42MiB/s     
[download] Destination: GAVD-videos\JUMhhwFANKE.f140.m4a
[download] 100% of    1.29MiB in 00:00:00 at 3.88MiB/s   
[Merger] Merging formats into "GAVD-videos\JUMhhwFANKE.mp4"
Deleting original file GAVD-videos\JUMhhwFANKE.f137.mp4 (pass -k to keep)
Deleting original file GAVD-videos\JUMhhwFANKE.f140.m4a (pass -k to keep)
‚úÖ Success! (165 compl

ERROR: [youtube] sf5X4YYkWUA: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


‚ùå Failed: ERROR: [youtube] sf5X4YYkWUA: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies

[250/348] Downloading: SG_2SlYRERQ (exercise)
URL: https://www.youtube.com/watch?v=SG_2SlYRERQ
[youtube] Extracting URL: https://www.youtube.com/watch?v=SG_2SlYRERQ
[youtube] SG_2SlYRERQ: Downloading webpage
[youtube] SG_2SlYRERQ: Downloading tv client config
[youtube] SG_2SlYRERQ: Downloading tv player API JSON
[youtube] SG_2SlYRERQ: Downloading ios player API JSON
[youtube] SG_2SlYRERQ: Downloading m3u8 information
[info] SG_2SlYRERQ: Downloading 1 format(s): 136+140
[download] Destination: GAVD-videos\SG_2SlYRERQ.f136.mp4
[download] 100% of    2.22MiB in 00:00:03 at

[download] Got error: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1017)


[download] Got error: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1017)

[290/348] Downloading: W-e2_He_u6Y (antalgic)
URL: https://www.youtube.com/watch?v=W-e2_He_u6Y
[youtube] Extracting URL: https://www.youtube.com/watch?v=W-e2_He_u6Y
[youtube] W-e2_He_u6Y: Downloading webpage




[youtube] W-e2_He_u6Y: Downloading webpage




[youtube] W-e2_He_u6Y: Downloading webpage




[youtube] W-e2_He_u6Y: Downloading webpage




[youtube] W-e2_He_u6Y: Downloading tv client config




[youtube] W-e2_He_u6Y: Downloading tv client config




[youtube] W-e2_He_u6Y: Downloading tv client config




[youtube] W-e2_He_u6Y: Downloading tv client config




[youtube] W-e2_He_u6Y: Downloading iframe API JS




[youtube] W-e2_He_u6Y: Downloading iframe API JS




[youtube] W-e2_He_u6Y: Downloading iframe API JS




[youtube] W-e2_He_u6Y: Downloading iframe API JS




[youtube] W-e2_He_u6Y: Downloading tv player API JSON




[youtube] W-e2_He_u6Y: Downloading tv player API JSON
[youtube] W-e2_He_u6Y: Downloading ios player API JSON
[youtube] W-e2_He_u6Y: Downloading web player API JSON


ERROR: [youtube] W-e2_He_u6Y: Sign in to confirm you‚Äôre not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


‚ùå Failed: ERROR: [youtube] W-e2_He_u6Y: Sign in to confirm you‚Äôre not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies

[291/348] Downloading: wKfAxIgnj50 (exercise)
URL: https://www.youtube.com/watch?v=wKfAxIgnj50
[youtube] Extracting URL: https://www.youtube.com/watch?v=wKfAxIgnj50
[youtube] wKfAxIgnj50: Downloading webpage
[youtube] wKfAxIgnj50: Downloading tv client config
[youtube] wKfAxIgnj50: Downloading tv player API JSON
[youtube] wKfAxIgnj50: Downloading ios player API JSON
[youtube] wKfAxIgnj50: Downloading m3u8 information
[info] wKfAxIgnj50: Downloading 1 format(s): 136+140
[download] Destination: GAVD-videos\wKfAxIgnj50.f136.mp4
[download] 100% of    7.61MiB in 00:00:04 at 1.87MiB/s   
[download] Dest

ERROR: [youtube] XYw9gQkQv_Y: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


‚ùå Failed: ERROR: [youtube] XYw9gQkQv_Y: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies

[302/348] Downloading: y6jgFgMVJmA (abnormal)
URL: https://www.youtube.com/watch?v=y6jgFgMVJmA
[youtube] Extracting URL: https://www.youtube.com/watch?v=y6jgFgMVJmA
[youtube] y6jgFgMVJmA: Downloading webpage
[youtube] y6jgFgMVJmA: Downloading tv client config
[youtube] y6jgFgMVJmA: Downloading tv player API JSON
[youtube] y6jgFgMVJmA: Downloading ios player API JSON
[youtube] y6jgFgMVJmA: Downloading m3u8 information
[info] Testing format 625
[info] y6jgFgMVJmA: Downloading 1 format(s): 625+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 5
[downl

ERROR: [youtube] YjRoLtP1di0: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


‚ùå Failed: ERROR: [youtube] YjRoLtP1di0: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies

[308/348] Downloading: yk6iSZuuk24 (stroke)
URL: https://www.youtube.com/watch?v=yk6iSZuuk24
[youtube] Extracting URL: https://www.youtube.com/watch?v=yk6iSZuuk24
[youtube] yk6iSZuuk24: Downloading webpage
[youtube] yk6iSZuuk24: Downloading tv client config
[youtube] yk6iSZuuk24: Downloading tv player API JSON
[youtube] yk6iSZuuk24: Downloading ios player API JSON
[youtube] yk6iSZuuk24: Downloading m3u8 information
[info] Testing format 609
[info] yk6iSZuuk24: Downloading 1 format(s): 609+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 47
[downlo

ERROR: [youtube] yULxvDc9e8c: Video unavailable


‚ùå Failed: ERROR: [youtube] yULxvDc9e8c: Video unavailable

[312/348] Downloading: Z_SEzAFi6-s (stroke)
URL: https://www.youtube.com/watch?v=Z_SEzAFi6-s
[youtube] Extracting URL: https://www.youtube.com/watch?v=Z_SEzAFi6-s
[youtube] Z_SEzAFi6-s: Downloading webpage
[youtube] Z_SEzAFi6-s: Downloading tv client config
[youtube] Z_SEzAFi6-s: Downloading tv player API JSON
[youtube] Z_SEzAFi6-s: Downloading ios player API JSON
[youtube] Z_SEzAFi6-s: Downloading m3u8 information
[info] Z_SEzAFi6-s: Downloading 1 format(s): 137+140
[download] Destination: GAVD-videos\Z_SEzAFi6-s.f137.mp4
[download] 100% of    9.44MiB in 00:00:03 at 2.53MiB/s   
[download] Destination: GAVD-videos\Z_SEzAFi6-s.f140.m4a
[download] 100% of  725.62KiB in 00:00:00 at 1.30MiB/s     
[Merger] Merging formats into "GAVD-videos\Z_SEzAFi6-s.mp4"
Deleting original file GAVD-videos\Z_SEzAFi6-s.f137.mp4 (pass -k to keep)
Deleting original file GAVD-videos\Z_SEzAFi6-s.f140.m4a (pass -k to keep)
‚úÖ Success! (302 completed

ERROR: [youtube] zMeKiOtDG9I: Video unavailable. This video is no longer available because the YouTube account associated with this video has been terminated.


‚ùå Failed: ERROR: [youtube] zMeKiOtDG9I: Video unavailable. This video is no longer available because the YouTube account associated with this video has been terminated.

[319/348] Downloading: ZmpGIPxKlok (abnormal)
URL: https://www.youtube.com/watch?v=ZmpGIPxKlok
[youtube] Extracting URL: https://www.youtube.com/watch?v=ZmpGIPxKlok
[youtube] ZmpGIPxKlok: Downloading webpage
[youtube] ZmpGIPxKlok: Downloading tv client config
[youtube] ZmpGIPxKlok: Downloading tv player API JSON
[youtube] ZmpGIPxKlok: Downloading ios player API JSON
[youtube] ZmpGIPxKlok: Downloading m3u8 information
[info] ZmpGIPxKlok: Downloading 1 format(s): 136+140
[download] Destination: GAVD-videos\ZmpGIPxKlok.f136.mp4
[download] 100% of   21.52MiB in 00:00:07 at 2.90MiB/s     
[download] Destination: GAVD-videos\ZmpGIPxKlok.f140.m4a
[download] 100% of    2.53MiB in 00:00:00 at 3.39MiB/s   
[Merger] Merging formats into "GAVD-videos\ZmpGIPxKlok.mp4"
Deleting original file GAVD-videos\ZmpGIPxKlok.f140.m4a (pass 

[download] Got error: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1017)


[info] Unable to download format 616. Skipping...
[info] eCCYhDSDlDc: Downloading 1 format(s): 399+140
[download] Destination: GAVD-videos\eCCYhDSDlDc.f399.mp4
[download] 100% of    4.41MiB in 00:00:03 at 1.27MiB/s   
[download] Destination: GAVD-videos\eCCYhDSDlDc.f140.m4a
[download] 100% of  774.71KiB in 00:00:00 at 1.31MiB/s   
[Merger] Merging formats into "GAVD-videos\eCCYhDSDlDc.mp4"
Deleting original file GAVD-videos\eCCYhDSDlDc.f140.m4a (pass -k to keep)
Deleting original file GAVD-videos\eCCYhDSDlDc.f399.mp4 (pass -k to keep)
‚úÖ Success! (313 completed)

[325/348] Downloading: EHx08gSNfxQ (normal)
URL: https://www.youtube.com/watch?v=EHx08gSNfxQ
[youtube] Extracting URL: https://www.youtube.com/watch?v=EHx08gSNfxQ
[youtube] EHx08gSNfxQ: Downloading webpage
[youtube] EHx08gSNfxQ: Downloading tv client config
[youtube] EHx08gSNfxQ: Downloading tv player API JSON
[youtube] EHx08gSNfxQ: Downloading ios player API JSON
[youtube] EHx08gSNfxQ: Downloading m3u8 information
[info] EHx

[download] Got error: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1017)


[download] Got error: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1017)

[328/348] Downloading: yFBy0X0D-w8 (normal)
URL: https://www.youtube.com/watch?v=yFBy0X0D-w8
[youtube] Extracting URL: https://www.youtube.com/watch?v=yFBy0X0D-w8
[youtube] yFBy0X0D-w8: Downloading webpage
[youtube] yFBy0X0D-w8: Downloading tv client config
[youtube] yFBy0X0D-w8: Downloading tv player API JSON
[youtube] yFBy0X0D-w8: Downloading ios player API JSON
[youtube] yFBy0X0D-w8: Downloading m3u8 information
[info] Testing format 620
[info] yFBy0X0D-w8: Downloading 1 format(s): 620+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 3
[download] Destination: GAVD-videos\yFBy0X0D-w8.f620.mp4
[download] 100% of    4.30MiB in 00:00:03 at 1.09MiB/s                 
[download] Destination: GAVD-videos\yFBy0X0D-w8.f140.m4a
[download] 100% of  250.98KiB in 00:00:03 at 80.00KiB/s  
[Merger] Merging formats into "GAVD-videos\yFBy0X0D-w8.mp4"
Deleting original 

In [3]:
# SEQUENCE EXTRACTION: Create individual sequence videos from downloaded videos
import os
import pandas as pd
import subprocess
import json
from collections import defaultdict
import glob

# Create sequences folder
sequences_dir = "GAVD-sequences"
os.makedirs(sequences_dir, exist_ok=True)

# Load all annotation files to get sequence information
data_dir = 'GAVD/data/'
annotation_files = [
    'GAVD_Clinical_Annotations_1.csv',
    'GAVD_Clinical_Annotations_2.csv', 
    'GAVD_Clinical_Annotations_3.csv',
    'GAVD_Clinical_Annotations_4.csv',
    'GAVD_Clinical_Annotations_5.csv'
]

print("üîç Processing annotation files to extract sequence information...")

# Dictionary to store sequence info: {seq_id: {video_id, start_frame, end_frame, metadata}}
sequences_info = {}
video_sequences = defaultdict(list)  # Group sequences by video_id

for filename in annotation_files:
    filepath = os.path.join(data_dir, filename)
    print(f"üìÅ Loading {filename}...")
    
    try:
        df_temp = pd.read_csv(filepath, low_memory=False)
        
        # Group by sequence ID to get frame ranges
        for seq_id, seq_group in df_temp.groupby('seq'):
            if pd.isna(seq_id):
                continue
                
            video_id = seq_group['id'].iloc[0]
            if pd.isna(video_id):
                continue
                
            # Get frame range for this sequence
            start_frame = seq_group['frame_num'].min()
            end_frame = seq_group['frame_num'].max()
            
            # Store sequence metadata
            sequences_info[seq_id] = {
                'video_id': video_id,
                'start_frame': start_frame,
                'end_frame': end_frame,
                'gait_pat': seq_group['gait_pat'].iloc[0],
                'cam_view': seq_group['cam_view'].iloc[0],
                'dataset': seq_group['dataset'].iloc[0],
                'source_file': filename,
                'frame_count': end_frame - start_frame + 1
            }
            
            video_sequences[video_id].append(seq_id)
        
        print(f"   ‚úì Processed {len(df_temp['seq'].unique())} sequences from {filename}")
        
    except Exception as e:
        print(f"   ‚ùå Error processing {filename}: {e}")

print(f"\nüìä SEQUENCE SUMMARY:")
print(f"Total sequences found: {len(sequences_info)}")
print(f"Videos with sequences: {len(video_sequences)}")

# Show sample sequences
print(f"\nüìã Sample sequences:")
sample_seqs = list(sequences_info.items())[:5]
for seq_id, info in sample_seqs:
    print(f"  ‚Ä¢ {seq_id}: {info['video_id']} frames {info['start_frame']}-{info['end_frame']} ({info['gait_pat']})")

# Check which sequences already exist
existing_sequences = set()
for seq_file in glob.glob(os.path.join(sequences_dir, "*.mp4")):
    seq_name = os.path.splitext(os.path.basename(seq_file))[0]
    existing_sequences.add(seq_name)

sequences_to_process = {k: v for k, v in sequences_info.items() if k not in existing_sequences}

print(f"\nüîÑ PROCESSING STATUS:")
print(f"Already processed: {len(existing_sequences)} sequences")
print(f"To be processed: {len(sequences_to_process)} sequences")

if len(sequences_to_process) == 0:
    print("‚úÖ All sequences have already been processed!")
else:
    print(f"\n‚ö†Ô∏è  This will process {len(sequences_to_process)} sequences.")
    print("üí° TIP: You can interrupt the process anytime with Ctrl+C")

# Function to get video frame rate and duration
def get_video_info(video_path):
    """Get video information using ffprobe"""
    try:
        cmd = [
            'ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_streams', video_path
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
        
        for stream in data['streams']:
            if stream['codec_type'] == 'video':
                fps_str = stream.get('r_frame_rate', '30/1')
                fps = eval(fps_str) if '/' in fps_str else float(fps_str)
                duration = float(stream.get('duration', 0))
                return fps, duration
        return 30.0, 0.0  # Default values
    except Exception as e:
        print(f"   ‚ö†Ô∏è  Could not get video info, using defaults: {e}")
        return 30.0, 0.0

# Function to extract sequence from video
def extract_sequence(video_path, seq_id, start_frame, end_frame, output_path):
    """Extract a sequence from video using ffmpeg"""
    try:
        # Get video info
        fps, duration = get_video_info(video_path)
        
        # Calculate time positions
        start_time = start_frame / fps
        end_time = end_frame / fps
        duration_seq = end_time - start_time
        
        # FFmpeg command to extract sequence
        cmd = [
            'ffmpeg', '-y',  # Overwrite output file
            '-i', video_path,
            '-ss', str(start_time),  # Start time
            '-t', str(duration_seq),  # Duration
            '-c:v', 'libx264',  # Video codec
            '-c:a', 'aac',  # Audio codec
            '-avoid_negative_ts', 'make_zero',
            output_path
        ]
        
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return True, ""
        
    except subprocess.CalledProcessError as e:
        return False, f"FFmpeg error: {e.stderr}"
    except Exception as e:
        return False, f"Error: {str(e)}"

# Check if ffmpeg is available
def check_ffmpeg():
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
        return True
    except:
        return False

if not check_ffmpeg():
    print("‚ùå FFmpeg is not installed or not in PATH!")
    print("Please install FFmpeg to extract video sequences.")
    print("Download from: https://ffmpeg.org/download.html")
else:
    print("‚úÖ FFmpeg is available")
    
    # Start sequence extraction
    if len(sequences_to_process) > 0:
        print(f"\nüöÄ Starting sequence extraction...")
        
        processed_count = 0
        failed_count = 0
        failed_sequences = []
        
        for seq_id, seq_info in sequences_to_process.items():
            video_id = seq_info['video_id']
            start_frame = seq_info['start_frame']
            end_frame = seq_info['end_frame']
            gait_type = seq_info['gait_pat']
            
            # Find the downloaded video file (could be .mp4 or .webm)
            video_files = glob.glob(f"GAVD-videos/{video_id}.*")
            video_file = None
            
            # Prefer .mp4 files over .webm
            for vf in video_files:
                if vf.endswith('.mp4'):
                    video_file = vf
                    break
            if not video_file and video_files:
                video_file = video_files[0]  # Use any available file
            
            if not video_file:
                failed_count += 1
                failed_sequences.append((seq_id, f"Video file not found for {video_id}"))
                print(f"‚ùå [{processed_count+failed_count+1}/{len(sequences_to_process)}] {seq_id}: Video file not found")
                continue
            
            output_path = os.path.join(sequences_dir, f"{seq_id}.mp4")
            
            print(f"\n[{processed_count+failed_count+1}/{len(sequences_to_process)}] Extracting: {seq_id}")
            print(f"   Video: {video_id} ({gait_type})")
            print(f"   Frames: {start_frame}-{end_frame} ({seq_info['frame_count']} frames)")
            print(f"   Source: {os.path.basename(video_file)}")
            
            success, error = extract_sequence(video_file, seq_id, start_frame, end_frame, output_path)
            
            if success:
                processed_count += 1
                print(f"   ‚úÖ Success! Saved to: {seq_id}.mp4")
                # delay for 1 second
                time.sleep(1)
            else:
                failed_count += 1
                failed_sequences.append((seq_id, error))
                print(f"   ‚ùå Failed: {error}")
        
        # Final summary
        print(f"\nüéâ SEQUENCE EXTRACTION COMPLETE!")
        print(f"‚úÖ Successfully processed: {processed_count} sequences")
        print(f"‚ùå Failed extractions: {failed_count} sequences")
        
        if failed_sequences:
            print(f"\nüìã Failed sequences:")
            for seq_id, error in failed_sequences[:10]:  # Show first 10 failures
                print(f"  ‚Ä¢ {seq_id}: {error}")
            if len(failed_sequences) > 10:
                print(f"  ... and {len(failed_sequences) - 10} more failures")
        
        print(f"\nüìÅ All sequence videos are saved in: {sequences_dir}/")
        print("üîç Each sequence is saved as: <sequence_id>.mp4")
        print("üí° Sequences are extracted based on frame numbers from the annotation data")


üîç Processing annotation files to extract sequence information...
üìÅ Loading GAVD_Clinical_Annotations_1.csv...
   ‚úì Processed 374 sequences from GAVD_Clinical_Annotations_1.csv
üìÅ Loading GAVD_Clinical_Annotations_2.csv...
   ‚úì Processed 353 sequences from GAVD_Clinical_Annotations_2.csv
üìÅ Loading GAVD_Clinical_Annotations_3.csv...
   ‚úì Processed 369 sequences from GAVD_Clinical_Annotations_3.csv
üìÅ Loading GAVD_Clinical_Annotations_4.csv...
   ‚úì Processed 313 sequences from GAVD_Clinical_Annotations_4.csv
üìÅ Loading GAVD_Clinical_Annotations_5.csv...
   ‚úì Processed 469 sequences from GAVD_Clinical_Annotations_5.csv

üìä SEQUENCE SUMMARY:
Total sequences found: 1874
Videos with sequences: 348

üìã Sample sequences:
  ‚Ä¢ cljan9b4p00043n6ligceanyp: B5hrxKe2nP8 frames 1757-2268 (parkinsons)
  ‚Ä¢ cljanb45y00083n6lmh1qhydd: B5hrxKe2nP8 frames 2532-2746 (parkinsons)
  ‚Ä¢ cljao8kyf000d3n6l0x9kgmav: TgkxrrhnvlM frames 1-148 (abnormal)
  ‚Ä¢ cljaoak47000i3n6lsrb9rit