**Imports**

In [None]:
import os
from pathlib import Path
import pandas as pd
import yt_dlp
import re
from urllib.parse import urlparse, parse_qs

**Download Videos**

In [2]:
def download_video(url, name, output_path, start_time=0, duration=10):
    """Download a clip of a video with yt-dlp and save it as ``<name>.mp4``.

    Args:
        url: Download target handed to ``yt_dlp.YoutubeDL.download``.
        name: File name of the clip. A trailing ".mp4" is optional and is
            never duplicated.
        output_path: Directory the clip is written to (``str`` or ``Path``).
        start_time: Start of the requested range, passed to yt-dlp's
            ``download_ranges`` option.
        duration: Length of the requested range; the range ends at
            ``start_time + duration``.

    Returns:
        True if yt-dlp finished without raising, False otherwise. Errors are
        printed rather than re-raised.
    """
    # Callers may pass a name that already carries the extension; appending
    # ".mp4" unconditionally produced files such as "clip.mp4.mp4".
    file_name = str(name)
    if not file_name.lower().endswith('.mp4'):
        file_name = f'{file_name}.mp4'

    # Path() lets callers pass either a plain string or a Path object.
    target_file = Path(output_path) / file_name
    end_time = start_time + duration

    ydl_opts = {
        # NOTE(review): 'best' may pick a non-MP4 container while the file is
        # always named .mp4 - consider 'best[ext=mp4]/best' if that matters.
        'format': 'best',
        'outtmpl': str(target_file),
        'quiet': True,
        'no_warnings': True,
        # yt-dlp calls this to learn which section(s) of the video to fetch.
        'download_ranges': lambda info_dict, ydl: [
            {
                'start_time': start_time,
                'end_time': end_time,
            }
        ],
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
            print(f"Downloaded video: {name}")
        return True
    except Exception as e:
        # Best-effort by design: report the failure and let the caller go on.
        print(f"Error downloading video: {e}")
        return False

**Extract YouTube ID**

In [6]:
def extract_youtube_id(url):
    """Return the 11-character YouTube video ID contained in ``url``.

    Recognised forms (with or without scheme / ``www.`` / ``m.`` prefix):
    ``youtube.com/shorts/ID``, ``youtube.com/embed/ID`` (also on
    ``youtube-nocookie.com``), ``youtube.com/v/ID``, ``youtube.com/live/ID``,
    ``youtu.be/ID``, ``youtube.com/watch?...v=ID`` (``v`` may appear anywhere
    in the query string) and ``youtube.com/ID``.

    Args:
        url: URL string to inspect.

    Returns:
        The video ID as a string, or None when ``url`` is empty, not a string,
        or contains no recognisable ID. The outcome is also printed, except
        for empty / non-string input.
    """
    if not url or not isinstance(url, str):
        return None

    # An ID is exactly 11 characters; the look-ahead rejects longer path
    # segments (e.g. "subscription_center") instead of truncating them.
    video_id = r'([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'

    patterns = [
        r'youtube\.com/shorts/' + video_id,  # www.youtube.com/shorts/VIDEO_ID
        r'youtube(?:-nocookie)?\.com/embed/' + video_id,  # www.youtube.com/embed/VIDEO_ID
        r'youtube\.com/v/' + video_id,  # www.youtube.com/v/VIDEO_ID
        r'youtube\.com/live/' + video_id,  # www.youtube.com/live/VIDEO_ID
        r'youtu\.be/' + video_id,  # youtu.be/VIDEO_ID
        # www.youtube.com/watch?v=VIDEO_ID, with `v` at any position in the query
        r'youtube\.com/watch\?(?:[^#\s]*?&)?v=' + video_id,
        r'youtube\.com/' + video_id,  # www.youtube.com/VIDEO_ID
    ]

    # Patterns are tried in order; the first match wins.
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            print("Found video ID:", match.group(1))
            return match.group(1)

    print("No video ID found in URL:", url)
    return None
    

**Create Labels**

In [7]:
def add_label(csv_path, url, start_time=0, duration=10, action_class=None, dance_style=None, labels=None, division=None, pattern=None):
    """Append one labelled clip to the labels CSV, creating the file if needed.

    Args:
        csv_path: Path of the labels CSV.
        url: YouTube URL of the source video; only the extracted video ID is
            stored.
        start_time: Stored in the ``start_time`` column.
        duration: Stored in the ``duration`` column.
        action_class, dance_style, labels, division, pattern: Free-form
            annotations stored in the columns of the same name.

    Returns:
        None. A confirmation (or the reason for skipping) is printed.
    """
    columns = ['id', 'youtube_id', 'start_time', 'duration', 'action_class',
               'dance_style', 'labels', 'division', 'pattern']

    # Validate first: a row without a video ID cannot be downloaded later, so
    # do not record it at all.
    video_id = extract_youtube_id(url)
    if video_id is None:
        print(f"Skipped {url!r}: no YouTube ID could be extracted, nothing was written to {csv_path}")
        return

    # Load the existing labels, or start from an empty table.
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
    else:
        df = pd.DataFrame(columns=columns)

    # Next ID = highest existing numeric ID + 1 (blank / non-numeric IDs ignored).
    known_ids = pd.to_numeric(df['id'], errors='coerce').dropna()
    next_id = int(known_ids.max()) + 1 if len(known_ids) else 1

    new_row = {
        'id': next_id,
        'youtube_id': video_id,
        'start_time': start_time,
        'duration': duration,
        'action_class': action_class,
        'dance_style': dance_style,
        'labels': labels,
        'division': division,
        'pattern': pattern,
    }

    # Rebuild the frame from records instead of pd.concat: concatenating with
    # an empty or all-NA frame is, as far as I can tell, deprecated in recent
    # pandas releases. Existing column order is kept; unknown keys go last.
    rows = df.to_dict('records')
    rows.append(new_row)
    all_columns = list(df.columns) + [key for key in new_row if key not in df.columns]
    df = pd.DataFrame(rows, columns=all_columns)

    # Make sure the target directory exists, then write the file once.
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(csv_path, index=False)
    print(f"Added video with ID {next_id} to {csv_path}")

In [8]:
# Location of the labels CSV: written by add_label, read by download_all_from_csv.
# The data directory is created up front so the CSV can be written on first use.
os.makedirs('../data', exist_ok=True)
CSV_PATH = '../data/labels.csv'

# The calls below record how labels.csv was populated. add_label appends a new
# row on every call, so they are presumably kept commented out to avoid
# duplicating rows when the notebook is re-run.
# NOTE(review): the 4th positional argument is `duration`, yet values such as
# (66, 69) look like start/end timestamps - confirm. As written, the requested
# clip spans start_time .. start_time + duration.
# # Function format: add_label(csv_path, url,start_time=0, duration=10, action_class=None, dance_style=None, labels=None, division=None, pattern=None)

# # champion
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=70Q_bvTQD7I&list=RD70Q_bvTQD7I&start_radio=1', 66,69 , 'dance','west_coast_swing','sugar_push','champion','basic_triple_triple')
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=_Gi93uxXLKQ&list=RD_Gi93uxXLKQ&start_radio=1', 47,51 , 'dance','west_coast_swing','sugar_tag','champion','basic_sugar_tag')
# # all-star
# add_label(CSV_PATH,'https://www.youtube.com/watch?v=bTHLn7_7Tgo',24 ,28 ,'dance','west_coast_swing','sugar_push','allstar','basic_triple_triple')
# add_label(CSV_PATH,'https://www.youtube.com/watch?v=30WvvwyAeRY', 36, 40,'dance','west_coast_swing','sugar_tag','allstar','basic_sugar_tag')

# # advanced
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=I-sehWlsqsM', 16, 19, 'dance','west_coast_swing','sugar_push','advanced','basic_triple_triple')
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=D_FExe2NGLA',66 ,69 , 'dance','west_coast_swing','sugar_tag','advanced','basic_sugar_tag')
# # intermediate
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=ydMg8IWP_WY', 39, 42 , 'dance','west_coast_swing','sugar_push','intermediate','basic_triple_triple')
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=E2Es5buSd0M', 40, 43, 'dance','west_coast_swing','sugar_tag','intermediate','basic_sugar_tag')

# # novice
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=VS0Cb7NoeRo&list=RDVS0Cb7NoeRo&start_radio=1', 44, 48, 'dance','west_coast_swing','sugar_push','novice','basic_triple_triple')
# add_label(CSV_PATH, 'https://www.youtube.com/watch?v=g2Q6Wwfo6eI',94 ,98 , 'dance','west_coast_swing','sugar_tag','novice','basic_sugar_tag')

# newcomer - no solo videos available

**Batch Download Function**

In [None]:
def download_all_from_csv(csv_path, video_root='../data/raw/videos'):
    """Download the clip described by every row of the labels CSV.

    Each clip is stored as ``<video_root>/<division>/<division>_<id>.mp4``.

    Args:
        csv_path: Labels CSV with at least the columns ``id``, ``youtube_id``,
            ``division``, ``start_time`` and ``duration``.
        video_root: Directory under which one sub-folder per division is
            created. Defaults to the location this notebook has always used.

    Returns:
        None. Progress is printed; failed downloads are reported by
        ``download_video`` and do not stop the loop.
    """
    df = pd.read_csv(csv_path)
    root = Path(video_root)

    # to_dict('records') yields plain Python values for each row.
    for row in df.to_dict('records'):
        youtube_id = row['youtube_id']
        if pd.isna(youtube_id):
            # Nothing to download for a row whose video ID is missing.
            print(f"Skipping {row['id']}: no youtube_id in {csv_path}")
            continue

        # Create folder with skill level (division)
        video_dir = root / str(row['division'])
        video_dir.mkdir(parents=True, exist_ok=True)

        # The CSV stores bare video IDs; hand yt-dlp an unambiguous watch URL.
        source = str(youtube_id)
        if not source.startswith(('http://', 'https://')):
            source = f'https://www.youtube.com/watch?v={source}'

        print(f"Downloading {row['id']}: {row['division']}...")
        download_video(
            url=source,
            # download_video appends ".mp4" itself; adding it here as well
            # produced "<division>_<id>.mp4.mp4".
            name=f"{row['division']}_{row['id']}",
            output_path=video_dir,
            start_time=row['start_time'],
            duration=row['duration'],
        )

In [10]:
# Download the clip for every row of the labels CSV, one folder per division.
# NOTE(review): the saved output of this cell prints pattern names
# (sugar_push / sugar_tag) where the division is shown, so it appears to come
# from an older labels.csv layout - re-run to refresh it.
download_all_from_csv(CSV_PATH)

Downloading 1: sugar_push...
Downloaded video: sugar_push_1.mp4
Downloading 2: sugar_tag...
Downloaded video: sugar_tag_2.mp4                     
Downloading 3: sugar_push...
Downloaded video: sugar_push_3.mp4                    
Downloading 4: sugar_tag...
Downloaded video: sugar_tag_4.mp4                     
Downloading 5: sugar_push...
Downloaded video: sugar_push_5.mp4                    
Downloading 6: sugar_tag...
Downloaded video: sugar_tag_6.mp4                     
Downloading 7: sugar_push...
Downloaded video: sugar_push_7.mp4                    
Downloading 8: sugar_tag...
Downloaded video: sugar_tag_8.mp4                     
Downloading 9: sugar_push...
Downloaded video: sugar_push_9.mp4                    
Downloading 10: sugar_tag...
Downloaded video: sugar_tag_10.mp4                    
