# YouTube Highlight Shorts Extractor
This notebook extracts short clips (YouTube Shorts format) from a YouTube highlight video by detecting special moments using audio spike analysis. Clips are saved as separate files for easy upload.

In [21]:
# Install required packages (comment this line out once they are installed)
!pip install pytube librosa numpy



In [29]:
!pip install pytubefix

Collecting pytubefix
  Downloading pytubefix-9.4.1-py3-none-any.whl.metadata (5.4 kB)
Collecting aiohttp>=3.12.13 (from pytubefix)
  Downloading aiohttp-3.12.15-cp312-cp312-win_amd64.whl.metadata (7.9 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp>=3.12.13->pytubefix)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp>=3.12.13->pytubefix)
  Downloading aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting propcache>=0.2.0 (from aiohttp>=3.12.13->pytubefix)
  Downloading propcache-0.3.2-cp312-cp312-win_amd64.whl.metadata (12 kB)
Collecting yarl<2.0,>=1.17.0 (from aiohttp>=3.12.13->pytubefix)
  Downloading yarl-1.20.1-cp312-cp312-win_amd64.whl.metadata (76 kB)
Downloading pytubefix-9.4.1-py3-none-any.whl (768 kB)
   ---------------------------------------- 0.0/768.6 kB ? eta -:--:--
   ------------- -------------------------- 262.1/768.6 kB ? eta -:--:--
   ---------------------------------------- 768.6/768.6 kB

In [139]:
import os
from pytube import YouTube
import librosa
import numpy as np
import subprocess

In [141]:
# --- PARAMETERS ---
# Where the source comes from and where downloads / clips are written.
youtube_url = 'https://www.youtube.com/watch?v=II_-tx-G0Kw'  # Replace with actual URL
output_dir = 'highlight_shorts'
os.makedirs(output_dir, exist_ok=True)  # create the output folder up front

# Detection and clipping settings.
energy_threshold = 1.5  # a frame counts as special when its energy exceeds this multiple of the mean
clip_duration = 30  # length of each extracted short, in seconds

In [143]:
# --- Download main video file if not present ---

from pytubefix import YouTube

import os



# Location of the full source video; later cells read this path.
video_path = os.path.join(output_dir, 'video.mp4')

if not os.path.exists(video_path):
    yt = YouTube(youtube_url)

    # Pick the first progressive MP4 stream after ordering by resolution, descending.
    video_stream = (
        yt.streams
        .filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )

    # first() yields None when nothing matches the filter; fail with a clear
    # message instead of an AttributeError on the download call below.
    if video_stream is None:
        raise ValueError("No progressive MP4 stream found for this video.")

    video_stream.download(output_path=output_dir, filename='video.mp4')
    print(f"✅ Main video downloaded: {video_path}")
else:
    # Reuse the file from a previous run rather than downloading again.
    print(f"✅ Main video already exists: {video_path}")

✅ Main video downloaded: highlight_shorts\video.mp4


In [145]:
# --- STEP 1: Download audio only (fast, low data) ---
import os
from pytubefix import YouTube

print("Downloading audio stream...")

# Bound before the download attempt so the name always exists for later cells.
audio_path = os.path.join(output_dir, "audio.mp4")

try:
    # 1. Confirm URL
    print(f"Using YouTube URL: {youtube_url}")

    # 2. Create YouTube object
    yt = YouTube(youtube_url)

    # 3. Filter for audio-only streams
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream found for this video.")

    # 4. Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # 5. Download
    audio_stream.download(output_path=output_dir, filename="audio.mp4")

    print(f"Audio downloaded successfully: {audio_path}")

except Exception as e:
    # Report the problem, then re-raise: the analysis cell cannot run without
    # the audio file, so continuing would only fail later with a less clear error.
    print(f"Error downloading audio: {e}")
    raise


Downloading audio stream...
Using YouTube URL: https://www.youtube.com/watch?v=II_-tx-G0Kw
Audio downloaded successfully: highlight_shorts\audio.mp4


In [147]:
# --- STEP 2: Analyze audio for spikes (special moments) ---
print('Analyzing audio for special moments...')

# Decode the downloaded audio track into a sample array and its sample rate.
y, sr = librosa.load(audio_path, sr=None)

# One-second analysis windows that start every half second (50% overlap).
frame_length = sr
hop_length = sr // 2

# Per-window energy: sum of absolute sample values inside the window.
energy = np.array([
    np.abs(y[start:start + frame_length]).sum()
    for start in range(0, len(y), hop_length)
])
mean_energy = energy.mean()

# Windows whose energy exceeds the configured multiple of the mean are "special".
special_indices = np.flatnonzero(energy > energy_threshold * mean_energy)

# Convert window indices to whole seconds from the start of the audio.
special_times = [int(idx * hop_length / sr) for idx in special_indices]

Analyzing audio for special moments...


  y, sr = librosa.load(audio_path, sr=None)


In [149]:
# --- OPTIMIZED STEP 3: Professional Highlight Shorts Extraction ---

# This cell merges nearby highlight moments, aligns shorts to scene boundaries to avoid cut-off scenes, and extracts only the necessary video segments.
# Increase min_gap_seconds for fewer, more meaningful shorts (default: 45 seconds).

import numpy as np

import subprocess



min_gap_seconds = 45  # Minimum gap between shorts (in seconds, increase to reduce shorts)

# Duration of each short (in seconds, adjust as needed).
# Note: this reassigns the clip_duration set in the parameters cell.
clip_duration = 30

# Use detected highlight moments (should be defined in previous cells).
# Prefer final_moments when it exists and is non-empty, otherwise fall back to
# special_times. Emptiness is tested with len() rather than truthiness so that
# NumPy arrays are accepted too (bool() of a multi-element array raises).
_final_moments = globals().get('final_moments')
_special_times = globals().get('special_times')

if _final_moments is not None and len(_final_moments) > 0:
    moments_list = sorted(_final_moments)
elif _special_times is not None and len(_special_times) > 0:
    moments_list = sorted(_special_times)
else:
    raise ValueError('No highlight moments found. Run detection cells first.')



# If scene_changes is available, align shorts to nearest scene boundary to avoid cut-off scenes

def align_to_scene(moment, scene_changes):
    """Snap a highlight moment back to the latest scene change at or before it.

    Args:
        moment: Timestamp of the highlight.
        scene_changes: Sequence of scene-change timestamps in the same units
            as ``moment``. The values may be in any order.

    Returns:
        The largest scene-change value that is <= ``moment`` (a NumPy scalar),
        or ``moment`` unchanged when no scene change precedes it.
    """
    scene_changes = np.asarray(scene_changes)

    # Keep only the boundaries that do not lie after the moment.
    before = scene_changes[scene_changes <= moment]
    if before.size > 0:
        # max() rather than the last element: the closest preceding boundary is
        # the largest one, and the input is not guaranteed to be sorted.
        return before.max()
    return moment


# Align to scene boundaries only when a non-empty scene_changes is defined.
# len() is used instead of truthiness so a NumPy array of scene changes works
# (bool() of a multi-element array raises ValueError).
_scene_changes = globals().get('scene_changes')

if _scene_changes is not None and len(_scene_changes) > 0:
    aligned_moments = [align_to_scene(m, _scene_changes) for m in moments_list]
else:
    aligned_moments = moments_list


# Merge moments that are too close together: a moment is kept only when it is
# at least min_gap_seconds after the previously kept one.
merged_moments = []

for moment in aligned_moments:
    if not merged_moments or moment - merged_moments[-1] >= min_gap_seconds:
        merged_moments.append(moment)


print(f'After merging and aligning, {len(merged_moments)} shorts will be created.')



# --- Extract shorts with optimized, scene-aligned moments ---

failed_clips = []  # output paths for which ffmpeg reported an error

for idx, moment in enumerate(merged_moments):
    start = max(0, moment)  # Start at scene boundary or moment, never before 0

    out_clip = os.path.join(output_dir, f'pro_short_{idx+1:02d}.mp4')

    cmd = [
        'ffmpeg', '-y',
        # -ss placed before -i makes ffmpeg seek in the input instead of
        # decoding and discarding everything up to the start position.
        '-ss', str(start),
        '-i', video_path,
        '-t', str(clip_duration),
        '-c:v', 'libx264',
        '-c:a', 'aac',
        out_clip,
    ]

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Only report success when ffmpeg actually exited cleanly.
    if result.returncode != 0:
        failed_clips.append(out_clip)
        print(f'Failed: {out_clip} (ffmpeg exit code {result.returncode})')
        stderr_lines = result.stderr.decode('utf-8', errors='replace').strip().splitlines()
        if stderr_lines:
            # The last stderr line usually carries the actual error message.
            print(f'  ffmpeg: {stderr_lines[-1]}')
        continue

    print(f'Saved: {out_clip}')

if failed_clips:
    print(f'{len(failed_clips)} of {len(merged_moments)} shorts could not be extracted.')
else:
    print('All professional, scene-aligned shorts extracted!')

After merging and aligning, 4 shorts will be created.
Saved: highlight_shorts\pro_short_01.mp4
Saved: highlight_shorts\pro_short_02.mp4
Saved: highlight_shorts\pro_short_03.mp4
Saved: highlight_shorts\pro_short_04.mp4
All professional, scene-aligned shorts extracted!
