In [1]:
from pathlib import Path

from utilities import (
    extract_audio_duration,
    display_verses_with_timing,
    create_ass_file,
    extract_lyrics_with_timing
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Folder holding the isolated vocal stems, and the track transcribed in this run.
vocal_file_path = Path("./audio_processing/karaoke_files/input_vocals/")
vocals = vocal_file_path.joinpath("droppin_seeds.mp3")

# The project helper receives the path as a plain string. Judging by the
# recorded output it returns a list of verse dicts with per-word timings.
verses = extract_lyrics_with_timing(str(vocals))

# Echo the result as the cell output.
verses

[{'verse_number': 1,
  'text': ' like  adam  in  the  garden  of  eve  my  bitch  got  an  apple  bottom  and  she  swallowed  my  seeds  follow',
  'start': 11.959999999999996,
  'end': 17.86,
  'words': [{'word': ' like',
    'word_number': 1,
    'start': 11.959999999999996,
    'end': 12.539999999999997,
    'probability': 0.052825927734375},
   {'word': ' adam',
    'word_number': 2,
    'start': 12.539999999999997,
    'end': 13.12,
    'probability': 0.467529296875},
   {'word': ' in',
    'word_number': 3,
    'start': 13.12,
    'end': 13.34,
    'probability': 0.92236328125},
   {'word': ' the',
    'word_number': 4,
    'start': 13.34,
    'end': 13.44,
    'probability': 0.97412109375},
   {'word': ' garden',
    'word_number': 5,
    'start': 13.44,
    'end': 13.84,
    'probability': 0.9716796875},
   {'word': ' of',
    'word_number': 6,
    'start': 13.84,
    'end': 14.02,
    'probability': 0.908203125},
   {'word': ' eve',
    'word_number': 7,
    'start': 14.02,
 

In [3]:
from pprint import pprint

# Pretty-print only the first verse so its nested per-word entries are readable.
pprint(verses[0])

{'end': 17.86,
 'start': 11.959999999999996,
 'text': ' like  adam  in  the  garden  of  eve  my  bitch  got  an  apple  '
         'bottom  and  she  swallowed  my  seeds  follow',
 'verse_number': 1,
 'words': [{'end': 12.539999999999997,
            'probability': 0.052825927734375,
            'start': 11.959999999999996,
            'word': ' like',
            'word_number': 1},
           {'end': 13.12,
            'probability': 0.467529296875,
            'start': 12.539999999999997,
            'word': ' adam',
            'word_number': 2},
           {'end': 13.34,
            'probability': 0.92236328125,
            'start': 13.12,
            'word': ' in',
            'word_number': 3},
           {'end': 13.44,
            'probability': 0.97412109375,
            'start': 13.34,
            'word': ' the',
            'word_number': 4},
           {'end': 13.84,
            'probability': 0.9716796875,
            'start': 13.44,
            'word': ' garden',
       

In [None]:
# Preview the first two verses through the project helper imported from
# `utilities` (its output format is not visible in this notebook).
display_verses_with_timing(verses[:2])

___
___
___

In [4]:
# Folders used by this stage. The font and lyrics input folders are not
# defined here; only the instrumental input and the two output folders are.
output_ass_path = Path("./audio_processing/karaoke_files/output_ass/")
output_video_path = Path("./audio_processing/karaoke_files/output_videos/")
instrumental_file_path = Path("./audio_processing/karaoke_files/input_instrumentals/")

# Concrete files inside those folders.
karaoke_video = output_video_path.joinpath("droppin_seeds_karaoke.mp4")
karaoke_music = instrumental_file_path.joinpath("droppin_seeds.mp3")

# Alias for the verse metadata extracted earlier in the notebook.
karaoke_lyrics = verses

In [5]:
# Length of the instrumental track, measured by the project helper.
# NOTE(review): the recorded output (60.107755) is presumably seconds — confirm.
audio_duration = extract_audio_duration(karaoke_music)

# Echo the value as the cell output.
audio_duration

60.107755

In [6]:
# Write the timed verses to an ASS subtitle file in the ASS output folder.
# The optional `loader_duration` argument is deliberately not passed.
create_ass_file(
    verses,
    audio_duration=audio_duration,
    output_path=output_ass_path.joinpath("droppin_seeds_karaoke.ass"),
    title="test karaoke",
    font="Arial",
    fontsize=10,
)

___
___
___

In [13]:
import threading


class Loader:
    """
    Console spinner that shows a message while a blocking task runs.

    Intended use is as a context manager: a background thread redraws
    ``<message> <frame>`` on the current line until the ``with`` block exits.
    ``start()`` can also be called directly to spin in the calling thread
    until another thread calls ``stop()``.

    Attributes:
        message (str): Text printed in front of the spinner frame.
        running (bool): True while the spinner should keep drawing.
        thread (threading.Thread | None): Worker created by ``__enter__``.
    """

    _FRAMES = "|/-\\"
    _FRAME_SECONDS = 0.2

    def __init__(self, message="Loading..."):
        self.message = message
        self.running = True
        self.thread = None
        # Set by stop() so the worker wakes immediately instead of sleeping
        # through the remainder of the current frame.
        self._stop_event = threading.Event()

    def _arm(self):
        """Reset the stop signal so a new spin cycle can begin."""
        self.running = True
        self._stop_event.clear()

    def _request_stop(self):
        """Tell the spin loop to finish and wake it if it is waiting."""
        self.running = False
        self._stop_event.set()

    def _clear_line(self):
        """Overwrite the spinner text with spaces and return the cursor to column 0."""
        blank = " " * (len(self.message) + 2)
        print(f"\r{blank}\r", end="", flush=True)

    def _spin(self):
        """Draw frames until ``running`` turns False."""
        index = 0
        while self.running:
            frame = self._FRAMES[index % len(self._FRAMES)]
            print(f"\r{self.message} {frame}", end="", flush=True)
            index += 1
            # Event.wait doubles as an interruptible sleep.
            self._stop_event.wait(self._FRAME_SECONDS)

    def start(self):
        """Spin in the calling thread; blocks until ``stop()`` is called elsewhere."""
        self._arm()
        self._spin()

    def stop(self):
        """Stop the spinner and wipe its line."""
        self._request_stop()
        self._clear_line()

    def __enter__(self):
        # Arm before the worker starts: if the worker re-armed itself, a stop()
        # arriving first would be undone and join() would wait forever.
        self._arm()
        self.thread = threading.Thread(target=self._spin, daemon=True)
        self.thread.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._request_stop()
        self.thread.join()
        # Clear only after the worker has finished so no late frame is left behind.
        self._clear_line()

In [14]:
def _cuda_available():
    """Report whether PyTorch can see a CUDA device; False when torch is not installed."""
    try:
        import torch
    except ImportError:
        return False
    return bool(torch.cuda.is_available())


def _probe_audio_duration(audio_path):
    """Return the duration of ``audio_path`` in seconds as reported by ffprobe."""
    import subprocess

    probe = subprocess.run(
        ["ffprobe", "-i", audio_path, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    raw = probe.stdout.decode("utf-8", errors="replace").strip()
    try:
        return float(raw)
    except ValueError:
        # ffprobe runs with "-v quiet", so an unreadable file just yields empty output.
        raise ValueError(
            f"ffprobe could not determine the duration of {audio_path!r} "
            f"(exit code {probe.returncode}, output {raw!r})"
        ) from None


def add_subtitles_to_video(
    audio_path,
    ass_path,
    output_path,
    background_video=None,
    fps=30,
    codec=None,
    bitrate="500k",
    audio_codec="aac",
    preset="medium",
    threads=4,
    ffmpeg_params=None,
    logger="bar"
):
    """
    Combine audio, subtitles, and optionally a background video into a karaoke video.

    The video stream always comes from the first input (background clip or a
    generated black 1280x720 canvas) and the audio stream from ``audio_path``.
    When a background video is given, the output is limited to the audio's duration.

    Args:
        audio_path (str | Path): Path to the audio file (e.g., .mp3).
        ass_path (str | Path): Path to the ASS subtitle file.
        output_path (str | Path): Path to save the generated video.
        background_video (str | Path, optional): Path to the background video.
            If None (or empty), a blank background is used.
        fps (int): Frames per second for the video.
        codec (str): Video codec to use. If None, "h264_nvenc" is chosen when
            CUDA is available and "libx264" otherwise.
        bitrate (str): Bitrate for the video.
        audio_codec (str): Audio codec to use. Set to None to disable audio.
        preset (str): Encoding preset for balancing speed and quality.
        threads (int): Number of CPU threads to use for encoding (CPU only).
        ffmpeg_params (list): Additional FFMPEG parameters, appended last so
            they can override the options set here.
        logger (str): "bar" shows the ``Loader`` spinner; any other value runs silently.

    Returns:
        None. FFMPEG failures are reported on stdout rather than raised.

    Raises:
        ValueError: If ffprobe cannot report the duration of ``audio_path``.
    """
    import subprocess

    # Accept pathlib.Path as well as str: the command is joined for display
    # below, which requires plain strings.
    audio_path = str(audio_path)
    ass_path = str(ass_path)
    output_path = str(output_path)
    background_video = str(background_video) if background_video else None
    extra_params = [str(param) for param in ffmpeg_params] if ffmpeg_params else []

    # Detect CUDA or CPU
    is_cuda = _cuda_available()
    print(f"Running on device: {'CUDA' if is_cuda else 'CPU'}")

    # Auto-detect codec based on device if not provided.
    # NOTE(review): CUDA being visible to torch does not prove this ffmpeg
    # build ships h264_nvenc — confirm on the target machine.
    if codec is None:
        codec = "h264_nvenc" if is_cuda else "libx264"

    # Calculate audio duration
    audio_duration = _probe_audio_duration(audio_path)
    print(f"Audio duration detected: {audio_duration} seconds")

    # First input: the background clip, or a black canvas as long as the audio.
    if background_video:
        video_input = ["-i", background_video]
    else:
        video_input = ["-f", "lavfi", "-i", f"color=c=black:s=1280x720:d={audio_duration}"]

    # NOTE(review): ass_path is placed into the filter expression unescaped;
    # paths containing ':', quotes or backslashes probably need ffmpeg filter
    # escaping — confirm before using such paths.
    command = [
        "ffmpeg",
        *video_input,
        "-i", audio_path,
        "-vf", f"subtitles={ass_path}",
        # Explicit mapping: without it ffmpeg's automatic stream selection may
        # pick cover art embedded in the audio file as the video stream, or
        # the background clip's own soundtrack as the audio stream.
        "-map", "0:v:0",
    ]

    if background_video:
        # Keep the result as long as the song, not as long as the clip.
        command.extend(["-t", str(audio_duration)])

    # Add video encoding settings
    command.extend([
        "-c:v", codec,        # Use GPU or CPU codec
        "-preset", preset,    # Encoding preset for speed vs quality
        "-b:v", bitrate,      # Video bitrate
        "-r", str(fps),       # Frame rate
    ])

    # Add audio encoding settings if applicable
    if audio_codec:
        command.extend(["-map", "1:a:0", "-c:a", audio_codec])
    else:
        command.extend(["-an"])  # Disable audio

    # Set CPU threads (relevant for CPU encoding)
    if not is_cuda:
        command.extend(["-threads", str(threads)])

    # Add additional FFMPEG parameters
    command.extend(extra_params)

    # Ensure the output file is included
    command.append(output_path)

    # Debugging: Print the constructed command
    print("Constructed FFMPEG command:")
    print(" ".join(command))

    # stdin is detached so ffmpeg cannot block waiting for an answer to its
    # "overwrite?" prompt; it fails with a message instead.
    run_options = dict(
        check=True,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Run the command with a loader or plain logging
    print("\nStarting FFMPEG process...\n")
    try:
        if logger == "bar":
            with Loader("Processing video..."):
                subprocess.run(command, **run_options)
        else:
            subprocess.run(command, **run_options)
        print(f"✅ Video successfully created at: {output_path}")
    except subprocess.CalledProcessError as e:
        # errors="replace": ffmpeg may echo file names that are not valid UTF-8.
        details = e.stderr.decode("utf-8", errors="replace")
        print(f"❌ Error occurred while running FFMPEG:\n{details}")


In [None]:
"""
def add_subtitles_to_video(
    audio_path,
    ass_path,
    output_path,
    background_video=None,
    fps=30,
    codec=None,
    bitrate="500k",
    audio_codec="aac",
    preset="medium",
    threads=4,
    ffmpeg_params=None,
    logger="bar"
):
"""

# Use the subtitle file written by the `create_ass_file` cell, not a
# "karaoke.ass" in the working directory that no cell in this notebook creates.
ass_path = output_ass_path / "droppin_seeds_karaoke.ass"

# Render the karaoke video from the instrumental track and those subtitles.
add_subtitles_to_video(
    audio_path=str(karaoke_music),
    ass_path=str(ass_path),
    output_path=str(karaoke_video),
)



### WORKS
___
___

In [None]:
import subprocess

def test_ffmpeg_with_audio(input_audio, output_audio):
    """
    Test FFMPEG functionality by converting an audio file from MP3 to AAC.

    Args:
        input_audio (str): Path to the input audio file (MP3).
        output_audio (str): Path to save the converted AAC audio.

    Returns:
        None
    """
    command = [
        "ffmpeg",
        "-i", input_audio,
        "-c:a", "aac",
        "-b:a", "128k",  # Audio bitrate
        output_audio
    ]

    print("Running FFMPEG command:")
    print(" ".join(command))

    try:
        result = subprocess.run(command, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        
        print(f"\n✅ Conversion successful. Output saved at: {output_audio}")
    except subprocess.CalledProcessError as e:
        print(f"\n❌ Error occurred while running FFMPEG:\n{e.stderr.decode('utf-8')}")


# Input folders of the karaoke pipeline.
vocal_file_path = Path("./audio_processing/karaoke_files/input_vocals/")
lyrics_file_path = Path("./audio_processing/karaoke_files/input_lyrics/")
font_file_path = Path("./audio_processing/karaoke_files/input_fonts/")
instrumental_file_path = Path("./audio_processing/karaoke_files/input_instrumentals/")

# Output folder for rendered media.
output_video_path = Path("./audio_processing/karaoke_files/output_videos/")

# Concrete files inside those folders.
karaoke_font = font_file_path.joinpath("enter-the-gungeon-big.ttf")
karaoke_music = instrumental_file_path.joinpath("droppin_seeds.mp3")
karaoke_video = output_video_path.joinpath("droppin_seeds_karaoke.mp4")

# Smoke test: convert the instrumental track to AAC next to the video outputs.
converted_audio = output_video_path.joinpath("droppin_seeds_converted.aac")
test_ffmpeg_with_audio(str(karaoke_music), str(converted_audio))

### WORKS
___
___

In [None]:
import subprocess
import time

def debug_ffmpeg_gpu_with_audio(audio_path, output_path):
    """
    Render a black 60-second clip with the given audio using NVENC and dump FFMPEG's output.

    Args:
        audio_path (str): Path to the input audio file (MP3).
        output_path (str): Path to save the output video.

    Returns:
        None
    """
    # Blank 1280x720 canvas, 60 seconds long.
    video_source = ["-f", "lavfi", "-i", "color=c=black:s=1280x720:d=60"]
    audio_source = ["-i", audio_path]
    # GPU encoder, p5 preset, 1M video bitrate, 30 fps.
    video_encoding = ["-c:v", "h264_nvenc", "-preset", "p5", "-b:v", "1M", "-r", "30"]
    audio_encoding = ["-c:a", "aac", "-b:a", "128k"]

    command = [
        "ffmpeg",
        *video_source,
        *audio_source,
        *video_encoding,
        *audio_encoding,
        "-shortest",  # Stop when the shortest stream ends
        output_path,
    ]

    print("\nRunning FFMPEG Command with Audio:")
    print(" ".join(command))

    # Wall-clock timing of the encode.
    started = time.time()

    try:
        completed = subprocess.run(command, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    except subprocess.CalledProcessError as failure:
        print("\n❌ Error occurred during the test:")
        print(failure.stderr.decode('utf-8'))
        return

    finished = time.time()

    print(f"\n✅ Test Completed Successfully in {finished - started:.2f} seconds")
    print(f"Output saved at: {output_path}")
    print("\nFFMPEG Standard Output:")
    print(completed.stdout.decode("utf-8"))
    print("\nFFMPEG Standard Error:")
    print(completed.stderr.decode("utf-8"))

# Input track and destination clip for the GPU encoding check.
audio_path, output_path = (
    "./audio_processing/karaoke_files/input_instrumentals/droppin_seeds.mp3",
    "./audio_processing/karaoke_files/output_videos/test_gpu_video_with_audio_debug.mp4",
)

# Kick off the check.
debug_ffmpeg_gpu_with_audio(audio_path=audio_path, output_path=output_path)


In [None]:
# Encoder settings tried for the final render, keyed by intent. Each entry is
# passed verbatim as keyword arguments to `final_video.write_videofile`.
WRITE_PRESETS = {
    # Quick preview: low FPS, no audio, fastest preset.
    "draft_no_audio": dict(
        fps=12,                          # Reduced FPS for speed
        codec="libx264",                 # CPU-based encoding
        bitrate="500k",                  # Lower bitrate for speed
        audio_codec=None,                # Disable audio for testing
        preset="ultrafast",              # Fastest preset
        threads=8,
        ffmpeg_params=["-crf", "30"],    # Higher CRF for faster compression
        logger="bar",                    # Show progress bar
    ),
    # Balanced quality and speed.
    "balanced": dict(
        fps=24,
        codec="libx264",
        bitrate="2000k",                 # Higher bitrate for better quality
        audio_codec="aac",
        audio_bitrate="128k",            # Standard audio quality
        preset="medium",                 # Balance between encoding speed and compression
        threads=4,
        ffmpeg_params=["-crf", "23"],    # Balanced CRF value
        logger="bar",
    ),
    # Smallest file: low bitrates, fast encode.
    "small_file": dict(
        fps=24,
        codec="libx264",
        bitrate="500k",                  # Low bitrate for smaller file size
        audio_codec="aac",
        audio_bitrate="64k",             # Low audio quality
        preset="ultrafast",              # Fast encoding, less compression
        threads=2,
        ffmpeg_params=["-crf", "28"],    # Higher CRF value for more compression
        logger="bar",
    ),
    # Highest quality of the four.
    "high_quality": dict(
        fps=24,
        codec="libx264",
        bitrate="3000k",
        audio_codec="aac",
        audio_bitrate="192k",            # High-quality audio bitrate
        preset="medium",                 # Encoding speed vs compression trade-off
        threads=4,                       # Adjust based on the number of CPU cores
        ffmpeg_params=["-crf", "23"],    # Constant Rate Factor (CRF) for quality tuning
        logger="bar",
    ),
}

# All presets write to the same file, so rendering more than one in a row only
# wastes encoding time: each run overwrites the previous result. Render once.
ACTIVE_PRESET = "high_quality"

# NOTE(review): `final_video` is not defined in any cell visible here; it is
# presumably a moviepy clip built elsewhere — confirm it exists before running.
final_video.write_videofile(str(karaoke_video), **WRITE_PRESETS[ACTIVE_PRESET])
