In [2]:
#!pip install demucs openai-whisper torchaudio

In [5]:
#!pip install moviepy

In [2]:
#!pip install FuzzyTM
#!pip install blosc2
#!pip install cython
#!pip install sentencepiece


In [9]:
#!pip install ffmpeg-python

In [10]:
#!pip install moviepy

In [5]:
import os

# Directories the pipeline cells read from and write to; each one is created
# if missing, and existing directories are left untouched (exist_ok=True).
folders = [
    "input",
    "separated",
    "vocals",
    "subtitles",
]
for folder in folders:
    os.makedirs(folder, exist_ok=True)

print("📁 Folder structure ready. Place your .mp3 files in the 'input/' folder.")

📁 Folder structure ready. Place your .mp3 files in the 'input/' folder.


In [1]:
import subprocess
from pathlib import Path

def separate_vocals(song_path, output_dir="separated", model="htdemucs"):
    """Run Demucs on one song and return the path of the extracted vocals.

    Demucs is started as a subprocess in two-stem mode (``--two-stems vocals``).
    The model is passed explicitly with ``-n`` so the sub-folder Demucs writes
    to always matches the path this function returns, regardless of which
    model Demucs uses by default.

    Args:
        song_path: Path (str or ``Path``) of the audio file to separate.
        output_dir: Directory handed to Demucs via ``-o``.
        model: Demucs model name handed to Demucs via ``-n``; also used as the
            sub-folder name when building the returned path.

    Returns:
        ``pathlib.Path`` pointing at ``<output_dir>/<model>/<song stem>/vocals.wav``.

    Raises:
        subprocess.CalledProcessError: If Demucs exits with a non-zero status
            (its stderr is printed first).
        FileNotFoundError: If Demucs reported success but the expected
            ``vocals.wav`` is not present.
    """
    # Local import keeps this cell self-contained. sys.executable is the
    # interpreter running this kernel, so Demucs is looked up in the same
    # environment the notebook uses rather than whatever "python" is on PATH.
    import sys

    print(f"🎧 Separating vocals from: {song_path}")

    song = Path(song_path)
    command = [
        sys.executable, "-m", "demucs",
        "-n", model,
        "--two-stems", "vocals",
        "-o", str(output_dir),
        song.as_posix(),  # forward slashes, also on Windows
    ]

    try:
        # errors="replace" prevents a decode failure on unusual bytes in the
        # Demucs output from masking the real result of the run.
        result = subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print("❌ Demucs failed with error:")
        print(e.stderr)  # stderr carries the actual failure reason
        raise

    vocal_path = Path(output_dir) / model / song.stem / "vocals.wav"
    if not vocal_path.is_file():
        raise FileNotFoundError(
            f"Demucs finished but the vocals file was not found: {vocal_path}"
        )
    return vocal_path


In [2]:
import whisper

def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic. Deriving the milliseconds by truncating a float
    subtraction loses a millisecond for inputs such as ``1.001`` (which is
    stored slightly below 1.001), so that approach is avoided.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"

# Whisper models already loaded in this kernel, keyed by model name, so a
# batch run loads each model once instead of once per song.
_WHISPER_MODELS = {}


def transcribe_to_srt(vocal_path, language="hi", model_name="large-v2"):
    """Transcribe an audio file with Whisper and write the result as an SRT file.

    Args:
        vocal_path: Path (str or ``Path``) of the audio file to transcribe.
        language: Language code passed to ``model.transcribe``. Defaults to
            ``"hi"``. Pass ``None`` to hand the choice to Whisper
            (NOTE(review): Whisper is expected to auto-detect the language
            in that case — confirm against the installed version).
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        ``pathlib.Path`` of the written file, ``subtitles/<audio stem>.srt``.
    """
    # Report the language that is actually used instead of always claiming
    # auto-detection.
    language_label = language if language else "auto-language"
    print(f"🧠 Transcribing ({language_label}): {vocal_path}")

    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    result = model.transcribe(str(vocal_path), language=language)

    srt_path = Path("subtitles") / (Path(vocal_path).stem + ".srt")
    # Make sure the target folder exists even if the setup cell was not run.
    srt_path.parent.mkdir(parents=True, exist_ok=True)
    with open(srt_path, "w", encoding="utf-8") as f:
        # SRT cues are numbered starting at 1.
        for index, segment in enumerate(result["segments"], start=1):
            start = format_timestamp(segment["start"])
            end = format_timestamp(segment["end"])
            text = segment["text"].strip()
            f.write(f"{index}\n{start} --> {end}\n{text}\n\n")
    return srt_path

In [3]:
import shutil
import os

def run_pipeline(input_dir="input"):
    """Separate vocals and write SRT subtitles for every MP3 in ``input_dir``.

    For each MP3 file (extension matched case-insensitively, processed in
    sorted name order) the vocals are extracted with ``separate_vocals``,
    moved to ``vocals/<song stem>.wav`` and transcribed with
    ``transcribe_to_srt``. A failure on one file is reported and the loop
    continues with the next file, so one bad track does not abort the batch.

    Args:
        input_dir: Folder that is scanned for MP3 files.

    Returns:
        None. Progress and errors are printed.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist (from ``os.listdir``).
    """
    # Sorted so the processing order is the same on every run.
    for file in sorted(os.listdir(input_dir)):
        # Case-insensitive so files named "*.MP3" are not silently skipped.
        if not file.lower().endswith(".mp3"):
            continue

        song_path = os.path.join(input_dir, file)
        try:
            vocal_path = separate_vocals(song_path)

            # Collect the vocals in one folder, named after the song.
            song_name = Path(song_path).stem
            vocal_target = Path("vocals") / f"{song_name}.wav"
            vocal_target.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(vocal_path), str(vocal_target))

            # Transcribe the moved vocals and save the SRT.
            srt_file = transcribe_to_srt(vocal_target)
            print(f"✅ Done for {file}: Subtitles saved to {srt_file}")
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"❌ Error processing {file}: {e}")

# Run the full pipeline on the MP3 files found in the 'input/' folder.
run_pipeline()

🎧 Separating vocals from: input\Aaj Sajeya Goldie Sohel 320 Kbps.mp3
[1mImportant: the default model was recently changed to `htdemucs`[0m the latest Hybrid Transformer Demucs model. In some cases, this model can actually perform worse than previous models. To get back the old default model use `-n mdx_extra_q`.
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in C:\Users\Gunamay Gupta\separated\htdemucs
Separating track input\Aaj Sajeya Goldie Sohel 320 Kbps.mp3

🧠 Transcribing (auto-language): vocals\Aaj Sajeya Goldie Sohel 320 Kbps.wav




✅ Done for Aaj Sajeya Goldie Sohel 320 Kbps.mp3: Subtitles saved to subtitles\Aaj Sajeya Goldie Sohel 320 Kbps.srt
🎧 Separating vocals from: input\Aap Ki Nazron Ne Samjha-320kbps.mp3
[1mImportant: the default model was recently changed to `htdemucs`[0m the latest Hybrid Transformer Demucs model. In some cases, this model can actually perform worse than previous models. To get back the old default model use `-n mdx_extra_q`.
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in C:\Users\Gunamay Gupta\separated\htdemucs
Separating track input\Aap Ki Nazron Ne Samjha-320kbps.mp3

🧠 Transcribing (auto-language): vocals\Aap Ki Nazron Ne Samjha-320kbps.wav
✅ Done for Aap Ki Nazron Ne Samjha-320kbps.mp3: Subtitles saved to subtitles\Aap Ki Nazron Ne Samjha-320kbps.srt
🎧 Separating vocals from: input\Dilko Tumse Pyar Hua-320kbps.mp3
[1mImportant: the default model was recently changed to `htdemucs`[0m the latest Hybrid Trans

In [1]:
#!pip install demucs

In [10]:
#!pip install torchaudio

In [12]:
#!pip install ffmpeg-python

In [3]:
#!pip install demucs openai-whisper ffmpeg-python torchaudio

In [6]:
#!pip install ipywidgets

In [1]:
#!pip install librosa