In [5]:
pip install demucs librosa pandas numpy

Collecting demucs
  Downloading demucs-4.0.1.tar.gz (1.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 50.7 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting dora-search (from demucs)
  Downloading dora_search-0.1.12.tar.gz (87 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 87.1/87.1 kB 9.3 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting lameenc>=1.2 (from demucs)
  Downloading lameenc-1.8.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (9.9 kB)
Collecting openunmix (from demucs)
  Downloading openunmix-1.3.0-py3-none-any.whl.metadata (17 kB)
Collecting retrying (from dora-search->demucs)
  Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB)
Coll

In [7]:
pip install -U demucs



In [8]:
import os
import librosa
import numpy as np
import pandas as pd
import subprocess
import tempfile

def separate_audio_demucs(audio_file_path, output_dir="./separated", model_name="htdemucs"):
    """
    Separate audio into vocals and instrumental using Demucs

    Runs the ``demucs`` command-line tool in two-stem (vocals) mode and then
    looks for the stems under ``<output_dir>/<model_name>/<track name>/``.

    :param audio_file_path: Path to input audio file
    :param output_dir: Directory where separated stems will be saved
    :param model_name: Name of the Demucs model to run. It is passed to the
        CLI explicitly so the model that runs always matches the
        sub-directory that is searched for the stems afterwards.
    :return: Tuple of paths to (vocals_path, instrumental_path)
    :raises FileNotFoundError: If the input file does not exist, or if the
        vocals stem or the accompaniment stem is missing after separation
    :raises subprocess.CalledProcessError: If the Demucs command exits with a
        non-zero status (its stderr is printed before re-raising)
    """

    # Fail early with a clear message instead of paying for a Demucs start-up.
    if not os.path.isfile(audio_file_path):
        raise FileNotFoundError(f"Input audio file not found: {audio_file_path}")

    # Run Demucs separation
    cmd = [
        "demucs",
        "--two-stems=vocals",
        "-n", model_name,
        "-o", output_dir,
        audio_file_path,
    ]

    try:
        # Execute Demucs command; check=True turns a failing exit code into an exception
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running Demucs: {e}")
        print(f"stderr: {e.stderr}")
        raise

    print("Demucs separation completed successfully")

    # Find the output directory created by Demucs
    base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
    model_dir = os.path.join(output_dir, model_name, base_name)

    # The vocals stem is required no matter how the accompaniment is named.
    vocals_path = os.path.join(model_dir, "vocals.wav")
    if not os.path.exists(vocals_path):
        raise FileNotFoundError(f"Separated audio files not found in {model_dir}")

    # Accept either accompaniment file name, preferring "no_vocals.wav".
    for accompaniment_name in ("no_vocals.wav", "instrumental.wav"):
        accompaniment_path = os.path.join(model_dir, accompaniment_name)
        if os.path.exists(accompaniment_path):
            return vocals_path, accompaniment_path

    raise FileNotFoundError(f"Separated audio files not found in {model_dir}")

def extract_audio_features(audio_file_path, n_mfcc=13, sr=22050):
    """
    Extract audio features for machine learning classification

    Every value in the returned dictionary is a scalar so that the dictionary
    can be written as a single CSV row. Keys are inserted in a fixed order
    (MFCC statistics, spectral centroid, rolloff, tempo, chroma, zero crossing
    rate, RMS, spectral bandwidth).

    :param audio_file_path: Path to audio file
    :param n_mfcc: Number of MFCC coefficients to extract
    :param sr: Sample rate for audio loading
    :return: Dictionary of extracted features
    :raises ValueError: If the file yields no audio samples
    """

    # Load audio file (librosa returns the samples and the sample rate actually used)
    y, sr = librosa.load(audio_file_path, sr=sr)
    if len(y) == 0:
        raise ValueError(f"No audio samples could be loaded from {audio_file_path}")

    features = {}

    # MFCCs (Mel-Frequency Cepstral Coefficients): one mean/std pair per coefficient
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    for i in range(n_mfcc):
        features[f'mfcc_{i}_mean'] = np.mean(mfccs[i])
        features[f'mfcc_{i}_std'] = np.std(mfccs[i])

    # Spectral features
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
    features['spectral_centroid_mean'] = np.mean(spectral_centroids)
    features['spectral_centroid_std'] = np.std(spectral_centroids)

    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)

    # Rhythm features
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may hand back the tempo as a one-element array rather than a
    # scalar; stored as-is it would end up in the CSV as the text "[123.0]".
    features['tempo'] = float(np.atleast_1d(tempo)[0])

    # Chroma features
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    features['chroma_stft_mean'] = np.mean(chroma_stft)
    features['chroma_stft_std'] = np.std(chroma_stft)

    # Zero crossing rate
    zcr = librosa.feature.zero_crossing_rate(y)
    features['zcr_mean'] = np.mean(zcr)
    features['zcr_std'] = np.std(zcr)

    # Root Mean Square Energy
    rms = librosa.feature.rms(y=y)
    features['rms_mean'] = np.mean(rms)
    features['rms_std'] = np.std(rms)

    # Spectral bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    features['spectral_bandwidth_mean'] = np.mean(spectral_bandwidth)

    return features

def process_song_for_dataset(song_path, genre_label, csv_file_path="music_genre_dataset.csv"):
    """
    Complete pipeline: separate song, extract features, and save to CSV

    One row is written per call. The row holds the genre label, the song's
    file name, and the features of the original mix, the vocals stem and the
    instrumental stem, prefixed with ``original_``, ``vocals_`` and
    ``instrumental_`` respectively.

    Errors are handled best-effort: any exception raised along the way is
    printed and swallowed so that a batch run can continue with the next song.

    :param song_path: Path to the original song file
    :param genre_label: Genre label (e.g., 'rock', 'jazz', 'pop')
    :param csv_file_path: Path to output CSV file
    """

    print(f"Processing: {song_path}")

    try:
        # Step 1: Separate audio using Demucs
        vocals_path, instrumental_path = separate_audio_demucs(song_path)
        print(f"Vocals: {vocals_path}")
        print(f"Instrumental: {instrumental_path}")

        # Step 2: Extract features from original, vocals, and instrumental
        feature_sets = (
            ('original', extract_audio_features(song_path)),
            ('vocals', extract_audio_features(vocals_path)),
            ('instrumental', extract_audio_features(instrumental_path)),
        )

        # Step 3: Combine all features with prefixes
        row_data = {'genre': genre_label, 'song_file': os.path.basename(song_path)}
        for prefix, features in feature_sets:
            for key, value in features.items():
                row_data[f'{prefix}_{key}'] = value

        # Step 4: Save to CSV
        df_row = pd.DataFrame([row_data])

        # A zero-byte file has no header yet, so it is treated like a missing file.
        has_header = os.path.isfile(csv_file_path) and os.path.getsize(csv_file_path) > 0

        if not has_header:
            df_row.to_csv(csv_file_path, index=False)
            print(f"Created new dataset file: {csv_file_path}")
        else:
            # Rows are appended without a header, so they must line up with the
            # columns already in the file; refuse to write a misaligned row.
            existing_columns = list(pd.read_csv(csv_file_path, nrows=0).columns)
            if set(existing_columns) != set(df_row.columns):
                raise ValueError(
                    f"Feature columns do not match the header of {csv_file_path}"
                )
            df_row[existing_columns].to_csv(
                csv_file_path, mode='a', header=False, index=False
            )
            print(f"Appended to existing dataset: {csv_file_path}")

        print(f"Successfully processed: {song_path}")

        # Optional: Clean up temporary files
        # os.remove(vocals_path)
        # os.remove(instrumental_path)

    except Exception as e:
        print(f"Error processing {song_path}: {e}")

# Alternative approach using torchaudio (if you prefer a programmatic approach)
def separate_audio_torchaudio(audio_file_path):
    """
    Alternative method using torchaudio's HDEMUCS pipeline

    Loads the pretrained ``HDEMUCS_HIGH_MUSDB_PLUS`` bundle, resamples the
    input to the bundle's sample rate when they differ, and runs the model on
    a batch containing the single track.

    :param audio_file_path: Path to input audio file
    :return: Tuple ``(vocals, instrumental)`` of tensors taken from the model
        output. Both keep the leading batch dimension of size 1 and, when
        resampling was needed, are at the bundle's sample rate rather than
        the file's. The instrumental is the sum of the three non-vocal sources.
    """
    # Imported here so the rest of the notebook works without torch/torchaudio.
    import torch
    import torchaudio
    from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS

    # Load pipeline
    bundle = HDEMUCS_HIGH_MUSDB_PLUS
    model = bundle.get_model()

    # Load audio
    waveform, sample_rate = torchaudio.load(audio_file_path)

    # Feed the model audio at the rate the bundle declares.
    if sample_rate != bundle.sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, bundle.sample_rate
        )

    # Separate sources; the model takes a batch, so add a leading batch axis.
    with torch.no_grad():
        sources = model(waveform.unsqueeze(0))

    # Sources: [drums, bass, other, vocals]
    # NOTE(review): index order assumed from the bundle's documented source
    # list; model.sources can be consulted to confirm.
    vocals = sources[:, 3]
    instrumental = sources[:, 0] + sources[:, 1] + sources[:, 2]  # drums + bass + other

    return vocals, instrumental

# Example usage
if __name__ == "__main__":
    # Run the full pipeline once for a single labelled track.
    process_song_for_dataset(song_path="/content/Selfie-Pulla.mp3", genre_label="pop")

    # Batch variant: walk one folder per genre and process every audio file in it.
    # genres = ['rock', 'jazz', 'classical', 'hiphop']
    # for genre in genres:
    #     genre_dir = f"./songs/{genre}"
    #     for song_file in os.listdir(genre_dir):
    #         if song_file.endswith(('.mp3', '.wav', '.flac')):
    #             process_song_for_dataset(os.path.join(genre_dir, song_file), genre)

Processing: /content/Selfie-Pulla.mp3
Demucs separation completed successfully
Vocals: ./separated/htdemucs/Selfie-Pulla/vocals.wav
Instrumental: ./separated/htdemucs/Selfie-Pulla/no_vocals.wav
Created new dataset file: music_genre_dataset.csv
Successfully processed: /content/Selfie-Pulla.mp3


In [9]:
# Label spelled like the genre names used elsewhere in this notebook ('hiphop');
# the previous "Hip-Pop" would have created a separate, misspelled class.
process_song_for_dataset("/content/Pakkam-Vanthu.mp3", "hiphop", "my_dataset.csv")

Processing: /content/Pakkam-Vanthu.mp3
Demucs separation completed successfully
Vocals: ./separated/htdemucs/Pakkam-Vanthu/vocals.wav
Instrumental: ./separated/htdemucs/Pakkam-Vanthu/no_vocals.wav
Created new dataset file: my_dataset.csv
Successfully processed: /content/Pakkam-Vanthu.mp3


In [None]:
# NOTE(review): the genre label passed here is an empty string, so this row
# would be written to the dataset without a class label — fill it in before running.
process_song_for_dataset("/content/Nee-Yaaro.mp3", "", "my_dataset.csv")