In [1]:
import os
import csv
import pandas as pd
import librosa
import numpy as np
from pathlib import Path
from tqdm import tqdm
import gc
import re
import glob
import pickle

import soundfile as sf
from musicnn.extractor import extractor as musicnn_extractor

import torch
import resampy
from transformers import Wav2Vec2Processor, Wav2Vec2Model

# Add CLAP import
from laion_clap import CLAP_Module

import warnings
warnings.filterwarnings('ignore')






  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MTGJamendoFeatureExtractor:
    def __init__(self, data_path, metadata_file="autotagging_moodtheme.tsv"):
        """
        Initialize the feature extractor for MTG-Jamendo dataset
        
        Args:
            data_path (str): Path to your data directory (e.g., "E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data")
            metadata_file (str): Name of the metadata TSV file
        """
        self.data_path = Path(data_path)
        self.metadata_file = metadata_file
        self.audio_base_dir = self.data_path / "mtg-jamendo-data"
        self.metadata_df = None
        self.mapping_validation = None
        
    def load_metadata(self):
        metadata_path = self.data_path / self.metadata_file
        if not metadata_path.exists():
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

        print(f"Loading metadata (with tag‑fix) from: {metadata_path}")

        # Read raw lines and fix rows with extra tabs in the TAGS column
        fixed_rows = []
        with open(metadata_path, 'r', encoding='utf-8', newline='') as f:
            reader = csv.reader(f, delimiter='\t')
            header = next(reader)
            num_cols = len(header)  # should be 6
            for row in reader:
                if len(row) > num_cols:
                    # join all "extra" columns into the last (TAGS) field
                    row = row[: num_cols-1 ] + ['\t'.join(row[num_cols-1:])]
                elif len(row) < num_cols:
                    # optionally skip or pad
                    row = row + ['']*(num_cols - len(row))
                fixed_rows.append(row)

        # Create DataFrame
        self.metadata_df = pd.DataFrame(fixed_rows, columns=header)
        print(f"Loaded {len(self.metadata_df)} tracks from metadata")
        print(f"Columns: {list(self.metadata_df.columns)}\n")
        print(self.metadata_df.head(3))
        return self.metadata_df
    
    def validate_audio_directory(self):
        """Validate the audio directory structure and check for numbered folders 00-99"""
        if not self.audio_base_dir.exists():
            raise FileNotFoundError(f"Audio base directory not found: {self.audio_base_dir}")
        
        # Check for numbered folders 00-99
        numbered_dirs = []
        for i in range(100):
            dir_name = f"{i:02d}"  # Format as 00, 01, 02, ..., 99
            dir_path = self.audio_base_dir / dir_name
            if dir_path.exists() and dir_path.is_dir():
                numbered_dirs.append(dir_name)
        
        print(f"Found audio base directory: {self.audio_base_dir}")
        print(f"Found {len(numbered_dirs)} numbered subdirectories (00-99)")
        
        if len(numbered_dirs) == 0:
            raise FileNotFoundError("No numbered subdirectories (00-99) found in audio directory")
        
        # Count total MP3 files
        total_mp3_count = 0
        for dir_name in numbered_dirs[:5]:  # Sample first 5 directories
            dir_path = self.audio_base_dir / dir_name
            mp3_files = list(dir_path.glob("*.mp3"))
            total_mp3_count += len(mp3_files)
            print(f"  Directory {dir_name}: {len(mp3_files)} MP3 files")
        
        print(f"Sample shows {total_mp3_count} MP3 files in first 5 directories")
        return True
    


    def get_audio_path(self, track_id):
        # assume self.metadata_df is loaded
        row = self.metadata_df.loc[self.metadata_df['TRACK_ID'] == track_id]
        if row.empty:
            raise KeyError(f"No metadata found for track {track_id}")
        relpath = row.iloc[0]['PATH']     # e.g. "48/948.mp3"
        fullpath = self.audio_base_dir / relpath
        if not fullpath.exists():
            raise FileNotFoundError(f"Audio file not found at {fullpath}")
        return fullpath
    
    def validate_mapping(self, sample_size=100):
        """
        Validate mapping between metadata TSV and actual audio files
        
        Args:
            sample_size (int): Number of random samples to check
        
        Returns:
            dict: Validation results with statistics
        """
        print(f"\n{'='*50}")
        print("VALIDATING AUDIO-METADATA MAPPING")
        print(f"{'='*50}")
        
        if self.metadata_df is None:
            self.load_metadata()
        
        # Sample tracks for validation
        sample_df = self.metadata_df.sample(min(sample_size, len(self.metadata_df)), random_state=42)
        
        validation_results = {
            'total_checked': len(sample_df),
            'found': 0,
            'missing': 0,
            'found_tracks': [],
            'missing_tracks': [],
            'sample_paths': []
        }
        
        print(f"Checking {len(sample_df)} random tracks from metadata...")
        
        # Check if we can identify the track ID column
        possible_id_columns = ['TRACK_ID', 'track_id', 'id', 'ID', 'TrackID']
        track_id_col = None
        
        for col in possible_id_columns:
            if col in self.metadata_df.columns:
                track_id_col = col
                break
        
        if track_id_col is None:
            print("Available columns:", list(self.metadata_df.columns))
            raise ValueError("Could not identify track ID column. Please check column names.")
        
        print(f"Using '{track_id_col}' as track ID column")
        
        for idx, row in sample_df.iterrows():
            track_id = row[track_id_col]
            audio_path = self.get_audio_path(track_id)
            
            if audio_path.exists():
                validation_results['found'] += 1
                validation_results['found_tracks'].append({
                    'track_id': track_id,
                    'path': str(audio_path),
                    'size_mb': audio_path.stat().st_size / (1024*1024)
                })
                # Store first 5 found paths as samples
                if len(validation_results['sample_paths']) < 5:
                    validation_results['sample_paths'].append(str(audio_path))
            else:
                validation_results['missing'] += 1
                validation_results['missing_tracks'].append({
                    'track_id': track_id,
                    'expected_path': str(audio_path)
                })
        
        # Calculate statistics
        found_percentage = (validation_results['found'] / validation_results['total_checked']) * 100
        
        print(f"\nVALIDATION RESULTS:")
        print(f"  Total checked: {validation_results['total_checked']}")
        print(f"  Found: {validation_results['found']} ({found_percentage:.1f}%)")
        print(f"  Missing: {validation_results['missing']} ({100-found_percentage:.1f}%)")
        
        if validation_results['sample_paths']:
            print(f"\nSample found audio paths:")
            for path in validation_results['sample_paths']:
                print(f"  {path}")
        
        if validation_results['missing_tracks'] and len(validation_results['missing_tracks']) <= 5:
            print(f"\nMissing tracks (showing first 5):")
            for track in validation_results['missing_tracks'][:5]:
                print(f"  Track {track['track_id']}: Expected at {track['expected_path']}")
        
        # Check folder distribution for found tracks
        if validation_results['found_tracks']:
            folder_distribution = {}
            for track in validation_results['found_tracks']:
                folder = Path(track['path']).parent.name
                folder_distribution[folder] = folder_distribution.get(folder, 0) + 1
            
            print(f"\nFolder distribution of found tracks:")
            for folder, count in sorted(folder_distribution.items()):
                print(f"  Folder {folder}: {count} tracks")
        
        self.mapping_validation = validation_results
        
        if found_percentage < 50:
            print(f"\n⚠️  WARNING: Only {found_percentage:.1f}% of tracks found!")
            print("This might indicate an issue with the folder structure or file naming.")
        else:
            print(f"\n✅ Validation successful: {found_percentage:.1f}% of tracks found")
        
        return validation_results
    
    def load_audio_file(self, file_path, sr=22050, duration=30.0):
        """
        Load an audio file using librosa
        
        Args:
            file_path (Path): Path to the audio file
            sr (int): Sample rate (default: 22050 Hz)
            duration (float): Duration to load in seconds (default: 30.0 for 30-second clips)
        
        Returns:
            tuple: (audio_data, sample_rate) or (None, None) if loading fails
        """
        try:
            # Load audio file
            y, sr_actual = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr_actual
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            return None, None
    
    def process_dataset(self,
                        feature_extractor_func,
                        start_index: int = 0,
                        end_index: int = None,
                        save_interval: int = 1000,
                        validate_first: bool = True,
                        output_dir_name: str = "clap_features") -> pd.DataFrame:
        """
        Main processing function to extract features from all audio files
        
        Args:
            feature_extractor_func: Your feature extraction function
            start_index (int): Starting index for processing
            end_index (int): Ending index for processing (None for all)
            save_interval (int): Save intermediate results every N files
            validate_first (bool): Whether to validate mapping before processing
            output_dir_name (str): Name of output directory for intermediate saves
        
        Returns:
            pd.DataFrame: DataFrame with extracted features
        """
        # Load metadata and validate audio directory
        self.load_metadata()
        self.validate_audio_directory()
        
        # Run validation check first
        if validate_first:
            validation_results = self.validate_mapping(sample_size=50)
            
            # Ask user if they want to continue if validation shows issues
            if validation_results['found'] / validation_results['total_checked'] < 0.5:
                print("\n⚠️  Low success rate in validation. Please check the issues above.")
                response = input("Do you want to continue anyway? (y/n): ")
                if response.lower() != 'y':
                    print("Processing cancelled.")
                    return None
        
        # 2) Slice the DataFrame to only the desired segment
        total = len(self.metadata_df)
        end_index = end_index or total
        df_segment = self.metadata_df.iloc[start_index:end_index]  # slice with iloc

        print(f"Processing tracks {start_index} to {end_index} (total {len(df_segment)})")
        
        # 2) Try to resume from the latest pickle
        results = []
        processed_count = start_index
        failed_count = 0
        last_pkl = None
        
        # Identify track ID column
        possible_id_columns = ['TRACK_ID', 'track_id', 'id', 'ID', 'TrackID']
        track_id_col = None
        for col in possible_id_columns:
            if col in self.metadata_df.columns:
                track_id_col = col
                break
        
        if track_id_col is None:
            raise ValueError(f"Could not identify track ID column. Available columns: {list(self.metadata_df.columns)}")
        
        print(f"Processing at index {start_index} (track #{processed_count+1})")
        print(f"Tracks left: {len(df_segment)}")
        print(f"Using '{track_id_col}' as track ID column")
        
        # Create output directory for intermediate saves
        output_dir = self.data_path / output_dir_name
        output_dir.mkdir(exist_ok=True)
        
        # Process each track
        for idx, row in tqdm(df_segment.iterrows(), total=len(df_segment), desc="Processing audio"):
            track_id = row[track_id_col]
            
            # Get audio file path
            audio_path = self.get_audio_path(track_id)
            
            if not audio_path.exists():
                # print(f"Audio file not found: {audio_path}")  # Comment out to reduce noise
                failed_count += 1
                continue
            
            # Load audio
            audio_data, sample_rate = self.load_audio_file(audio_path)
            
            if audio_data is None:
                failed_count += 1
                continue
            
            # Extract features using your feature extractor
            try:
                features = feature_extractor_func(audio_data, sample_rate)
                
                # Combine metadata with features
                result_row = {
                    'track_id': track_id,
                    'audio_path': str(audio_path),
                    'duration': len(audio_data) / sample_rate,
                    'sample_rate': sample_rate,
                    **features  # Unpack feature dictionary
                }
                
                # Add original metadata columns
                for col in row.index:
                    if col not in result_row:
                        result_row[f'meta_{col}'] = row[col]
                
                results.append(result_row)
                processed_count += 1
                
                # Save intermediate results
                if processed_count % save_interval == 0:
                    temp_df = pd.DataFrame(results)
                    # 2) Save CSV checkpoint
                    csv_path = output_dir / f"features_intermediate_{processed_count}.csv"
                    temp_df.to_csv(csv_path, index=False)  

                    # 3) Save Pickle checkpoint
                    pkl_path = output_dir / f"features_intermediate_{processed_count}.pkl"
                    temp_df.to_pickle(pkl_path) 

                    # delete previous snapshot if any
                    if last_pkl and os.path.exists(last_pkl):
                        os.remove(last_pkl)      # remove old file
                    last_pkl = str(pkl_path) 

                    # 4) Optional: free memory if needed
                    del temp_df  
                    gc.collect()

                    print(f"Saved intermediate results at {processed_count} files →")
                    print(f"  • CSV:  {csv_path}")
                    print(f"  • Pickle: {pkl_path}")
                
            except Exception as e:
                print(f"Feature extraction failed for {track_id}: {e}")
                failed_count += 1
        
        # Create final DataFrame
        features_df = pd.DataFrame(results)
        
        print(f"\nProcessing complete!")
        print(f"Successfully processed: {processed_count} files")
        print(f"Failed: {failed_count} files")
        if len(features_df) > 0:
            print(f"Feature DataFrame shape: {features_df.shape}")
        
        return features_df

In [3]:
def quick_validation_check(data_path="E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data"):
    """
    Validate the metadata-to-audio mapping without doing any feature extraction.

    Run this first to make sure everything is set up correctly: it loads the
    metadata, checks the audio directory layout and then spot-checks 100
    random tracks.

    Args:
        data_path (str): Dataset data directory. Defaults to the location this
            notebook was originally run against, so calling the function with
            no arguments behaves as before.

    Returns:
        dict: The validation results produced by
            ``MTGJamendoFeatureExtractor.validate_mapping``.
    """
    extractor = MTGJamendoFeatureExtractor(data_path)

    print("Loading metadata and validating audio directory structure...")
    extractor.load_metadata()
    extractor.validate_audio_directory()

    print("\nValidating audio-metadata mapping...")
    validation_results = extractor.validate_mapping(sample_size=100)

    return validation_results

def musicnn_feature_extractor(audio_data: np.ndarray,
                              sample_rate: int,
                              model: str = 'MTT_musicnn') -> dict:
    """
    Run the musicnn extractor on an in-memory clip.

    The clip is written to a WAV file inside a private temporary directory,
    passed to the musicnn extractor by path, and the temporary directory is
    removed afterwards, even if extraction raises. Nothing is left in the
    working directory, and concurrent runs cannot overwrite each other's
    clip file.

    Args:
        audio_data (np.ndarray): Raw audio samples.
        sample_rate (int): Sample rate of ``audio_data``; written into the WAV.
        model (str): musicnn model name forwarded to the extractor.

    Returns:
        dict: The third value returned by the musicnn extractor when called
            with ``extract_features=True``. Per the original author's note it
            holds intermediate representations such as 'timbral', 'temporal',
            'cnn1', ..., 'penultimate' for musicnn models — not verified here.
    """
    import tempfile  # local import: tempfile is not in the notebook's import cell

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, 'temp_clip.wav')
        sf.write(tmp_path, audio_data, sample_rate)

        # extractor returns (taggram, tags, features_dict)
        _, _, feats = musicnn_extractor(tmp_path,
                                        model=model,
                                        extract_features=True)
    return feats


# Wav2Vec2 is instantiated a single time at module scope so that every call to
# wav2vec_feature_extractor() reuses the same processor and weights.
processor = Wav2Vec2Processor.from_pretrained('facebook/wav2vec2-base-960h')
# .eval() returns the module itself, so it can be chained onto the load.
model = Wav2Vec2Model.from_pretrained('facebook/wav2vec2-base-960h').eval()

def wav2vec_feature_extractor(audio_data: np.ndarray,
                              sample_rate: int) -> dict:
    """
    Produce one fixed-length Wav2Vec2 embedding for a clip.

    The clip is resampled to 16 kHz when needed, run through the module-level
    ``processor`` and ``model``, and the model's ``last_hidden_state`` is
    averaged over dimension 1 (time, per the original author's note that the
    tensor is (1, T, 768)).

    Args:
        audio_data (np.ndarray): Raw audio samples.
        sample_rate (int): Sample rate of ``audio_data``.

    Returns:
        dict: ``{'wav2vec2_embedding': np.ndarray}`` holding the pooled vector.
    """
    target_sr = 16000

    waveform = audio_data
    if sample_rate != target_sr:
        waveform = resampy.resample(waveform, sample_rate, target_sr)

    encoded = processor(waveform,
                        sampling_rate=target_sr,
                        return_tensors='pt',
                        padding=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    pooled = hidden_states.mean(dim=1).squeeze()
    return {'wav2vec2_embedding': pooled.cpu().numpy()}


# ============================================================================
# CLAP Feature Extractor
# ============================================================================

# Initialize the CLAP model once at module scope from a local checkpoint file;
# clap_feature_extractor() below reuses this `clap_model` on every call.
# NOTE(review): ckpt_path is an absolute path on the original author's machine
# and must be adjusted before running elsewhere.
print("Loading CLAP model...")
ckpt_path = "D:/Models/630k-audioset-best.pt"
clap_model = CLAP_Module(enable_fusion=False)
clap_model.load_ckpt(ckpt_path)  # loads from your local copy
# presumably switches the model to inference mode — confirm against laion_clap
clap_model.eval()
print("CLAP model loaded successfully!")

def clap_feature_extractor(audio_data: np.ndarray, sample_rate: int) -> dict:
    """
    Compute a CLAP audio embedding for one clip using the module-level model.

    The clip is resampled to 48 kHz unless it already has that rate, given a
    leading batch axis of size 1, and embedded with
    ``clap_model.get_audio_embedding_from_data``. The batch axis is then
    removed from the result.

    Args:
        audio_data (np.ndarray): Raw audio data
        sample_rate (int): Sample rate of the audio

    Returns:
        dict: Dictionary containing 'clap_embedding' key with embedding vector
    """
    clap_sr = 48000

    if sample_rate == clap_sr:
        waveform = audio_data
    else:
        waveform = resampy.resample(audio_data, sample_rate, clap_sr)

    # (time,) -> (1, time): the model is called with a batch of one clip.
    batch = waveform.reshape(1, -1)

    # use_tensor=False asks for a numpy result rather than a torch tensor.
    batch_embedding = clap_model.get_audio_embedding_from_data(x=batch, use_tensor=False)

    return {"clap_embedding": np.squeeze(batch_embedding, axis=0)}

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading CLAP model...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Load the specified checkpoint D:/Models/630k-audioset-best.pt from users.
Load Checkpoint...
logit_scale_a 	 Loaded
logit_scale_t 	 Loaded
audio_branch.spectrogram_extractor.stft.conv_real.weight 	 Loaded
audio_branch.spectrogram_extractor.stft.conv_imag.weight 	 Loaded
audio_branch.logmel_extractor.melW 	 Loaded
audio_branch.bn0.weight 	 Loaded
audio_branch.bn0.bias 	 Loaded
audio_branch.patch_embed.proj.weight 	 Loaded
audio_branch.patch_embed.proj.bias 	 Loaded
audio_branch.patch_embed.norm.weight 	 Loaded
audio_branch.patch_embed.norm.bias 	 Loaded
audio_branch.layers.0.blocks.0.norm1.weight 	 Loaded
audio_branch.layers.0.blocks.0.norm1.bias 	 Loaded
audio_branch.layers.0.blocks.0.attn.relative_position_bias_table 	 Loaded
audio_branch.layers.0.blocks.0.attn.qkv.weight 	 Loaded
audio_branch.layers.0.blocks.0.attn.qkv.bias 	 Loaded
audio_branch.layers.0.blocks.0.attn.proj.weight 	 Loaded
audio_branch.layers.0.blocks.0.attn.proj.bias 	 Loaded
audio_branch.layers.0.blocks.0.norm2.weig

In [4]:
# Root of the dataset checkout; the final embedding tables are written here.
DATA_ROOT = Path("E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset")

# The "data" folder inside that checkout, passed to MTGJamendoFeatureExtractor.
data_path = "E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data"

In [5]:
# # 1) musicnn embeddings
# extractor = MTGJamendoFeatureExtractor(data_path)
# df_musicnn = extractor.process_dataset(
#     feature_extractor_func=musicnn_feature_extractor,
#     start_index=0,
#     end_index=None,
#     save_interval=1000,
#     validate_first=False
# )
# df_musicnn.to_csv(DATA_ROOT / 'mtg_jamendo_musicnn_embeddings.csv',
#                   index=False)
# df_musicnn.to_pickle(DATA_ROOT / 'mtg_jamendo_musicnn_embeddings.pkl')

In [6]:
# # 2) wav2vec2 embeddings
# extractor = MTGJamendoFeatureExtractor(data_path)
# df_wav2vec = extractor.process_dataset(
#     feature_extractor_func=wav2vec_feature_extractor,
#     start_index=0,
#     end_index=None,
#     save_interval=1000,
#     validate_first=False
# )
# df_wav2vec.to_csv(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings.csv',
#                   index=False)
# df_wav2vec.to_pickle(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings.pkl')

In [7]:
# # Convert wav2vec embeddings to separate columns for CSV compatibility
# emb = df_wav2vec.pop('wav2vec2_embedding').tolist()
# df_wav2vec[[f'w2v_{i}' for i in range(emb[0].shape[0])]] = pd.DataFrame(emb)

# df_wav2vec.to_csv(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings_v2.csv',
#                   index=False)
# df_wav2vec.to_pickle(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings_v2.pkl')

In [8]:
# 3) CLAP embeddings
# Runs the full extraction over every metadata row (start_index=0,
# end_index=None) with clap_feature_extractor. This is a long-running cell:
# the captured output below shows roughly 18 hours for 18,486 tracks.
extractor = MTGJamendoFeatureExtractor(data_path)
df_clap = extractor.process_dataset(
    feature_extractor_func=clap_feature_extractor,
    start_index=0,
    end_index=None,
    save_interval=1000,
    validate_first=False
)
# Persist the result twice. The 'clap_embedding' column holds one array per
# row, so the pickle is the faithful copy; the next cell expands the arrays
# into scalar columns to get a CSV-friendly table.
df_clap.to_csv(DATA_ROOT / 'mtg_jamendo_clap_embeddings.csv',
                  index=False)
df_clap.to_pickle(DATA_ROOT / 'mtg_jamendo_clap_embeddings.pkl')

Loading metadata (with tag‑fix) from: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\autotagging_moodtheme.tsv
Loaded 18486 tracks from metadata
Columns: ['TRACK_ID', 'ARTIST_ID', 'ALBUM_ID', 'PATH', 'DURATION', 'TAGS']

        TRACK_ID      ARTIST_ID      ALBUM_ID        PATH DURATION  \
0  track_0000948  artist_000087  album_000149  48/948.mp3    212.7   
1  track_0000950  artist_000087  album_000149  50/950.mp3    248.0   
2  track_0000951  artist_000087  album_000149  51/951.mp3    199.7   

                      TAGS  
0  mood/theme---background  
1  mood/theme---background  
2  mood/theme---background  
Found audio base directory: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\mtg-jamendo-data
Found 100 numbered subdirectories (00-99)
  Directory 00: 202 MP3 files
  Directory 01: 200 MP3 files
  Directory 02: 207 MP3 files
  Directory 03: 190 MP3 files
  Directory 04: 193 MP3 files
Sample shows 992 MP3 files in first 5 directories
Processing tracks 0 to 18486 (

Processing audio:   5%|▌         | 1000/18486 [1:00:34<26:26:45,  5.44s/it]

Saved intermediate results at 1000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_1000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_1000.pkl


Processing audio:  11%|█         | 2000/18486 [1:58:16<26:05:04,  5.70s/it]

Saved intermediate results at 2000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_2000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_2000.pkl


Processing audio:  16%|█▌        | 3000/18486 [2:56:21<30:21:36,  7.06s/it]

Saved intermediate results at 3000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_3000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_3000.pkl


Processing audio:  22%|██▏       | 4000/18486 [3:53:13<29:59:11,  7.45s/it]

Saved intermediate results at 4000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_4000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_4000.pkl


Processing audio:  27%|██▋       | 5000/18486 [4:51:54<32:43:53,  8.74s/it]

Saved intermediate results at 5000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_5000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_5000.pkl


Processing audio:  32%|███▏      | 6000/18486 [5:48:57<33:58:22,  9.80s/it]

Saved intermediate results at 6000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_6000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_6000.pkl


Processing audio:  38%|███▊      | 7000/18486 [6:48:19<35:26:18, 11.11s/it]

Saved intermediate results at 7000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_7000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_7000.pkl


Processing audio:  43%|████▎     | 8000/18486 [7:45:07<34:41:44, 11.91s/it]

Saved intermediate results at 8000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_8000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_8000.pkl


Processing audio:  49%|████▊     | 9000/18486 [8:45:25<33:58:22, 12.89s/it]

Saved intermediate results at 9000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_9000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_9000.pkl


Processing audio:  54%|█████▍    | 10000/18486 [9:42:49<32:35:58, 13.83s/it]

Saved intermediate results at 10000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_10000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_10000.pkl


Processing audio:  60%|█████▉    | 11000/18486 [10:42:38<32:25:41, 15.59s/it]

Saved intermediate results at 11000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_11000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_11000.pkl


Processing audio:  65%|██████▍   | 12000/18486 [11:41:02<28:17:40, 15.70s/it]

Saved intermediate results at 12000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_12000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_12000.pkl


Processing audio:  70%|███████   | 13000/18486 [12:44:08<25:36:18, 16.80s/it]

Saved intermediate results at 13000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_13000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_13000.pkl


Processing audio:  76%|███████▌  | 14000/18486 [13:42:33<25:24:31, 20.39s/it]

Saved intermediate results at 14000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_14000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_14000.pkl


Processing audio:  81%|████████  | 15000/18486 [14:40:48<23:19:59, 24.10s/it]

Saved intermediate results at 15000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_15000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_15000.pkl


Processing audio:  87%|████████▋ | 16000/18486 [15:38:29<15:14:39, 22.08s/it]

Saved intermediate results at 16000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_16000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_16000.pkl


Processing audio:  92%|█████████▏| 17000/18486 [16:38:55<9:46:59, 23.70s/it] 

Saved intermediate results at 17000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_17000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_17000.pkl


Processing audio:  97%|█████████▋| 18000/18486 [17:33:16<3:03:14, 22.62s/it]

Saved intermediate results at 18000 files →
  • CSV:  E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_18000.csv
  • Pickle: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\clap_features\features_intermediate_18000.pkl


Processing audio: 100%|██████████| 18486/18486 [17:57:33<00:00,  3.50s/it]  



Processing complete!
Successfully processed: 18486 files
Failed: 0 files
Feature DataFrame shape: (18486, 11)


In [9]:
# Convert CLAP embeddings to separate columns for CSV compatibility.
# The guard makes this cell safe to re-run: once 'clap_embedding' has been
# expanded into clap_0..clap_N the conversion is skipped instead of raising
# KeyError. Nothing is mutated in place; df_clap is rebound to the wide table.
if 'clap_embedding' in df_clap.columns and len(df_clap) > 0:
    emb = df_clap['clap_embedding'].tolist()
    clap_columns = pd.DataFrame(
        emb,
        # Reuse df_clap's index so rows line up even if it is not 0..n-1.
        index=df_clap.index,
        columns=[f'clap_{i}' for i in range(emb[0].shape[0])],
    )
    df_clap = pd.concat(
        [df_clap.drop(columns='clap_embedding'), clap_columns],
        axis=1,
    )

df_clap.to_csv(DATA_ROOT / 'mtg_jamendo_clap_embeddings_v2.csv',
               index=False)
df_clap.to_pickle(DATA_ROOT / 'mtg_jamendo_clap_embeddings_v2.pkl')