In [1]:
import os
import csv
import pandas as pd
import librosa
import numpy as np
from pathlib import Path
from tqdm import tqdm
import gc
import re
import glob
import pickle

import soundfile as sf
from musicnn.extractor import extractor as musicnn_extractor

import torch
import resampy
from transformers import Wav2Vec2Processor, Wav2Vec2Model

import warnings
warnings.filterwarnings('ignore')






  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MTGJamendoFeatureExtractor:
    def __init__(self, data_path, metadata_file="autotagging_moodtheme.tsv"):
        """
        Initialize the feature extractor for MTG-Jamendo dataset
        
        Args:
            data_path (str): Path to your data directory (e.g., "E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data")
            metadata_file (str): Name of the metadata TSV file
        """
        self.data_path = Path(data_path)
        self.metadata_file = metadata_file
        self.audio_base_dir = self.data_path / "mtg-jamendo-data"
        self.metadata_df = None
        self.mapping_validation = None
        
    def load_metadata(self):
        metadata_path = self.data_path / self.metadata_file
        if not metadata_path.exists():
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

        print(f"Loading metadata (with tag‑fix) from: {metadata_path}")

        # Read raw lines and fix rows with extra tabs in the TAGS column
        fixed_rows = []
        with open(metadata_path, 'r', encoding='utf-8', newline='') as f:
            reader = csv.reader(f, delimiter='\t')
            header = next(reader)
            num_cols = len(header)  # should be 6
            for row in reader:
                if len(row) > num_cols:
                    # join all “extra” columns into the last (TAGS) field
                    row = row[: num_cols-1 ] + ['\t'.join(row[num_cols-1:])]
                elif len(row) < num_cols:
                    # optionally skip or pad
                    row = row + ['']*(num_cols - len(row))
                fixed_rows.append(row)

        # Create DataFrame
        self.metadata_df = pd.DataFrame(fixed_rows, columns=header)
        print(f"Loaded {len(self.metadata_df)} tracks from metadata")
        print(f"Columns: {list(self.metadata_df.columns)}\n")
        print(self.metadata_df.head(3))
        return self.metadata_df
    
    def validate_audio_directory(self):
        """Validate the audio directory structure and check for numbered folders 00-99"""
        if not self.audio_base_dir.exists():
            raise FileNotFoundError(f"Audio base directory not found: {self.audio_base_dir}")
        
        # Check for numbered folders 00-99
        numbered_dirs = []
        for i in range(100):
            dir_name = f"{i:02d}"  # Format as 00, 01, 02, ..., 99
            dir_path = self.audio_base_dir / dir_name
            if dir_path.exists() and dir_path.is_dir():
                numbered_dirs.append(dir_name)
        
        print(f"Found audio base directory: {self.audio_base_dir}")
        print(f"Found {len(numbered_dirs)} numbered subdirectories (00-99)")
        
        if len(numbered_dirs) == 0:
            raise FileNotFoundError("No numbered subdirectories (00-99) found in audio directory")
        
        # Count total MP3 files
        total_mp3_count = 0
        for dir_name in numbered_dirs[:5]:  # Sample first 5 directories
            dir_path = self.audio_base_dir / dir_name
            mp3_files = list(dir_path.glob("*.mp3"))
            total_mp3_count += len(mp3_files)
            print(f"  Directory {dir_name}: {len(mp3_files)} MP3 files")
        
        print(f"Sample shows {total_mp3_count} MP3 files in first 5 directories")
        return True
    


    def get_audio_path(self, track_id):
        # assume self.metadata_df is loaded
        row = self.metadata_df.loc[self.metadata_df['TRACK_ID'] == track_id]
        if row.empty:
            raise KeyError(f"No metadata found for track {track_id}")
        relpath = row.iloc[0]['PATH']     # e.g. "48/948.mp3"
        fullpath = self.audio_base_dir / relpath
        if not fullpath.exists():
            raise FileNotFoundError(f"Audio file not found at {fullpath}")
        return fullpath
    
    def validate_mapping(self, sample_size=100):
        """
        Validate mapping between metadata TSV and actual audio files
        
        Args:
            sample_size (int): Number of random samples to check
        
        Returns:
            dict: Validation results with statistics
        """
        print(f"\n{'='*50}")
        print("VALIDATING AUDIO-METADATA MAPPING")
        print(f"{'='*50}")
        
        if self.metadata_df is None:
            self.load_metadata()
        
        # Sample tracks for validation
        sample_df = self.metadata_df.sample(min(sample_size, len(self.metadata_df)), random_state=42)
        
        validation_results = {
            'total_checked': len(sample_df),
            'found': 0,
            'missing': 0,
            'found_tracks': [],
            'missing_tracks': [],
            'sample_paths': []
        }
        
        print(f"Checking {len(sample_df)} random tracks from metadata...")
        
        # Check if we can identify the track ID column
        possible_id_columns = ['TRACK_ID', 'track_id', 'id', 'ID', 'TrackID']
        track_id_col = None
        
        for col in possible_id_columns:
            if col in self.metadata_df.columns:
                track_id_col = col
                break
        
        if track_id_col is None:
            print("Available columns:", list(self.metadata_df.columns))
            raise ValueError("Could not identify track ID column. Please check column names.")
        
        print(f"Using '{track_id_col}' as track ID column")
        
        for idx, row in sample_df.iterrows():
            track_id = row[track_id_col]
            audio_path = self.get_audio_path(track_id)
            
            if audio_path.exists():
                validation_results['found'] += 1
                validation_results['found_tracks'].append({
                    'track_id': track_id,
                    'path': str(audio_path),
                    'size_mb': audio_path.stat().st_size / (1024*1024)
                })
                # Store first 5 found paths as samples
                if len(validation_results['sample_paths']) < 5:
                    validation_results['sample_paths'].append(str(audio_path))
            else:
                validation_results['missing'] += 1
                validation_results['missing_tracks'].append({
                    'track_id': track_id,
                    'expected_path': str(audio_path)
                })
        
        # Calculate statistics
        found_percentage = (validation_results['found'] / validation_results['total_checked']) * 100
        
        print(f"\nVALIDATION RESULTS:")
        print(f"  Total checked: {validation_results['total_checked']}")
        print(f"  Found: {validation_results['found']} ({found_percentage:.1f}%)")
        print(f"  Missing: {validation_results['missing']} ({100-found_percentage:.1f}%)")
        
        if validation_results['sample_paths']:
            print(f"\nSample found audio paths:")
            for path in validation_results['sample_paths']:
                print(f"  {path}")
        
        if validation_results['missing_tracks'] and len(validation_results['missing_tracks']) <= 5:
            print(f"\nMissing tracks (showing first 5):")
            for track in validation_results['missing_tracks'][:5]:
                print(f"  Track {track['track_id']}: Expected at {track['expected_path']}")
        
        # Check folder distribution for found tracks
        if validation_results['found_tracks']:
            folder_distribution = {}
            for track in validation_results['found_tracks']:
                folder = Path(track['path']).parent.name
                folder_distribution[folder] = folder_distribution.get(folder, 0) + 1
            
            print(f"\nFolder distribution of found tracks:")
            for folder, count in sorted(folder_distribution.items()):
                print(f"  Folder {folder}: {count} tracks")
        
        self.mapping_validation = validation_results
        
        if found_percentage < 50:
            print(f"\n⚠️  WARNING: Only {found_percentage:.1f}% of tracks found!")
            print("This might indicate an issue with the folder structure or file naming.")
        else:
            print(f"\n✅ Validation successful: {found_percentage:.1f}% of tracks found")
        
        return validation_results
    
    def load_audio_file(self, file_path, sr=22050, duration=30.0):
        """
        Load an audio file using librosa
        
        Args:
            file_path (Path): Path to the audio file
            sr (int): Sample rate (default: 22050 Hz)
            duration (float): Duration to load in seconds (default: 30.0 for 30-second clips)
        
        Returns:
            tuple: (audio_data, sample_rate) or (None, None) if loading fails
        """
        try:
            # Load audio file
            y, sr_actual = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr_actual
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            return None, None
    
    def process_dataset(self, feature_extractor_func, max_files=None, save_interval=100, validate_first=True):
        """
        Main processing function to extract features from all audio files
        
        Args:
            feature_extractor_func: Your feature extraction function
            max_files (int): Limit processing to first N files (for testing)
            save_interval (int): Save intermediate results every N files
            validate_first (bool): Whether to validate mapping before processing
        
        Returns:
            pd.DataFrame: DataFrame with extracted features
        """
        # Load metadata and validate audio directory
        self.load_metadata()
        self.validate_audio_directory()
        
        # Run validation check first
        if validate_first:
            validation_results = self.validate_mapping(sample_size=50)
            
            # Ask user if they want to continue if validation shows issues
            if validation_results['found'] / validation_results['total_checked'] < 0.5:
                print("\n⚠️  Low success rate in validation. Please check the issues above.")
                response = input("Do you want to continue anyway? (y/n): ")
                if response.lower() != 'y':
                    print("Processing cancelled.")
                    return None
        
        # 2) Try to resume from the latest pickle
        results = []
        processed_count = 0
        pkl_pattern = str(self.data_path / "features_intermediate_*.pkl")
        all_pkls = glob.glob(pkl_pattern)
        if all_pkls:
            # Extract numbers, find the max
            nums = [int(re.search(r'_(\d+)\.pkl$', p).group(1)) for p in all_pkls]
            latest = max(nums)
            latest_path = self.data_path / f"features_intermediate_{latest}.pkl"
            print(f"Resuming from checkpoint: {latest_path}")
            # Load it
            results = pd.read_pickle(latest_path).to_dict('records')
            processed_count = latest

        failed_count = 0
        
        # 3) Determine which rows remain to be processed
        df_full = self.metadata_df
        # If max_files is set, it caps the total to process (incl. resumed)
        total_to_do = max_files if max_files is not None else len(df_full)
        start_idx = processed_count
        df_to_process = df_full.iloc[start_idx : total_to_do]
        
        # Identify track ID column
        possible_id_columns = ['TRACK_ID', 'track_id', 'id', 'ID', 'TrackID']
        track_id_col = None
        for col in possible_id_columns:
            if col in self.metadata_df.columns:
                track_id_col = col
                break
        
        if track_id_col is None:
            raise ValueError(f"Could not identify track ID column. Available columns: {list(self.metadata_df.columns)}")
        
        print(f"Resuming processing at index {start_idx} (track #{processed_count+1})")
        print(f"Tracks left: {len(df_to_process)}")
        print(f"Using '{track_id_col}' as track ID column")
        
        # Process each track
        for idx, row in tqdm(df_to_process.iterrows(), total=len(df_to_process), desc="Processing audio"):
            track_id = row[track_id_col]
            
            # Get audio file path
            audio_path = self.get_audio_path(track_id)
            
            if not audio_path.exists():
                # print(f"Audio file not found: {audio_path}")  # Comment out to reduce noise
                failed_count += 1
                continue
            
            # Load audio
            audio_data, sample_rate = self.load_audio_file(audio_path)
            
            if audio_data is None:
                failed_count += 1
                continue
            
            # Extract features using your feature extractor
            try:
                features = feature_extractor_func(audio_data, sample_rate)
                
                # Combine metadata with features
                result_row = {
                    'track_id': track_id,
                    'audio_path': str(audio_path),
                    'duration': len(audio_data) / sample_rate,
                    'sample_rate': sample_rate,
                    **features  # Unpack feature dictionary
                }
                
                # Add original metadata columns
                for col in row.index:
                    if col not in result_row:
                        result_row[f'meta_{col}'] = row[col]
                
                results.append(result_row)
                processed_count += 1
                
                # Save intermediate results
                if processed_count % save_interval == 0:
                    temp_df = pd.DataFrame(results)
                    # 2) Save CSV checkpoint
                    csv_path = self.data_path / f"features_intermediate_{processed_count}.csv"
                    temp_df.to_csv(csv_path, index=False)  

                    # 3) Save Pickle checkpoint
                    pkl_path = self.data_path / f"features_intermediate_{processed_count}.pkl"
                    temp_df.to_pickle(pkl_path)  

                    # 4) Optional: free memory if needed
                    del temp_df  
                    gc.collect()

                    print(f"Saved intermediate results at {processed_count} files →")
                    print(f"  • CSV:  {csv_path}")
                    print(f"  • Pickle: {pkl_path}")
                
            except Exception as e:
                print(f"Feature extraction failed for {track_id}: {e}")
                failed_count += 1
        
        # Create final DataFrame
        features_df = pd.DataFrame(results)
        
        print(f"\nProcessing complete!")
        print(f"Successfully processed: {processed_count} files")
        print(f"Failed: {failed_count} files")
        if len(features_df) > 0:
            print(f"Feature DataFrame shape: {features_df.shape}")
        
        return features_df


In [3]:
def quick_validation_check(data_path="E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data",
                           sample_size=100):
    """
    Validate the dataset setup without doing any feature extraction.

    Builds an ``MTGJamendoFeatureExtractor`` for ``data_path``, loads the metadata,
    checks the audio directory layout and then spot-checks the metadata-to-audio
    mapping. Run this first to make sure everything is set up correctly.

    Args:
        data_path (str): Dataset data directory. Defaults to the location this
            notebook was originally run against, so calling with no arguments
            behaves as before.
        sample_size (int): Number of metadata rows to spot-check.

    Returns:
        dict: The result of ``MTGJamendoFeatureExtractor.validate_mapping``.
    """
    extractor = MTGJamendoFeatureExtractor(data_path)

    print("Loading metadata and validating audio directory structure...")
    extractor.load_metadata()
    extractor.validate_audio_directory()

    print("\nValidating audio-metadata mapping...")
    validation_results = extractor.validate_mapping(sample_size=sample_size)

    return validation_results

def musicnn_feature_extractor(audio_data: np.ndarray,
                              sample_rate: int,
                              model: str = 'MTT_musicnn') -> dict:
    """
    Run the musicnn extractor on an in-memory clip and return its features.

    The musicnn extractor is given a file path here, so the clip is first written
    to a temporary WAV file. A uniquely named temporary file is used and always
    removed afterwards, so repeated or concurrent runs neither overwrite each
    other's clip nor leave files behind in the working directory.

    Args:
        audio_data (np.ndarray): Audio samples to analyse.
        sample_rate (int): Sample rate of ``audio_data`` in Hz.
        model (str): musicnn model name passed through to the extractor.

    Returns:
        dict: The third value returned by the musicnn extractor when called with
        ``extract_features=True``. For musicnn models this is expected to hold
        intermediate representations under keys such as 'timbral', 'temporal',
        'cnn1' and 'penultimate' (per the musicnn documentation; not checked here).
    """
    import tempfile  # local import: only this function needs it

    fd, tmp_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # the file is reopened by name below, so release the handle now
    try:
        sf.write(tmp_path, audio_data, sample_rate)

        # extractor returns (taggram, tags, features_dict)
        _, _, feats = musicnn_extractor(tmp_path,
                                        model=model,
                                        extract_features=True)
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            # Cleanup is best-effort; never mask the real error with it.
            pass
    return feats


# Load the pretrained Wav2Vec2 processor and encoder once, at module scope, so
# that wav2vec_feature_extractor() reuses them instead of reloading per clip.
_WAV2VEC2_CHECKPOINT = 'facebook/wav2vec2-base-960h'
processor = Wav2Vec2Processor.from_pretrained(_WAV2VEC2_CHECKPOINT)
model = Wav2Vec2Model.from_pretrained(_WAV2VEC2_CHECKPOINT)
model.eval()  # put the module in evaluation mode

def wav2vec_feature_extractor(audio_data: np.ndarray,
                              sample_rate: int) -> dict:
    """
    Turn one audio clip into a single Wav2Vec2 embedding.

    The clip is resampled to 16 kHz when it is at any other rate, prepared with
    the module-level ``processor``, passed through the module-level ``model``
    without gradient tracking, and the model's ``last_hidden_state`` is averaged
    over its second axis (``dim=1``) and squeezed.

    Args:
        audio_data (np.ndarray): Audio samples of the clip.
        sample_rate (int): Sample rate of ``audio_data`` in Hz.

    Returns:
        dict: ``{'wav2vec2_embedding': <numpy array>}``.
    """
    target_rate = 16000

    # Resample only when the clip is not already at the target rate.
    waveform = (
        audio_data
        if sample_rate == target_rate
        else resampy.resample(audio_data, sample_rate, target_rate)
    )

    encoded = processor(waveform,
                        sampling_rate=target_rate,
                        return_tensors='pt',
                        padding=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Mean-pool over dim 1, then drop the singleton dimensions.
    pooled = hidden_states.mean(dim=1).squeeze()
    return {'wav2vec2_embedding': pooled.cpu().numpy()}

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Root of the dataset checkout; the final embedding tables are written here.
DATA_ROOT = Path("E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset")

# Data directory handed to MTGJamendoFeatureExtractor (kept as a plain string).
data_path = "E:/Oxford/Extra/ICASSP/Draft_1/mtg-jamendo-dataset/data"

In [None]:
# 1) musicnn embeddings: run the extractor over the dataset, then persist the
#    resulting table under DATA_ROOT as both CSV and pickle.
extractor = MTGJamendoFeatureExtractor(data_path)
df_musicnn = extractor.process_dataset(
    musicnn_feature_extractor,
    max_files=None,        # no cap: consider every row of the metadata
    save_interval=1000,    # checkpoint every 1000 successfully processed tracks
    validate_first=False,  # skip the sampled mapping validation and its prompt
)

# To inspect the result: print(df_musicnn.T)  (transposed, so columns read as rows)
df_musicnn.to_csv(DATA_ROOT / 'mtg_jamendo_musicnn_embeddings.csv', index=False)
df_musicnn.to_pickle(DATA_ROOT / 'mtg_jamendo_musicnn_embeddings.pkl')

Loading metadata (with tag‑fix) from: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\autotagging_moodtheme.tsv
Loaded 18486 tracks from metadata
Columns: ['TRACK_ID', 'ARTIST_ID', 'ALBUM_ID', 'PATH', 'DURATION', 'TAGS']

        TRACK_ID      ARTIST_ID      ALBUM_ID        PATH DURATION  \
0  track_0000948  artist_000087  album_000149  48/948.mp3    212.7   
1  track_0000950  artist_000087  album_000149  50/950.mp3    248.0   
2  track_0000951  artist_000087  album_000149  51/951.mp3    199.7   

                      TAGS  
0  mood/theme---background  
1  mood/theme---background  
2  mood/theme---background  
Found audio base directory: E:\Oxford\Extra\ICASSP\Draft_1\mtg-jamendo-dataset\data\mtg-jamendo-data
Found 100 numbered subdirectories (00-99)
  Directory 00: 202 MP3 files
  Directory 01: 200 MP3 files
  Directory 02: 207 MP3 files
  Directory 03: 190 MP3 files
  Directory 04: 193 MP3 files
Sample shows 992 MP3 files in first 5 directories
Resuming from checkpoint: E:\O

Processing audio:   0%|          | 0/12486 [00:00<?, ?it/s]

Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 1/12486 [01:30<313:21:57, 90.36s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 2/12486 [01:34<136:35:59, 39.39s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 3/12486 [01:36<78:18:53, 22.59s/it] 

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 4/12486 [01:39<51:22:28, 14.82s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 5/12486 [01:42<36:04:58, 10.41s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 6/12486 [01:44<26:06:19,  7.53s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 7/12486 [01:46<20:06:56,  5.80s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 8/12486 [01:48<16:12:43,  4.68s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 9/12486 [01:50<13:25:49,  3.88s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 10/12486 [01:52<11:31:30,  3.33s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 11/12486 [01:56<11:29:19,  3.32s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 12/12486 [01:58<10:10:41,  2.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 13/12486 [02:00<9:42:09,  2.80s/it] 

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 14/12486 [02:02<8:50:56,  2.55s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 15/12486 [02:04<8:18:01,  2.40s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 16/12486 [02:07<8:17:38,  2.39s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 17/12486 [02:09<7:52:18,  2.27s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 18/12486 [02:11<7:38:54,  2.21s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 19/12486 [02:13<7:29:28,  2.16s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 20/12486 [02:15<7:25:58,  2.15s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 21/12486 [02:17<7:16:08,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 22/12486 [02:20<8:01:32,  2.32s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 23/12486 [02:24<9:49:43,  2.84s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 24/12486 [02:26<9:20:30,  2.70s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 25/12486 [02:28<8:54:23,  2.57s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 26/12486 [02:31<8:54:49,  2.58s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 27/12486 [02:33<8:40:02,  2.50s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 28/12486 [02:35<8:22:34,  2.42s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 29/12486 [02:38<8:01:33,  2.32s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 30/12486 [02:40<8:28:08,  2.45s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 31/12486 [02:43<8:14:47,  2.38s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 32/12486 [02:45<8:05:24,  2.34s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 33/12486 [02:47<7:55:55,  2.29s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 34/12486 [02:49<7:40:16,  2.22s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 35/12486 [02:52<8:05:18,  2.34s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 36/12486 [02:54<7:50:34,  2.27s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 37/12486 [02:56<7:37:14,  2.20s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 38/12486 [02:58<7:36:06,  2.20s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 39/12486 [03:00<7:27:34,  2.16s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 40/12486 [03:02<7:18:13,  2.11s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 41/12486 [03:04<7:05:20,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 42/12486 [03:06<6:58:01,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 43/12486 [03:08<6:52:42,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 44/12486 [03:10<6:53:59,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 45/12486 [03:12<6:53:44,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 46/12486 [03:14<6:51:19,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 47/12486 [03:16<7:23:29,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 48/12486 [03:18<7:13:36,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 49/12486 [03:20<7:04:05,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 50/12486 [03:22<7:03:43,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 51/12486 [03:25<7:20:12,  2.12s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 52/12486 [03:27<7:20:47,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 53/12486 [03:29<7:11:27,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 54/12486 [03:31<7:05:08,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 55/12486 [03:33<6:56:48,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 56/12486 [03:35<6:58:01,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 57/12486 [03:37<7:01:18,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 58/12486 [03:39<6:58:43,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 59/12486 [03:41<7:32:01,  2.18s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 60/12486 [03:43<7:22:17,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 61/12486 [03:45<7:11:13,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   0%|          | 62/12486 [03:47<7:05:23,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 63/12486 [03:49<7:01:07,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 64/12486 [03:51<6:55:15,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 65/12486 [03:53<6:55:17,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 66/12486 [03:55<7:02:44,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 67/12486 [03:57<7:00:10,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 68/12486 [03:59<6:56:25,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 69/12486 [04:01<6:53:48,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 70/12486 [04:03<6:50:03,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 71/12486 [04:06<7:28:18,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 72/12486 [04:08<7:13:42,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 73/12486 [04:10<7:03:45,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 74/12486 [04:12<6:56:34,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 75/12486 [04:13<6:50:02,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 76/12486 [04:15<6:41:59,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 77/12486 [04:17<6:39:35,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 78/12486 [04:19<6:36:51,  1.92s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 79/12486 [04:21<6:47:57,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 80/12486 [04:23<7:00:55,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 81/12486 [04:25<6:53:08,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 82/12486 [04:27<6:48:59,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 83/12486 [04:30<7:22:02,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 84/12486 [04:32<7:12:10,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 85/12486 [04:34<7:08:55,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 86/12486 [04:36<7:04:10,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 87/12486 [04:38<7:03:41,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 88/12486 [04:40<7:06:04,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 89/12486 [04:42<6:56:20,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 90/12486 [04:44<6:52:11,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 91/12486 [04:46<6:45:31,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 92/12486 [04:48<6:45:20,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 93/12486 [04:49<6:43:09,  1.95s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 94/12486 [04:51<6:45:19,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 95/12486 [04:54<7:21:05,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 96/12486 [04:56<7:15:34,  2.11s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 97/12486 [04:58<7:07:18,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 98/12486 [05:00<7:10:40,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 99/12486 [05:02<7:02:34,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 100/12486 [05:04<7:01:01,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 101/12486 [05:06<6:54:27,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 102/12486 [05:08<6:51:50,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 103/12486 [05:10<6:48:39,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 104/12486 [05:12<6:50:57,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 105/12486 [05:14<6:49:40,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 106/12486 [05:16<6:52:17,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 107/12486 [05:19<7:28:34,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 108/12486 [05:21<7:20:31,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 109/12486 [05:23<7:09:53,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 110/12486 [05:24<6:59:04,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 111/12486 [05:27<7:03:18,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 112/12486 [05:29<7:03:48,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 113/12486 [05:31<7:03:15,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 114/12486 [05:33<6:59:51,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 115/12486 [05:35<6:59:04,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 116/12486 [05:37<6:55:23,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 117/12486 [05:39<6:58:17,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 118/12486 [05:41<6:57:13,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 119/12486 [05:43<6:56:42,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 120/12486 [05:45<7:31:10,  2.19s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 121/12486 [05:47<7:18:51,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 122/12486 [05:49<7:10:44,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 123/12486 [05:51<7:00:17,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 124/12486 [05:53<7:04:40,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 125/12486 [05:56<7:09:07,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 126/12486 [05:57<7:01:55,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 127/12486 [06:00<7:05:20,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 128/12486 [06:02<6:55:52,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 129/12486 [06:03<6:51:07,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 130/12486 [06:05<6:52:23,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 131/12486 [06:07<6:51:14,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 132/12486 [06:10<7:29:13,  2.18s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 133/12486 [06:12<7:20:47,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 134/12486 [06:14<7:10:08,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 135/12486 [06:16<7:06:11,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 136/12486 [06:18<7:04:29,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 137/12486 [06:20<6:56:45,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 138/12486 [06:22<7:00:25,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 139/12486 [06:24<6:58:04,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 140/12486 [06:26<6:53:12,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 141/12486 [06:28<6:50:18,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 142/12486 [06:30<6:49:51,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 143/12486 [06:32<6:49:45,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 144/12486 [06:35<7:25:25,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 145/12486 [06:37<7:12:15,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 146/12486 [06:39<7:01:25,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 147/12486 [06:41<6:58:56,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 148/12486 [06:43<7:03:48,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 149/12486 [06:45<7:01:39,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 150/12486 [06:47<6:54:33,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 151/12486 [06:49<6:49:58,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 152/12486 [06:51<6:58:36,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 153/12486 [06:53<6:49:51,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 154/12486 [06:55<6:49:44,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 155/12486 [06:57<6:49:51,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|          | 156/12486 [06:59<7:26:35,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 157/12486 [07:01<7:15:33,  2.12s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 158/12486 [07:03<7:03:06,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 159/12486 [07:05<6:59:34,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 160/12486 [07:07<6:53:23,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 161/12486 [07:09<7:03:51,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 162/12486 [07:11<6:59:38,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 163/12486 [07:13<6:56:06,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 164/12486 [07:15<7:02:13,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 165/12486 [07:17<7:01:05,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 166/12486 [07:19<6:50:58,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 167/12486 [07:21<6:51:15,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 168/12486 [07:24<7:19:53,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 169/12486 [07:26<7:17:09,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 170/12486 [07:28<7:11:31,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 171/12486 [07:30<7:02:03,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 172/12486 [07:32<7:02:17,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 173/12486 [07:34<6:57:52,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 174/12486 [07:36<6:53:56,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 175/12486 [07:38<6:53:28,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 176/12486 [07:40<7:03:34,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 177/12486 [07:42<7:00:46,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 178/12486 [07:44<6:56:33,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 179/12486 [07:46<6:55:51,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 180/12486 [07:49<7:25:47,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 181/12486 [07:51<7:16:47,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 182/12486 [07:53<7:07:02,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 183/12486 [07:55<7:05:17,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 184/12486 [07:57<6:57:56,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 185/12486 [07:59<6:52:18,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 186/12486 [08:01<6:51:59,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   1%|▏         | 187/12486 [08:02<6:46:40,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 188/12486 [08:04<6:43:38,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 189/12486 [08:06<6:42:51,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 190/12486 [08:08<6:43:37,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 191/12486 [08:10<6:44:01,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 192/12486 [08:13<7:21:21,  2.15s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 193/12486 [08:15<7:10:58,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 194/12486 [08:17<7:11:14,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 195/12486 [08:19<7:00:18,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 196/12486 [08:21<6:59:19,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 197/12486 [08:23<7:00:13,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 198/12486 [08:25<6:58:47,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 199/12486 [08:27<6:54:33,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 200/12486 [08:29<6:54:41,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 201/12486 [08:31<6:50:45,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 202/12486 [08:33<6:46:37,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 203/12486 [08:35<6:47:13,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 204/12486 [08:37<7:16:14,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 205/12486 [08:39<7:05:15,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 206/12486 [08:41<6:58:15,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 207/12486 [08:43<6:50:51,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 208/12486 [08:45<6:49:28,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 209/12486 [08:47<6:44:21,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 210/12486 [08:49<6:39:15,  1.95s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 211/12486 [08:51<6:49:22,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 212/12486 [08:53<6:45:24,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 213/12486 [08:55<6:48:00,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 214/12486 [08:57<6:54:55,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 215/12486 [08:59<6:57:17,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 216/12486 [09:02<7:27:31,  2.19s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 217/12486 [09:04<7:15:39,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 218/12486 [09:06<7:13:16,  2.12s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 219/12486 [09:08<7:02:43,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 220/12486 [09:10<7:09:57,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 221/12486 [09:12<7:04:27,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 222/12486 [09:14<6:56:16,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 223/12486 [09:16<6:54:40,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 224/12486 [09:18<6:50:19,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 225/12486 [09:20<6:50:17,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 226/12486 [09:22<6:48:14,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 227/12486 [09:24<6:53:49,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 228/12486 [09:27<7:30:16,  2.20s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 229/12486 [09:29<7:18:34,  2.15s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 230/12486 [09:31<7:08:08,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 231/12486 [09:33<7:07:46,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 232/12486 [09:35<6:56:21,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 233/12486 [09:37<6:56:23,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 234/12486 [09:39<6:49:39,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 235/12486 [09:41<6:55:43,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 236/12486 [09:43<6:56:20,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 237/12486 [09:45<6:56:42,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 238/12486 [09:47<6:54:26,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 239/12486 [09:49<6:51:02,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 240/12486 [09:51<7:22:36,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 241/12486 [09:53<7:13:41,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 242/12486 [09:55<7:03:30,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 243/12486 [09:57<6:57:59,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 244/12486 [09:59<6:52:13,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 245/12486 [10:01<6:47:17,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 246/12486 [10:03<6:46:32,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 247/12486 [10:05<6:42:51,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 248/12486 [10:07<6:40:21,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 249/12486 [10:09<6:36:16,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 250/12486 [10:11<6:41:06,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 251/12486 [10:13<6:39:35,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 252/12486 [10:15<6:42:53,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 253/12486 [10:17<7:13:22,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 254/12486 [10:19<6:58:59,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 255/12486 [10:21<6:51:58,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 256/12486 [10:23<6:49:35,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 257/12486 [10:25<6:47:38,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 258/12486 [10:27<6:44:54,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 259/12486 [10:29<6:39:25,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 260/12486 [10:31<6:34:02,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 261/12486 [10:33<6:33:15,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 262/12486 [10:35<6:31:49,  1.92s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 263/12486 [10:37<6:35:52,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 264/12486 [10:39<6:44:06,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 265/12486 [10:41<7:17:37,  2.15s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 266/12486 [10:43<7:05:48,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 267/12486 [10:45<6:56:53,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 268/12486 [10:47<6:49:52,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 269/12486 [10:49<6:43:15,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 270/12486 [10:51<6:39:56,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 271/12486 [10:53<6:43:27,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 272/12486 [10:55<6:36:36,  1.95s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 273/12486 [10:57<6:41:33,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 274/12486 [10:59<6:42:41,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 275/12486 [11:01<6:39:29,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 276/12486 [11:03<6:43:57,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 277/12486 [11:05<7:19:47,  2.16s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 278/12486 [11:07<7:12:49,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 279/12486 [11:10<7:25:25,  2.19s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 280/12486 [11:12<7:27:44,  2.20s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 281/12486 [11:14<7:38:56,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 282/12486 [11:17<7:53:13,  2.33s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 283/12486 [11:19<7:50:38,  2.31s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 284/12486 [11:21<7:41:51,  2.27s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 285/12486 [11:24<7:43:02,  2.28s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 286/12486 [11:26<7:40:16,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 287/12486 [11:28<7:39:43,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 288/12486 [11:30<7:42:41,  2.28s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 289/12486 [11:33<8:22:33,  2.47s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 290/12486 [11:36<8:12:54,  2.42s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 291/12486 [11:38<8:06:10,  2.39s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 292/12486 [11:40<8:03:03,  2.38s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 293/12486 [11:43<7:57:33,  2.35s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 294/12486 [11:45<8:01:59,  2.37s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 295/12486 [11:47<7:51:22,  2.32s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 296/12486 [11:50<7:48:57,  2.31s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 297/12486 [11:52<7:38:32,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 298/12486 [11:54<7:38:11,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 299/12486 [11:56<7:34:15,  2.24s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 300/12486 [11:58<7:39:23,  2.26s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 301/12486 [12:02<8:27:43,  2.50s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 302/12486 [12:04<8:09:28,  2.41s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 303/12486 [12:06<7:52:20,  2.33s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 304/12486 [12:08<7:34:00,  2.24s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 305/12486 [12:10<7:30:47,  2.22s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 306/12486 [12:12<7:22:44,  2.18s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 307/12486 [12:14<7:14:56,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 308/12486 [12:16<7:03:59,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 309/12486 [12:18<7:06:06,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 310/12486 [12:20<6:56:27,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 311/12486 [12:22<7:01:02,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   2%|▏         | 312/12486 [12:24<7:00:32,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 313/12486 [12:27<7:33:41,  2.24s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 314/12486 [12:29<7:19:25,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 315/12486 [12:31<7:11:56,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 316/12486 [12:33<7:03:20,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 317/12486 [12:35<7:01:02,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 318/12486 [12:37<7:02:46,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 319/12486 [12:39<7:02:36,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 320/12486 [12:41<7:02:51,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 321/12486 [12:43<7:02:57,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 322/12486 [12:45<6:56:03,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 323/12486 [12:47<6:53:45,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 324/12486 [12:50<6:55:22,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 325/12486 [12:52<7:35:01,  2.24s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 326/12486 [12:54<7:20:23,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 327/12486 [12:56<7:18:27,  2.16s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 328/12486 [12:58<7:06:53,  2.11s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 329/12486 [13:00<7:04:54,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 330/12486 [13:03<7:02:39,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 331/12486 [13:05<7:14:19,  2.14s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 332/12486 [13:07<7:07:46,  2.11s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 333/12486 [13:09<7:11:17,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 334/12486 [13:11<7:04:14,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 335/12486 [13:13<7:00:00,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 336/12486 [13:15<6:54:09,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 337/12486 [13:18<7:27:13,  2.21s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 338/12486 [13:20<7:14:19,  2.15s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 339/12486 [13:22<7:23:06,  2.19s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 340/12486 [13:24<7:11:54,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 341/12486 [13:26<7:05:47,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 342/12486 [13:28<6:56:56,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 343/12486 [13:30<6:50:26,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 344/12486 [13:32<6:47:17,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 345/12486 [13:34<6:39:20,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 346/12486 [13:36<6:36:19,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 347/12486 [13:38<6:39:55,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 348/12486 [13:40<6:42:56,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 349/12486 [13:42<7:20:11,  2.18s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 350/12486 [13:44<7:10:21,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 351/12486 [13:46<7:02:31,  2.09s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 352/12486 [13:48<6:59:08,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 353/12486 [13:50<6:57:32,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 354/12486 [13:53<7:03:42,  2.10s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 355/12486 [13:55<6:59:07,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 356/12486 [13:57<6:59:10,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 357/12486 [13:59<6:54:23,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 358/12486 [14:01<6:56:18,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 359/12486 [14:03<6:54:16,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 360/12486 [14:05<6:54:37,  2.05s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 361/12486 [14:07<7:27:20,  2.21s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 362/12486 [14:09<7:18:26,  2.17s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 363/12486 [14:11<7:09:08,  2.12s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 364/12486 [14:14<7:07:58,  2.12s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 365/12486 [14:16<6:56:51,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 366/12486 [14:17<6:48:37,  2.02s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 367/12486 [14:19<6:39:18,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 368/12486 [14:21<6:36:44,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 369/12486 [14:23<6:32:10,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 370/12486 [14:25<6:37:52,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 371/12486 [14:27<6:35:21,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 372/12486 [14:29<6:37:25,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 373/12486 [14:32<7:09:14,  2.13s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 374/12486 [14:33<6:55:25,  2.06s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 375/12486 [14:35<6:49:01,  2.03s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 376/12486 [14:37<6:44:02,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 377/12486 [14:39<6:41:29,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 378/12486 [14:41<6:38:52,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 379/12486 [14:43<6:39:17,  1.98s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 380/12486 [14:45<6:36:20,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 381/12486 [14:47<6:34:30,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 382/12486 [14:49<6:30:03,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 383/12486 [14:51<6:30:06,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 384/12486 [14:53<6:29:10,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 385/12486 [14:55<6:26:03,  1.91s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 386/12486 [14:57<6:58:55,  2.08s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 387/12486 [14:59<6:50:34,  2.04s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 388/12486 [15:01<6:44:06,  2.00s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 389/12486 [15:03<6:36:42,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 390/12486 [15:05<6:32:31,  1.95s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 391/12486 [15:07<6:29:54,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 392/12486 [15:09<6:28:04,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 393/12486 [15:11<6:28:05,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 394/12486 [15:13<6:36:05,  1.97s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 395/12486 [15:15<6:30:35,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 396/12486 [15:16<6:31:24,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 397/12486 [15:18<6:27:43,  1.92s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 398/12486 [15:21<6:57:58,  2.07s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 399/12486 [15:23<6:45:50,  2.01s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 400/12486 [15:25<6:40:29,  1.99s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 401/12486 [15:26<6:35:34,  1.96s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 402/12486 [15:28<6:32:31,  1.95s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 403/12486 [15:30<6:30:00,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 404/12486 [15:32<6:28:35,  1.93s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 405/12486 [15:34<6:31:23,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 406/12486 [15:36<6:30:35,  1.94s/it]

done!
Computing spectrogram (w/ librosa) and tags (w/ tensorflow).. 

Processing audio:   3%|▎         | 407/12486 [15:39<7:13:52,  2.16s/it]

done!


In [None]:
# 2) wav2vec2 embeddings
# Build a fresh extractor and run the wav2vec2 feature function through
# process_dataset.
# NOTE(review): process_dataset is defined outside this cell; the exact meaning
# of max_files / save_interval / validate_first should be confirmed there.
extractor = MTGJamendoFeatureExtractor(data_path)
wav2vec_run_options = dict(
    feature_extractor_func=wav2vec_feature_extractor,
    max_files=None,
    save_interval=1000,
    validate_first=True,
)
df_wav2vec = extractor.process_dataset(**wav2vec_run_options)

# Persist the resulting frame under DATA_ROOT, once as CSV (without the index)
# and once as a pickle.
# NOTE(review): 'wav2vec2_embedding' presumably holds array objects, which CSV
# stores only as their string form -- confirm the CSV copy is actually usable.
wav2vec_csv_path = DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings.csv'
wav2vec_pkl_path = DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings.pkl'
df_wav2vec.to_csv(wav2vec_csv_path, index=False)
df_wav2vec.to_pickle(wav2vec_pkl_path)

In [None]:
# Expand the per-track 'wav2vec2_embedding' vectors into one scalar column per
# dimension (w2v_0, w2v_1, ...) so the table can be stored as plain numeric CSV.
#
# The guard keeps this cell re-runnable: once the expansion has happened the
# source column no longer exists, and popping it again would raise KeyError.
if 'wav2vec2_embedding' in df_wav2vec.columns:
    emb = df_wav2vec['wav2vec2_embedding'].tolist()

    # Build the wide frame on df_wav2vec's own index. pandas aligns on index
    # labels when combining frames, so a default RangeIndex here would produce
    # NaN / misplaced rows whenever df_wav2vec's index is not exactly 0..n-1.
    emb_wide = pd.DataFrame(emb, index=df_wav2vec.index)
    emb_wide.columns = [f'w2v_{i}' for i in range(emb_wide.shape[1])]

    # One concat instead of inserting hundreds of columns into the existing
    # frame one at a time. Note this rebinds df_wav2vec to a new frame rather
    # than mutating the old object in place.
    df_wav2vec = pd.concat(
        [df_wav2vec.drop(columns='wav2vec2_embedding'), emb_wide],
        axis=1,
    )

# Save the expanded table under DATA_ROOT as CSV (without the index) and pickle.
df_wav2vec.to_csv(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings_v2.csv',
                  index=False)
df_wav2vec.to_pickle(DATA_ROOT / 'mtg_jamendo_wav2vec2_embeddings_v2.pkl')