In [2]:
import numpy as np
np.float_ = np.float64
np.Inf = np.inf
import os
import soundfile as sf
from tqdm import tqdm
import pandas as pd
import os
import re
from utils import *
import warnings
warnings.filterwarnings('ignore')

## 1. Adquisición y organización de los datasets

In [2]:
UNTREATED_DIR = 'untreated_dataset'
MUSDB18_STEMS = ('vocals.wav', 'drums.wav', 'bass.wav')


def collect_files(base_dir, keep, root_filter=None):
    """Recursively list the files under ``base_dir`` accepted by ``keep``.

    Parameters
    ----------
    base_dir : str
        Directory to walk. A missing directory simply yields an empty list.
    keep : callable(str) -> bool
        Predicate applied to each bare file name.
    root_filter : callable(str) -> bool, optional
        Predicate applied to each visited directory path. Directories that
        fail it contribute no files, but their sub-directories are still
        visited.

    Returns
    -------
    list of str
        Matching paths (``os.path.join(root, name)``), in a deterministic
        order: directories and file names are visited alphabetically.
    """
    found = []
    for root, subdirs, files in os.walk(base_dir):
        # os.walk yields entries in arbitrary order; sorting in place makes
        # the traversal (and therefore every downstream row index) stable.
        subdirs.sort()
        if root_filter is not None and not root_filter(root):
            continue
        found.extend(os.path.join(root, name) for name in sorted(files) if keep(name))
    return found


# MUSDB18-HQ: only the three stems of interest from every track folder.
musdb18 = collect_files(
    os.path.join(UNTREATED_DIR, 'musdb18hq'),
    lambda name: name in MUSDB18_STEMS,
)

# MusicNet: every .wav recording (match the real extension, not a substring).
musicnet = collect_files(
    os.path.join(UNTREATED_DIR, 'musicnet'),
    lambda name: name.endswith('.wav'),
)

# SynthSOD: .flac files, but only from folders whose path contains 'Tree'
# (presumably the per-instrument stem folders; verify against the dataset layout).
synthsod = collect_files(
    os.path.join(UNTREATED_DIR, 'synthsod'),
    lambda name: name.endswith('.flac'),
    root_filter=lambda root: 'Tree' in root,
)

# Flat list of every selected source file, in dataset order.
datasets = [*musdb18, *musicnet, *synthsod]

In [3]:
# Metadata columns: (row index acts as id) | dataset | title | stem | spec | formerdir

def search(string):
    """Classify an instrument/ensemble description into a stem family.

    The families are tried in a fixed order ('piano', then 'accoustic', then
    'strings') and the first one with any keyword occurring as a substring
    of ``string`` wins. The order matters: e.g. 'bassoon' is claimed by
    'accoustic' before the 'bass' keyword of 'strings' can match it.
    Matching is case-sensitive.

    Returns the family name, or '' when no keyword matches.
    """
    families = (
        ('piano', ('piano', 'harpsichord')),
        ('accoustic', ('horn', 'trombone', 'trumpet', 'tuba', 'bassoon',
                       'clarinet', 'coranglais', 'flute', 'oboe', 'piccolo')),
        ('strings', ('bass', 'cello', 'harp', 'viola', 'violin', 'violin_1',
                     'violin_2', 'string', 'strings')),
    )

    hits = (
        family
        for family, keywords in families
        if any(keyword in string for keyword in keywords)
    )
    return next(hits, '')
    
# Column-oriented record of every selected source file; one entry per file
# is appended to each list so they stay aligned.
metadata = {
    'dataset': [],
    'title': [],
    'stem': [],
    'spec': [],
    'formerdir': []
}

# MusicNet ships a CSV describing every recording, keyed by numeric id.
mn_metadata = pd.read_csv(
    os.path.join('untreated_dataset', 'musicnet', 'musicnet_metadata.csv')
)

for direc in datasets:
    # Accept both Windows and POSIX separators (and runs of them).
    parsed = re.split(r'[\\/]+', direc)
    basename = parsed[-1].split('.')[0]

    if 'musdb18hq' in direc:
        # <track folder>/<stem>.wav : the file name is both stem and spec.
        dataset, title, stem, spec = 'musdb18hq', parsed[-2].lower(), basename, basename
    elif 'musicnet' in direc:
        # The file name is the recording id used in the metadata CSV.
        # An id missing from the CSV raises IndexError on the lookups below.
        row = mn_metadata.loc[mn_metadata['id'] == int(basename)]
        # Fifth CSV column (presumably 'ensemble'; verify against the CSV header).
        spec = row.iat[0, 4].lower()
        # Take the scalar title: row['composition'] alone is a whole Series,
        # which would end up embedded in the metadata and in file names.
        dataset, title, stem = 'musicnet', row['composition'].iat[0], search(spec)
    elif 'synthsod' in direc:
        # The piece name sits two folders above the instrument file.
        spec = basename.lower()
        dataset, title, stem = 'synthsod', parsed[-3], search(spec)
    else:
        # Path does not belong to any known dataset: leave it out.
        continue

    metadata['dataset'].append(dataset)
    metadata['title'].append(title)
    metadata['stem'].append(stem)
    metadata['spec'].append(spec)
    metadata['formerdir'].append(direc)

# How many files landed in each stem family ('' = unclassified).
pd.DataFrame(metadata)['stem'].value_counts()

stem
strings      1853
accoustic    1585
              196
piano         186
Name: count, dtype: int64

In [4]:
# Size of the random subset drawn from each orchestral stem family, and the
# seed that makes that subset identical on every Restart & Run All.
SAMPLES_PER_STEM = 150
RANDOM_STATE = 42

df_all = pd.DataFrame(metadata)

# MUSDB18 stems are kept in full; the other families are down-sampled.
balanced_parts = [df_all.loc[df_all['stem'].isin(['vocals', 'bass', 'drums'])]]
for family in ('strings', 'piano', 'accoustic'):
    candidates = df_all.loc[df_all['stem'] == family]
    # Cap at the number of available rows: DataFrame.sample raises ValueError
    # when asked for more rows than exist (without replacement).
    balanced_parts.append(
        candidates.sample(
            n=min(SAMPLES_PER_STEM, len(candidates)),
            random_state=RANDOM_STATE,
        )
    )

df_meta = pd.concat(balanced_parts, axis=0, ignore_index=True)
df_meta

Unnamed: 0,dataset,title,stem,spec,formerdir
0,synthsod,symphony_102_2_orch,strings,violin_1,untreated_dataset\\synthsod\SynthSOD_data\symp...
1,synthsod,string_quartet_9_2_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\stri...
2,synthsod,string_quartet_15_1_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\stri...
3,synthsod,minor_works_hess-245_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\mino...
4,synthsod,symphony_093_2_orch,strings,viola,untreated_dataset\\synthsod\SynthSOD_data\symp...
...,...,...,...,...,...
445,synthsod,symphony_6_3-5_orch,accoustic,flute,untreated_dataset\\synthsod\SynthSOD_data\symp...
446,synthsod,mass_in_f_3_orch,accoustic,trombone,untreated_dataset\\synthsod\SynthSOD_data\mass...
447,synthsod,symphony_2_3_orch,accoustic,horn,untreated_dataset\\synthsod\SynthSOD_data\symp...
448,synthsod,sonata_in_g_i_4_orch,accoustic,trombone,untreated_dataset\\synthsod\SynthSOD_data\sona...


In [12]:
from pydub.utils import which
from pydub import AudioSegment
import shutil

# Point pydub at the ffmpeg binary found on PATH (None if it is not installed).
AudioSegment.converter = which("ffmpeg")

# Place every selected file under stems/<stem>/ as .wav:
#   * SynthSOD .flac files are transcoded (the source is left in place);
#   * everything else is MOVED, i.e. it leaves the untreated dataset folder.
dirs = []
for index, serie in df_meta.iterrows():
    # Titles come from folder names / CSV text: strip characters that are not
    # valid inside a file name so they cannot break or redirect the path.
    safe_title = re.sub(r'[<>:"/\\|?*\r\n]+', '_', str(serie['title']))
    newdir = f"stems/{serie['stem']}/{serie['stem']}_{safe_title}_{index}.wav"
    os.makedirs(os.path.dirname(newdir), exist_ok=True)

    if serie['dataset']=='synthsod':
        flac = AudioSegment.from_file(serie['formerdir'],format="flac")
        flac.export(newdir, format='wav')
    elif not os.path.exists(serie['formerdir']) and os.path.exists(newdir):
        # Already moved by a previous run of this cell: nothing to do.
        pass
    else:
        try:
            shutil.move(serie['formerdir'],newdir)
        except OSError as error:
            # Stay best-effort (one bad file must not abort the export), but
            # report the failure instead of hiding it.
            print(f"Could not move {serie['formerdir']} -> {newdir}: {error}")

    dirs.append(newdir)

df_meta['newdir'] = dirs
df_meta.to_json('metadata.json')

## 2. Preparación y preprocesamiento de datos

In [None]:
# Gather every file below 'stems' up front so the progress bar knows its total.
stem_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('stems')
    for name in names
]
file_count = len(stem_paths)

# Overwrite each file in place with the output of remove_silence, written at 44100 Hz.
with tqdm(total=file_count) as pbar:
    for path in stem_paths:
        audio = remove_silence(path)
        sf.write(path, audio, 44100)
        pbar.update(1)

In [4]:
# Build the mixing helper (`cacophony` is not defined in this notebook;
# presumably it comes from `from utils import *` - TODO confirm).
mixer = cacophony()
# Generate 1000 random mixes. Slow: the timing notes recorded below report
# about 2 h of wall time for this whole cell.
mixer.generate_random(1000)
# Load the generated data back as two objects (presumably model inputs X and
# targets Y - TODO confirm against utils).
X, Y = mixer.read_from_jams()

Creada carpeta temporal para resguardar memoria.

Confeccionando mixes aleatorios.


100%|██████████| 1000/1000 [1:29:32<00:00,  5.37s/it]



Fusión de archivos .jams


100%|██████████| 1000/1000 [00:04<00:00, 207.36it/s]


Creación de metadata terminada. Eliminada carpeta temporal.

Creada carpeta temporal para resguardar memoria.

Reconstrucción de audios.


100%|██████████| 1000/1000 [26:53<00:00,  1.61s/it]


### 1000 mixes

Tiempo:
* Confección de mixes -> 1:29:32, 5.37s/it
* Fusión de archivos .jams -> 0:04, 207.36it/s
* Reconstrucción de audios -> 26:53, 1.61s/it
* Wall time -> 2:00:16

Recursos (almacenamiento SSD; memoria RAM DDR5 de 16 GB):
* Máxima velocidad de lectura -> 71 MB/s
* Máximo espacio de almacenamiento volátil -> 12.687 MB (78% de memoria)
* Espacio final tras procesamiento -> 5.844 MB
* Memoria en almacenamiento ocupada por el .jams -> 10 MB