In [1]:
import numpy as np
np.float_ = np.float64
np.Inf = np.inf
import os
import soundfile as sf
from tqdm import tqdm
import pandas as pd
import os
import re
from utils import *

## 1. Adquisición y organización de los datasets

In [2]:
datasets, musdb18, musicnet, synthsod = [], [], [], []

for root, subdir, files in os.walk(r'untreated_dataset\\musdb18hq'):
    for file in files:
        if file in ['vocals.wav','drums.wav','bass.wav']:
            musdb18.append(os.path.join(root,file))

for root, subdir, files in os.walk(r'untreated_dataset\\musicnet'):
    for file in files:
        if '.wav' in file:
            musicnet.append(os.path.join(root,file))

for root, subdir, files in os.walk(r'untreated_dataset\\synthsod'):
    if 'Tree' not in root:
        continue
    for file in files:
        if '.flac' in file:
            synthsod.append(os.path.join(root,file))
            
datasets.extend(musdb18)
datasets.extend(musicnet)
datasets.extend(synthsod)

In [3]:
# id | dataset | title | stem | spec | directory |

def search(string):
    stems_dict = {
        'piano':['piano','harpsichord'],
        'accoustic':['horn', 'trombone', 'trumpet', 'tuba', 'bassoon', 'clarinet', 'coranglais', 'flute', 'oboe', 'piccolo'],
        'strings':['bass','cello','harp','viola','violin','violin_1','violin_2','string','strings']
        }
    
    for key, stems in stems_dict.items():
        for stem in stems:
            if stem in string:
                return key
    return ''
    
metadata = {
    'dataset':[],
    'title':[],
    'stem':[],
    'spec':[],
    'formerdir':[]
}

mn_metadata = pd.read_csv(r'untreated_dataset\musicnet\musicnet_metadata.csv')

for direc in datasets:
    parsed = re.split(r'\\+',direc)
    
    if 'musdb18hq' in direc:
        metadata['dataset'] += ['musdb18hq']
        metadata['title'] += [parsed[-2].lower()]
        metadata['stem'] += [parsed[-1].split('.')[0]]
        metadata['spec'] += [parsed[-1].split('.')[0]]
        metadata['formerdir'] += [direc]
    elif 'musicnet' in direc:
        row = mn_metadata.loc[mn_metadata['id']==int(parsed[-1].split('.')[0])]
        spec = row.iat[0,4].lower()
        
        metadata['dataset'] += ['musicnet']
        metadata['title'] += [row['composition']]
        metadata['stem'] += [search(spec)]
        metadata['spec'] += [spec]
        metadata['formerdir'] += [direc]
    elif 'synthsod' in direc:
        spec = parsed[-1].split('.')[0].lower()
        
        metadata['dataset'] += ['synthsod']
        metadata['title'] += [parsed[-3]]
        metadata['stem'] += [search(spec)]
        metadata['spec'] += [spec]
        metadata['formerdir'] += [direc]
    else:
        pass

pd.DataFrame(metadata)['stem'].value_counts()

stem
strings      1853
accoustic    1585
              196
piano         186
Name: count, dtype: int64

In [4]:
df_meta = pd.DataFrame(metadata)
df_meta = pd.concat([
        df_meta.loc[df_meta['stem'].isin(['vocals','bass','drums'])],
        df_meta.loc[df_meta['stem']=='strings'].sample(n=150),
        df_meta.loc[df_meta['stem']=='piano'].sample(n=150),
        df_meta.loc[df_meta['stem']=='accoustic'].sample(n=150)
    ],axis=0,ignore_index=True)
df_meta

Unnamed: 0,dataset,title,stem,spec,formerdir
0,synthsod,symphony_102_2_orch,strings,violin_1,untreated_dataset\\synthsod\SynthSOD_data\symp...
1,synthsod,string_quartet_9_2_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\stri...
2,synthsod,string_quartet_15_1_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\stri...
3,synthsod,minor_works_hess-245_orch,strings,violin_2,untreated_dataset\\synthsod\SynthSOD_data\mino...
4,synthsod,symphony_093_2_orch,strings,viola,untreated_dataset\\synthsod\SynthSOD_data\symp...
...,...,...,...,...,...
445,synthsod,symphony_6_3-5_orch,accoustic,flute,untreated_dataset\\synthsod\SynthSOD_data\symp...
446,synthsod,mass_in_f_3_orch,accoustic,trombone,untreated_dataset\\synthsod\SynthSOD_data\mass...
447,synthsod,symphony_2_3_orch,accoustic,horn,untreated_dataset\\synthsod\SynthSOD_data\symp...
448,synthsod,sonata_in_g_i_4_orch,accoustic,trombone,untreated_dataset\\synthsod\SynthSOD_data\sona...


In [12]:
from pydub.utils import which
from pydub import AudioSegment
import shutil

AudioSegment.converter = which("ffmpeg")

dirs = []
for index, serie in df_meta.iterrows():
    newdir = f"stems/{serie['stem']}/{serie['stem']}_{serie['title']}_{index}.wav"
    if serie['dataset']=='synthsod':
        flac = AudioSegment.from_file(serie['formerdir'],format="flac")
        flac.export(newdir, format='wav')
    else:
        try:
            shutil.move(serie['formerdir'],newdir)
        except:
            pass
        
    dirs.append(newdir)
    
df_meta['newdir'] = dirs
df_meta.to_json('metadata.json')

## 2. Preparación y preprocesamiento de datos

In [None]:
file_count = sum(len(files) for _, _, files in os.walk('stems'))

with tqdm(total=file_count) as pbar:
    for root, _, files in os.walk('stems'):
        for file in files:
            path = os.path.join(root,file)
            audio = remove_silence(path)
            sf.write(path,audio,44100)
            pbar.update(1)

In [2]:
from utils import *
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [3]:
mixer = cacophony()
#mixer.generate_random(3)
#X, Y = mixer.read_from_jams()

In [4]:
X, Y = mixer.read_from_jams()

Reconstrucción de audios.


100%|██████████| 3/3 [00:08<00:00,  2.94s/it]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (36, 2) + inhomogeneous part.

In [133]:
jams_path = 'soundscapes.jams'

if not os.path.exists('temp'):
    print('Creada carpeta temporal para resguardar memoria.\n')
    os.makedirs('temp',exist_ok=True)

mixtures = []
stems = []

print('Reconstrucción de audios.')
for ann in tqdm(jams.load(jams_path,strict=False).annotations):
    temp = jams.JAMS()
    temp.annotations.append(ann)
    temp.file_metadata.duration = 5.0
    temp.save(r'temp\temp.jams',strict=False)

    mix_audio, _, _, stem_list = scaper.generate_from_jams(
        jams_infile = r'temp\temp.jams',
        fg_path = 'stems',
        bg_path = 'stems'
    )
    
    mixtures.append(normalize_track(mix_audio))

    stem = np.empty((220500,1),dtype='float64')
    for obs, stem_audio in zip(ann.data,stem_list):    
        stem = np.hstack((stem,normalize_track(stem_audio)))
    stems.append(stem)
    
    os.remove(r'temp\temp.jams')

X = np.array(mixtures)
Y = np.array(stems)
shutil.rmtree('temp')

Reconstrucción de audios.


100%|██████████| 3/3 [00:09<00:00,  3.01s/it]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (3, 220500) + inhomogeneous part.

In [138]:
len(stems[0][0])

7

In [130]:
len(stems[2][0])

24

In [117]:
stems[0].shape

(12, 220500)

In [114]:
len(stems[0][0])

220500

In [None]:
files, 

In [82]:
np.vstack([[0,1,2,3,4,5],])[:,np.newaxis]

array([[[0, 1, 2, 3, 4, 5]]])

In [66]:
np.hstack([stems[0][i][0] for i in range(6)]).shape

(220500, 6)

In [56]:
stems

[[[array([[-0.00000000e+00],
          [-3.38954341e-05],
          [-2.49629255e-05],
          ...,
          [-4.45981952e-06],
          [-2.65996287e-06],
          [-0.00000000e+00]]),
   0],
  [array([[-0.00000000e+00],
          [-7.66287842e-06],
          [-1.41657851e-05],
          ...,
          [ 0.00000000e+00],
          [ 0.00000000e+00],
          [ 0.00000000e+00]]),
   1],
  [array([[0.        ],
          [0.00047837],
          [0.00086644],
          ...,
          [0.        ],
          [0.        ],
          [0.        ]]),
   2],
  [array([[ 0.00000000e+00],
          [ 2.56746997e-04],
          [ 3.83346151e-04],
          ...,
          [ 4.87598692e-07],
          [-9.34382347e-08],
          [-5.31677445e-08]]),
   3],
  [array([[-0.00000000e+00],
          [-2.39401448e-05],
          [-3.89293407e-05],
          ...,
          [ 0.00000000e+00],
          [ 0.00000000e+00],
          [ 0.00000000e+00]]),
   4],
  [array([[0.00000000e+00],
          [4

In [21]:
np.empty((1,220500))

array([[0., 0., 0., ..., 0., 0., 0.]])

In [58]:
stems[0][0]

[array([[-0.00000000e+00],
        [-3.38954341e-05],
        [-2.49629255e-05],
        ...,
        [-4.45981952e-06],
        [-2.65996287e-06],
        [-0.00000000e+00]]),
 0]

In [9]:
len(stems)

3

In [29]:
stem[0][1].dtype

dtype('float64')

In [25]:
stem[

[0,
 array([[0.        ],
        [0.00048241],
        [0.00072774],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]])]

In [27]:
np.array(stem)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (18, 2) + inhomogeneous part.

In [26]:
for item in stem:
    print(f'Número instrumentos: {item[0]}')
    print(f'Cantidad dimensional: {len(item[1])}')
    print('\n')

Número instrumentos: 0
Cantidad dimensional: 220500


Número instrumentos: 1
Cantidad dimensional: 220500


Número instrumentos: 2
Cantidad dimensional: 220500


Número instrumentos: 3
Cantidad dimensional: 220500


Número instrumentos: 4
Cantidad dimensional: 220500


Número instrumentos: 5
Cantidad dimensional: 220500


Número instrumentos: 0
Cantidad dimensional: 220500


Número instrumentos: 1
Cantidad dimensional: 220500


Número instrumentos: 2
Cantidad dimensional: 220500


Número instrumentos: 3
Cantidad dimensional: 220500


Número instrumentos: 4
Cantidad dimensional: 220500


Número instrumentos: 5
Cantidad dimensional: 220500


Número instrumentos: 0
Cantidad dimensional: 220500


Número instrumentos: 1
Cantidad dimensional: 220500


Número instrumentos: 2
Cantidad dimensional: 220500


Número instrumentos: 3
Cantidad dimensional: 220500


Número instrumentos: 4
Cantidad dimensional: 220500


Número instrumentos: 5
Cantidad dimensional: 220500




In [15]:
len(stems[0])

6

In [11]:
stems.shape

AttributeError: 'list' object has no attribute 'shape'

In [5]:
X

NameError: name 'X' is not defined

In [31]:
import jams



accoustic
bass
drums
piano
strings
vocals


In [35]:
os.listdir('jams')

['soundscape_1.jams', 'soundscape_2.jams', 'soundscape_3.jams']

In [33]:
all_jams = jams.JAMS()


In [34]:
for jams_file in [os.path.join('jams',f) for f in os.listdir('jams')]:
    jam = jams.load(jams_file)
    
    for annot in jam.annotations:
        all_jams.annotations.append(annot)
        
all_jams.save('hey.jams')

SchemaError: None is not of type 'number'

Failed validating 'type' in schema['properties']['file_metadata']['properties']['duration']:
    {'type': 'number', 'minimum': 0.0}

On instance['file_metadata']['duration']:
    None

In [38]:
all_jams.save('hey.jams',strict=False)

In [40]:
a2 = jams.load('hey.jams',strict=False)

In [51]:
temp = jams.JAMS()
temp.annotations.append(a2.annotations[0])
temp.save('prueba.jams',strict=False)

In [53]:
a2.annotations[0].data

SortedKeyList([Observation(time=0.0, duration=5.0, value={'label': 'accoustic', 'source_file': 'stems\\accoustic\\accoustic_glagolitic_mass_1_orch_404.wav', 'source_time': 6.484106149593443, 'event_time': 0, 'event_duration': 4.796696849133465, 'snr': 1.5107702550194446, 'role': 'foreground', 'pitch_shift': 1.659838702175123, 'time_stretch': 1.0423839899124046}, confidence=1.0), Observation(time=0.0, duration=4.898901348276406, value={'label': 'bass', 'source_file': 'stems\\bass\\bass_lushlife - toynbee suite_300.wav', 'source_time': 0.6678708154328792, 'event_time': 0, 'event_duration': 5.0, 'snr': -1.2918174780173364, 'role': 'foreground', 'pitch_shift': 0.6753650106544291, 'time_stretch': 0.9797802696552813}, confidence=1.0), Observation(time=0.0, duration=5.0, value={'label': 'drums', 'source_file': 'stems\\drums\\drums_detsky sad - walkie talkie_34.wav', 'source_time': 4.462902299486492, 'event_time': 0, 'event_duration': 4.579415191538984, 'snr': 3.8721274257632654, 'role': 'fore

In [4]:
5*44100

2205000