In [1]:
import os
import librosa as lb
from librosa.core import load,stft,istft,magphase
import numpy as np
from scipy.io import wavfile

In [2]:
import glob

In [3]:
curr = os.getcwd()
print(os.listdir(curr))

['dual_voice.mp3', 'Preprocess_MIR_1K.ipynb', 'single_2.wav', 'project.pptx', 'unet.pdf', 'test_2.wav', 'MIR_augment.zip', 'dual_voice.wav', 'wavenet_unet.pdf', 'Source_Separation.ipynb', 'single_voice.mp3', 'MIR-1K_Processed', 'wave-net.pdf', 'MIR_processed.zip', 'dual_2.wav', 'MIR-1K', 'single_voice.wav', 'audio_2(1).wav', '.ipynb_checkpoints', 'MIR-1K.zip']


In [None]:
#skip if unzipped
!unzip MIR-1K.zip

In [4]:
!tree -d MIR-1K

[01;34mMIR-1K[00m
├── [01;34mLyrics[00m
├── [01;34mLyricsWav[00m
├── [01;34mPitchLabel[00m
├── [01;34mUndividedWavfile[00m
├── [01;34mUnvoicedFrameLabel[00m
├── [01;34mvocal-nonvocalLabel[00m
└── [01;34mWavfile[00m

7 directories


In [5]:
Sample_rate = 8192
Window_size = 1024
Hop = 768
Patch = 128
Epoch = 200
Batch = 16
Stride = 10

In [6]:
def mag_phase_spectrogram(wav):
    wav = wav.astype(np.float32)
    spec = stft(wav,n_fft=Window_size,hop_length=Hop,win_length=Window_size)
    mag_spec,phase_spec = magphase(spec)
    return mag_spec.astype(np.float32),phase_spec

In [10]:
curr = 'MIR-1K/Wavfile'
out = 'MIR-1K_Processed'

if not os.path.exists(out):
        os.makedirs(out)
        
for f in os.listdir(curr):
    print("Processing {}".format(f))
    path = os.path.join(curr,f)
    _,audio = wavfile.read(path)
    # print(audio.shape)
    acc_wav = audio[:,0]
    voc_wav = audio[:,1]
    mix_wav = np.sum(audio,axis=-1) #take sum over last axis
    
    #extract magnitude spectrogram of wav files
    acc_mag,_= mag_phase_spectrogram(acc_wav)
    voc_mag,_ = mag_phase_spectrogram(voc_wav)
    mix_mag,mix_phase = mag_phase_spectrogram(mix_wav)
    
    #normalise magnitude spectrograms
    peak = mix_mag.max()
    acc_norm = acc_mag/peak
    voc_norm = voc_mag/peak
    mix_norm = mix_mag/peak  
    
    #save as .npz files
    print("Saving {}".format(f))
    f_ = f.split('.')[0]
    np.savez_compressed(f'{out}/{f_}.npz',mix_mag=mix_norm,mix_phase=mix_phase,vocal=voc_norm,acc=acc_norm)

Processing leon_5_05.wav
Saving leon_5_05.wav
Processing ariel_4_05.wav
Saving ariel_4_05.wav
Processing bobon_2_03.wav
Saving bobon_2_03.wav
Processing amy_3_01.wav
Saving amy_3_01.wav
Processing leon_5_09.wav
Saving leon_5_09.wav
Processing leon_6_04.wav
Saving leon_6_04.wav
Processing titon_4_06.wav
Saving titon_4_06.wav
Processing leon_8_12.wav
Saving leon_8_12.wav
Processing yifen_3_04.wav
Saving yifen_3_04.wav
Processing khair_4_04.wav
Saving khair_4_04.wav
Processing bug_5_13.wav
Saving bug_5_13.wav
Processing amy_1_06.wav
Saving amy_1_06.wav
Processing amy_7_07.wav
Saving amy_7_07.wav
Processing yifen_5_10.wav
Saving yifen_5_10.wav
Processing yifen_2_14.wav
Saving yifen_2_14.wav
Processing bug_3_01.wav
Saving bug_3_01.wav
Processing bobon_3_06.wav
Saving bobon_3_06.wav
Processing khair_2_05.wav
Saving khair_2_05.wav
Processing ariel_4_04.wav
Saving ariel_4_04.wav
Processing bobon_4_04.wav
Saving bobon_4_04.wav
Processing jmzen_5_02.wav
Saving jmzen_5_02.wav
Processing geniustur

Processing abjones_2_07.wav
Saving abjones_2_07.wav
Processing geniusturtle_4_03.wav
Saving geniusturtle_4_03.wav
Processing Ani_4_09.wav
Saving Ani_4_09.wav
Processing amy_4_08.wav
Saving amy_4_08.wav
Processing ariel_2_06.wav
Saving ariel_2_06.wav
Processing annar_5_05.wav
Saving annar_5_05.wav
Processing bug_5_12.wav
Saving bug_5_12.wav
Processing fdps_3_04.wav
Saving fdps_3_04.wav
Processing yifen_1_01.wav
Saving yifen_1_01.wav
Processing annar_1_06.wav
Saving annar_1_06.wav
Processing yifen_4_01.wav
Saving yifen_4_01.wav
Processing ariel_5_06.wav
Saving ariel_5_06.wav
Processing ariel_1_01.wav
Saving ariel_1_01.wav
Processing davidson_1_03.wav
Saving davidson_1_03.wav
Processing geniusturtle_8_07.wav
Saving geniusturtle_8_07.wav
Processing jmzen_4_04.wav
Saving jmzen_4_04.wav
Processing bobon_1_02.wav
Saving bobon_1_02.wav
Processing geniusturtle_6_04.wav
Saving geniusturtle_6_04.wav
Processing khair_1_03.wav
Saving khair_1_03.wav
Processing leon_4_03.wav
Saving leon_4_03.wav
Proc

Processing fdps_1_10.wav
Saving fdps_1_10.wav
Processing geniusturtle_5_03.wav
Saving geniusturtle_5_03.wav
Processing fdps_2_06.wav
Saving fdps_2_06.wav
Processing khair_5_03.wav
Saving khair_5_03.wav
Processing Kenshin_4_06.wav
Saving Kenshin_4_06.wav
Processing khair_5_05.wav
Saving khair_5_05.wav
Processing bobon_4_09.wav
Saving bobon_4_09.wav
Processing leon_1_06.wav
Saving leon_1_06.wav
Processing titon_4_01.wav
Saving titon_4_01.wav
Processing leon_9_02.wav
Saving leon_9_02.wav
Processing heycat_2_04.wav
Saving heycat_2_04.wav
Processing davidson_4_03.wav
Saving davidson_4_03.wav
Processing annar_5_01.wav
Saving annar_5_01.wav
Processing stool_3_05.wav
Saving stool_3_05.wav
Processing bobon_3_08.wav
Saving bobon_3_08.wav
Processing leon_3_11.wav
Saving leon_3_11.wav
Processing stool_1_03.wav
Saving stool_1_03.wav
Processing amy_9_02.wav
Saving amy_9_02.wav
Processing ariel_1_05.wav
Saving ariel_1_05.wav
Processing Ani_5_06.wav
Saving Ani_5_06.wav
Processing amy_2_01.wav
Saving a

Saving khair_6_07.wav
Processing khair_2_07.wav
Saving khair_2_07.wav
Processing jmzen_4_09.wav
Saving jmzen_4_09.wav
Processing bug_2_02.wav
Saving bug_2_02.wav
Processing geniusturtle_2_02.wav
Saving geniusturtle_2_02.wav
Processing ariel_4_03.wav
Saving ariel_4_03.wav
Processing bobon_5_01.wav
Saving bobon_5_01.wav
Processing geniusturtle_6_02.wav
Saving geniusturtle_6_02.wav
Processing fdps_1_05.wav
Saving fdps_1_05.wav
Processing titon_5_03.wav
Saving titon_5_03.wav
Processing stool_5_03.wav
Saving stool_5_03.wav
Processing leon_1_03.wav
Saving leon_1_03.wav
Processing bobon_3_02.wav
Saving bobon_3_02.wav
Processing davidson_5_02.wav
Saving davidson_5_02.wav
Processing ariel_2_02.wav
Saving ariel_2_02.wav
Processing amy_14_06.wav
Saving amy_14_06.wav
Processing Ani_3_01.wav
Saving Ani_3_01.wav
Processing geniusturtle_1_03.wav
Saving geniusturtle_1_03.wav
Processing stool_1_01.wav
Saving stool_1_01.wav
Processing Ani_2_03.wav
Saving Ani_2_03.wav
Processing fdps_4_03.wav
Saving fdps

Processing leon_6_06.wav
Saving leon_6_06.wav
Processing Kenshin_4_08.wav
Saving Kenshin_4_08.wav
Processing Kenshin_2_04.wav
Saving Kenshin_2_04.wav
Processing amy_9_05.wav
Saving amy_9_05.wav
Processing bug_4_01.wav
Saving bug_4_01.wav
Processing leon_5_06.wav
Saving leon_5_06.wav
Processing stool_2_06.wav
Saving stool_2_06.wav
Processing annar_5_02.wav
Saving annar_5_02.wav
Processing leon_2_10.wav
Saving leon_2_10.wav
Processing Kenshin_4_10.wav
Saving Kenshin_4_10.wav
Processing davidson_3_12.wav
Saving davidson_3_12.wav
Processing abjones_5_04.wav
Saving abjones_5_04.wav
Processing amy_10_08.wav
Saving amy_10_08.wav
Processing yifen_3_01.wav
Saving yifen_3_01.wav
Processing annar_2_01.wav
Saving annar_2_01.wav
Processing leon_4_07.wav
Saving leon_4_07.wav
Processing geniusturtle_8_03.wav
Saving geniusturtle_8_03.wav
Processing yifen_2_11.wav
Saving yifen_2_11.wav
Processing stool_4_10.wav
Saving stool_4_10.wav
Processing yifen_1_09.wav
Saving yifen_1_09.wav
Processing tammy_1_02.

Processing khair_5_04.wav
Saving khair_5_04.wav
Processing amy_12_02.wav
Saving amy_12_02.wav
Processing geniusturtle_2_01.wav
Saving geniusturtle_2_01.wav
Processing geniusturtle_1_08.wav
Saving geniusturtle_1_08.wav
Processing leon_3_06.wav
Saving leon_3_06.wav
Processing tammy_1_01.wav
Saving tammy_1_01.wav
Processing geniusturtle_4_05.wav
Saving geniusturtle_4_05.wav
Processing yifen_2_13.wav
Saving yifen_2_13.wav
Processing amy_6_11.wav
Saving amy_6_11.wav
Processing heycat_3_02.wav
Saving heycat_3_02.wav
Processing leon_4_04.wav
Saving leon_4_04.wav
Processing heycat_1_03.wav
Saving heycat_1_03.wav
Processing fdps_5_04.wav
Saving fdps_5_04.wav
Processing titon_1_01.wav
Saving titon_1_01.wav
Processing amy_11_02.wav
Saving amy_11_02.wav
Processing geniusturtle_1_05.wav
Saving geniusturtle_1_05.wav
Processing amy_4_04.wav
Saving amy_4_04.wav
Processing yifen_5_06.wav
Saving yifen_5_06.wav
Processing annar_2_07.wav
Saving annar_2_07.wav
Processing leon_9_06.wav
Saving leon_9_06.wav


In [7]:
def load_npy(npz_file, target = "mix"):
  npy_files = np.load(npz_file)
  file = f'{target}.npy'
  target_file = npy_files[file]
  mix_mag_file = npy_files['mix_mag.npy']
  print(f"{target}_shape : {target_file.shape} & mix_mag_shape : {mix_mag_file.shape}")
  return mix_mag_file,target_file

In [18]:
curr = 'MIR-1K_Processed'

for f in os.listdir(curr):
    path = os.path.join(curr,f)
    mix_mag,mix_phase= load_npy(path,target="mix_phase")
    print(f'shapes: mag - {mix_mag.shape}, phase - {mix_phase.shape}')
    print(f'Phase values')
    print(mix_phase)
    break    

mix_phase_shape : (513, 180) & mix_mag_shape : (513, 180)
shapes: mag - (513, 180), phase - (513, 180)
Phase values
[[ 1.        +0.0000000e+00j  1.        +0.0000000e+00j
   1.        +0.0000000e+00j ...  1.        +0.0000000e+00j
   1.        +0.0000000e+00j -1.        -8.7422777e-08j]
 [ 1.        -1.3947754e-15j -0.8083236 -5.8873838e-01j
  -0.97727305+2.1198441e-01j ... -0.94996005+3.1237146e-01j
  -0.49649122+8.6804175e-01j -0.18723296-9.8231554e-01j]
 [-1.        +8.7422777e-08j  0.99788785+6.4960711e-02j
   0.9860786 +1.6627969e-01j ... -0.14601654-9.8928213e-01j
  -0.56321937-8.2630742e-01j  0.99999154-4.1151904e-03j]
 ...
 [-1.        -8.7422777e-08j -0.6254815 -7.8023899e-01j
  -0.17973194-9.8371565e-01j ...  0.926273  -3.7685320e-01j
  -0.99666846-8.1559718e-02j -0.9981888 +6.0159251e-02j]
 [ 1.        -5.1367164e-16j  0.06494616+9.9788874e-01j
  -0.2919497 +9.5643365e-01j ... -0.97647053-2.1565080e-01j
   0.9507806 +3.0986500e-01j  0.72793883+6.8564212e-01j]
 [ 1.        +

In [19]:
y,sr = lb.load('MIR-1K/Wavfile/abjones_1_01.wav')
print(sr)

22050


**Data Augmentation**

In [9]:
def add_noise(audio,noise_factor):
    noise = np.random.randn(audio.shape[0],audio.shape[1])
    aug_audio = noise + noise_factor*noise
    return aug_audio.astype(np.float32)

In [8]:
def change_pitch(audio,sr,pitch_factor):
    return lb.effects.pitch_shift(audio,sr,pitch_factor)

In [10]:
curr = 'MIR-1K_Processed'
out = 'MIR-1K_Augmented'

if not os.path.exists(out):
        os.makedirs(out)

os.chdir(curr)
all_files = glob.glob('*.npz')
np.random.shuffle(all_files)
os.chdir('..')

print("Data augmentation processing...")

for i in range(100):
  npz_file = all_files[i]
  f = npz_file.split('.')[0]
  path = os.path.join(curr,npz_file)
  files = np.load(path)
  mix_phase = files['mix_phase']
  
  #add noise
  voc_ns = add_noise(files['vocal'],0.05)
  mix_ns = add_noise(files['mix_mag'],0.05)    
  acc_ns = add_noise(files['acc'],0.05)
  
  #normalise noisy data
  peak = mix_ns.max()
  acc_norm = acc_ns/peak
  voc_norm = voc_ns/peak
  mix_norm = mix_ns/peak
  
  print("Saving noisy {}".format(f))
  np.savez_compressed(f'{out}/{f}_noisy.npz',mix_mag=mix_norm,mix_phase=mix_phase,vocal=voc_norm,acc=acc_norm)
    
print("Process complete...")

Data augmentation processing...
Saving noisy geniusturtle_3_08
Saving noisy yifen_2_09
Saving noisy heycat_4_06
Saving noisy davidson_2_04
Saving noisy jmzen_2_12
Saving noisy yifen_2_06
Saving noisy titon_1_03
Saving noisy Kenshin_5_07
Saving noisy bobon_2_04
Saving noisy heycat_1_02
Saving noisy bobon_1_01
Saving noisy annar_2_08
Saving noisy khair_4_02
Saving noisy leon_6_02
Saving noisy amy_7_06
Saving noisy leon_9_06
Saving noisy stool_5_01
Saving noisy Ani_4_02
Saving noisy stool_4_05
Saving noisy titon_3_03
Saving noisy abjones_2_05
Saving noisy fdps_3_02
Saving noisy titon_5_03
Saving noisy Ani_4_10
Saving noisy amy_16_08
Saving noisy yifen_2_12
Saving noisy abjones_4_06
Saving noisy jmzen_2_10
Saving noisy Ani_3_02
Saving noisy jmzen_4_01
Saving noisy annar_5_05
Saving noisy leon_5_07
Saving noisy bug_2_08
Saving noisy abjones_2_11
Saving noisy davidson_2_10
Saving noisy amy_7_01
Saving noisy geniusturtle_7_07
Saving noisy ariel_1_06
Saving noisy fdps_5_09
Saving noisy yifen_2

In [16]:
!pwd

/home/pratikhya/Desktop/MLSP/Source_Separation_Project/MIR-1K


In [17]:
%cd ..

/home/pratikhya/Desktop/MLSP/Source_Separation_Project


In [None]:
curr = '/home/pratikhya/Desktop/MLSP/Source_Separation_Project/MIR-1K/Wavfile'
out = '/home/pratikhya/Desktop/MLSP/Source_Separation_Project/MIR-1K_Augmented'

if not os.path.exists(out):
        os.makedirs(out)

os.chdir(curr)
all_files = glob.glob('*.wav')
np.random.shuffle(all_files)
os.chdir('..')

        
print("Data Augmentation processing...")

for i in range(100):
    f = all_files[i]
    path = os.path.join(curr,f)
    _,audio = wavfile.read(path)
    # print(audio.shape)
    acc_wav = audio[:,0].astype(np.float32)
    voc_wav = audio[:,1].astype(np.float32)
    mix_wav = np.sum(audio,axis=-1).astype(np.float32) #take sum over last axis

    #change pitch
    voc_pc = change_pitch(voc_wav,Sample_rate,0.02)
    acc_pc = change_pitch(acc_wav,Sample_rate,0.02)
    mix_pc = change_pitch(mix_wav,Sample_rate,0.02)
    
    #extract magnitude spectrogram of wav files
    acc_mag,_= mag_phase_spectrogram(acc_pc)
    voc_mag,_ = mag_phase_spectrogram(voc_pc)
    mix_mag,mix_phase = mag_phase_spectrogram(mix_pc)
    
    #normalise magnitude spectrograms
    peak = mix_mag.max()
    acc_norm = acc_mag/peak
    voc_norm = voc_mag/peak
    mix_norm = mix_mag/peak  
    
    print("Saving pitch {}".format(f))
    f_ = f.split('.')[0]
    np.savez_compressed(f'{out}/{f_}_pitch.npz',mix_mag=mix_norm,mix_phase=mix_phase,vocal=voc_norm,acc=acc_norm)
    
print("Processing complete...")

Data Augmentation processing...


  return lb.effects.pitch_shift(audio,sr,pitch_factor)
  return lb.effects.pitch_shift(audio,sr,pitch_factor)


Saving pitch fdps_1_03.wav
Saving pitch amy_4_05.wav
Saving pitch leon_8_13.wav
Saving pitch Kenshin_5_09.wav
Saving pitch abjones_3_10.wav
Saving pitch ariel_3_03.wav
Saving pitch leon_9_03.wav
Saving pitch stool_1_04.wav
Saving pitch fdps_4_04.wav
Saving pitch amy_14_01.wav
Saving pitch geniusturtle_7_03.wav
Saving pitch heycat_5_02.wav
Saving pitch leon_8_07.wav
Saving pitch Kenshin_2_10.wav
Saving pitch bobon_4_05.wav
Saving pitch leon_1_10.wav
Saving pitch titon_4_11.wav
Saving pitch davidson_1_04.wav
Saving pitch stool_5_04.wav
Saving pitch davidson_4_06.wav
Saving pitch yifen_2_01.wav
Saving pitch geniusturtle_4_03.wav
Saving pitch amy_15_07.wav
Saving pitch amy_12_01.wav
Saving pitch yifen_5_05.wav
Saving pitch heycat_3_03.wav
Saving pitch leon_2_07.wav
Saving pitch ariel_3_08.wav
Saving pitch Ani_3_05.wav
Saving pitch amy_5_02.wav
Saving pitch leon_7_09.wav
Saving pitch annar_5_07.wav
Saving pitch annar_1_06.wav
Saving pitch jmzen_1_08.wav
Saving pitch titon_4_10.wav
Saving pi

  return lb.effects.pitch_shift(audio,sr,pitch_factor)


Saving pitch leon_5_10.wav
Saving pitch fdps_2_03.wav
Saving pitch leon_7_07.wav
Saving pitch geniusturtle_8_06.wav
Saving pitch bug_5_01.wav
Saving pitch amy_7_05.wav
Saving pitch amy_16_02.wav
Saving pitch amy_9_01.wav
Saving pitch Kenshin_1_11.wav
Saving pitch amy_6_06.wav
Saving pitch leon_4_02.wav
Saving pitch heycat_4_06.wav
Saving pitch bobon_5_06.wav
Saving pitch bug_2_01.wav
Saving pitch annar_2_04.wav
Saving pitch Kenshin_4_06.wav
Saving pitch ariel_3_04.wav
Saving pitch yifen_1_04.wav
Saving pitch amy_16_01.wav
Saving pitch Ani_5_06.wav
Saving pitch fdps_5_03.wav
Saving pitch titon_1_07.wav
Saving pitch Ani_4_08.wav
Saving pitch Kenshin_3_08.wav
Saving pitch ariel_1_07.wav
Saving pitch bobon_4_03.wav
Saving pitch bug_4_02.wav
Saving pitch abjones_5_02.wav
Saving pitch fdps_3_03.wav
Saving pitch geniusturtle_8_08.wav
Saving pitch davidson_3_10.wav
Saving pitch khair_6_07.wav
Saving pitch yifen_3_09.wav
Saving pitch amy_13_06.wav
