In [2]:
import fluidsynth
import numpy as np
import glob
import time
from IPython.display import Audio
import random
import librosa
import os
from tqdm import tqdm
from scipy.io.wavfile import write

In [3]:
sf_path = "C:/Users/kolaw/Downloads/piano_dataset/sound_fonts"
sound_fonts = glob.glob(f"{sf_path}/*.sf2")
sound_fonts

['C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\198_Yamaha_SY1_piano.sf2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Nord Stage GP Grand Piano.sf2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Piano.SF2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Piano2.SF2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Pianoset.sf2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Steinway Grand Piano 1.2.sf2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\UprightPianoKW-small-bright-20190703.sf2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\YAMAHA DX7Piano.SF2',
 'C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\\Yamaha YPT 220 piano V4 full version - fixed.sf2']

In [4]:
noise_path = "C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources"
noise = glob.glob(f"{noise_path}/*.*[mp3|wav]")

In [5]:
noise

['C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\background-noise-45700.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\convention-crowd-noise-34612.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\eerie-atmosphere-ambience-372558.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\market-background-noise-281247.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\noise-332369.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\people-background-noise-358083.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\pink-noise-by-digitalspa-170340.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\rainambience-57663.mp3',
 'C:/Users/kolaw/Downloads/piano_dataset/03_Augmented_Audio/noise_sources\\relaxing-layered-brown-noise-304725.mp3',
 'C:/Users/kolaw/Downloa

In [6]:
start_key_number = 21
end_key_number = 108
SAMPLE_RATE =44100
AUDIO_CHANNEL = 2
vel = [80,127]
MAX_WAV_VAL = 32767

CHORD_STRUCTURES = {
    'single_note':[0],
    "major_2nd":[0,2],
    "minor_3rd":[0,3],
    "perfect_4th":[0,5],
    "perfect_5th":[0,7],
    "octave":[0,12],
    'major_triad': [0, 4, 7], 'minor_triad': [0, 3, 7],
    'diminished_triad': [0, 3, 6], 'augmented_triad': [0, 4, 8],
    'dominant_7th': [0, 4, 7, 10], 'major_7th': [0, 4, 7, 11],
    'minor_7th': [0, 3, 7, 10],
}
NOTE_NAMES = ['C', 'Cs', 'D', 'Ds', 'E', 'F', 'Fs', 'G', 'Gs', 'A', 'As', 'B']
KEY_RANGE = range(start_key_number,end_key_number+1)
codewd = "C:/Users/kolaw/Downloads/piano_dataset"

In [7]:
random_note = random.randint(start_key_number,end_key_number)
random_note

81

In [8]:
fs_sample = fluidsynth.Synth(samplerate=SAMPLE_RATE)

In [9]:
fs_sample.start()
sfID = fs_sample.sfload(random.choice(sound_fonts))

In [10]:
fs_sample.program_select(0,sfID,0,0)
# fs_sample.cc(0, 64, 127) 
fs_sample.noteon(0, random_note+CHORD_STRUCTURES["minor_triad"][0], 127)
fs_sample.noteon(0, random_note+CHORD_STRUCTURES["minor_triad"][1], 127)
fs_sample.noteon(0, random_note+CHORD_STRUCTURES["minor_triad"][2], 127)
audio = fs_sample.get_samples(SAMPLE_RATE*2)
fs_sample.system_reset()
 

0

In [11]:
max_val = np.max(np.abs(audio))
max_val

6409

In [12]:
Audio(audio/max_val,rate=SAMPLE_RATE,normalize=False)

In [13]:
fs_sample.delete()

In [14]:
def safe_mix(note_audio,noise_path,noise_weight=random.uniform(0.01,0.09)):
    max_length = len(note_audio)
    noise_audio,sr = librosa.load(noise_path,sr=SAMPLE_RATE)
    trimmed_noise_aud = noise_audio[:max_length]

    norm1 = note_audio / np.max(np.abs(note_audio))
    norm2 = trimmed_noise_aud / np.max(np.abs(trimmed_noise_aud))

    mixed_aud = norm1 + (norm2*noise_weight)
    
    return np.int16(mixed_aud * MAX_WAV_VAL)

In [15]:
Audio(safe_mix(audio,random.choice(noise)),rate=SAMPLE_RATE)

In [16]:
def note_number_to_name(note_number):
    octave = (note_number // 12)-1
    note_name = NOTE_NAMES[note_number % 12] 
    return f"{note_name}{octave}"

In [17]:
def get_file_name(key_type,note_names,volume ="",isAug = False):
    return f"{key_type}#{'_'.join(note_names)}#vol{volume}{'clean' if not isAug else 'aug'}.wav" 

In [None]:
for font in sound_fonts:
    print(f"Generating for sounf font {font}")

    # initialze fluidsynth
    fs = fluidsynth.Synth(samplerate=SAMPLE_RATE)
    fs.start()
    sID = fs.sfload(font)
    fs.program_select(0,sID,0,0)
    
    #get and create folder per sound font
    file_name = os.path.basename(font)
    
    folder_name =os.path.splitext(file_name)[0]

    path = os.path.join(f"{codewd}/audio_files/{folder_name}")
    
    os.makedirs(path,exist_ok=True)
    

    #loop for each key
    for i in tqdm(KEY_RANGE):
        # print(f"note {i}")
        #loop for each note combination
        for key_type, additions in CHORD_STRUCTURES.items():
            notes_names_map = {note_number_to_name(i + addons):i + addons for addons in additions}
            notes = list(notes_names_map.values())

            if not all(note in KEY_RANGE for note in notes):
                continue

            for vol in vel:
                #switching on the keys to play
                for note in notes:
                    fs.noteon(0, note, vol)
                    
                    file_name_clean = os.path.join(path,get_file_name(key_type,list(notes_names_map.keys()),volume =f"{vol}"))
                    file_name_aug = os.path.join(path,get_file_name(key_type,list(notes_names_map.keys()),isAug=True,volume =f"{vol}"))
                    
                    audio_clean = fs.get_samples(SAMPLE_RATE*2)
                    audio_clean_int16 = np.int16(audio_clean / np.max(np.abs(audio_clean)) * MAX_WAV_VAL)
            
                    audio_aug_int16 = safe_mix(audio_clean,random.choice(noise))
                    
                    write(file_name_clean , SAMPLE_RATE, audio_clean_int16)
                    write(file_name_aug, SAMPLE_RATE, audio_aug_int16)
                fs.system_reset()

Generating for sounf font C:/Users/kolaw/Downloads/piano_dataset/sound_fonts\198_Yamaha_SY1_piano.sf2


 23%|██▎       | 20/88 [06:32<23:11, 20.46s/it]

In [None]:
def mix_noise(noise_array):
    max_length = SAMPLE_RATE * 4

    final_mix = np.zeros(max_length)
    noise_name = ""

    for noise in noise_array:
        file_name = os.path.basename(noise)
    
        name =os.path.splitext(file_name)[0]
        applied_weight = round(random.uniform(0.1,0.2),2)

        noise_name += f"{name}-{applied_weight}"
        
        noise_aud,_ = librosa.load(noise,sr=SAMPLE_RATE)
        
        trimmed = noise_aud[:max_length]
        max_amp = np.max(np.abs(trimmed))
        norm = (trimmed/ max_amp)*applied_weight
        final_mix += norm
    return np.int16(final_mix * MAX_WAV_VAL),f"{noise_name}.wav"
    

In [None]:
Audio(mix_noise([random.choice(noise),random.choice(noise)])[0],rate=SAMPLE_RATE)

In [None]:
noise_mixture_path = os.path.join(f"{codewd}/audio_files/noise")
os.makedirs(noise_mixture_path ,exist_ok=True)
# mix noise
for index,single_noise in enumerate(noise):
    aud_array, processed_name = mix_noise([single_noise])
    write(os.path.join(noise_mixture_path,processed_name),SAMPLE_RATE,aud_array)
    for i in range(index+1,len(noise)):
        aud_array_mix, processed_name = mix_noise([single_noise,noise[i]])
        write(os.path.join(noise_mixture_path,processed_name), SAMPLE_RATE, aud_array_mix)
        