# 1. Setup libraries

In [2]:
import librosa
import os
import numpy as np
import glob
from glob import iglob
import matplotlib.pyplot as plt
import librosa.display
import random
from pydub import AudioSegment




# 3. Split into training data and test data

In [3]:
# Source tree (one sub-folder per genre) and the two output trees.
data_path = "../data/genres/**"
save_path_train = "../data/genres_train/"
save_path_test = "../data/genres_test/"

# Share of the files in every folder that is held out for testing, in percent.
test_percent = 20

# Fixed seed so the shuffle below selects the same test files on every run.
random.seed(1)


def _make_dir(path):
    """Create `path` (and any missing parents); report it only when it is new."""
    try:
        os.makedirs(path)
        print("Directory " , path ,  " Created ")
    except FileExistsError:
        pass


def _export_files(files, target_root):
    """Re-export every wav in `files` to `target_root + <genre>/<file name>`.

    The genre is the name of the folder that contains the file. It is taken
    with os.path so the cell works with both "/" and "\\" separators
    (splitting on "\\" only works on Windows).
    """
    for file in files:
        genre = os.path.basename(os.path.dirname(file))
        name = os.path.basename(file)
        _make_dir(target_root + genre)
        audio = AudioSegment.from_wav(file)
        audio.export(os.path.join(target_root + genre, name), format="wav")


# Create target directory & all intermediate directories if they don't exist
_make_dir(save_path_train)
_make_dir(save_path_test)

# Walk every directory below the data root and split its files 20 % / 80 %.
for folderGenre in glob.iglob(data_path, recursive=True):
    if os.path.isfile(folderGenre):
        continue

    # sorted(): glob order depends on the file system, so without it the
    # seeded shuffle would still give a different split on another machine.
    genre_files = sorted(
        filename
        for filename in glob.iglob(os.path.join(folderGenre, "*"))
        if os.path.isfile(filename)
    )
    random.shuffle(genre_files)

    # Integer arithmetic avoids float rounding when computing the test share.
    n_test = len(genre_files) * test_percent // 100

    _export_files(genre_files[:n_test], save_path_test)
    _export_files(genre_files[n_test:], save_path_train)


Directory  ../data/genres_train/  Created 
Directory  ../data/genres_test/  Created 
Directory  ../data/genres_test/blues  Created 
Directory  ../data/genres_train/blues  Created 
Directory  ../data/genres_test/classical  Created 
Directory  ../data/genres_train/classical  Created 
Directory  ../data/genres_test/country  Created 
Directory  ../data/genres_train/country  Created 
Directory  ../data/genres_test/disco  Created 
Directory  ../data/genres_train/disco  Created 
Directory  ../data/genres_test/hiphop  Created 
Directory  ../data/genres_train/hiphop  Created 
Directory  ../data/genres_test/jazz  Created 
Directory  ../data/genres_train/jazz  Created 
Directory  ../data/genres_test/metal  Created 
Directory  ../data/genres_train/metal  Created 
Directory  ../data/genres_test/pop  Created 
Directory  ../data/genres_train/pop  Created 
Directory  ../data/genres_test/reggae  Created 
Directory  ../data/genres_train/reggae  Created 
Directory  ../data/genres_test/rock  Created 
Dire

# 2. Split sound files

In [10]:
rootdir_glob = "../data/genres_test/**"
save_path = "../data/genres_split_test/"

# Every source file is cut into `segments_per_file` consecutive pieces.
segment_ms = 10 * 1000  # pydub slices work in milliseconds
segments_per_file = 3

# (source glob, output root) pairs. The train pair is included because the
# augmentation step reads "../data/genres_split_train/**"; without it the
# notebook cannot be run top to bottom on a fresh kernel.
split_jobs = [
    (rootdir_glob, save_path),
    ("../data/genres_train/**", "../data/genres_split_train/"),
]

for source_glob, target_root in split_jobs:
    # Create target directory & all intermediate directories if they don't exist
    try:
        os.makedirs(target_root)
        print("Directory " , target_root ,  " Created ")
    except FileExistsError:
        pass

    for filename in glob.iglob(source_glob, recursive=True):
        if not os.path.isfile(filename):
            continue

        # os.path.basename handles both "/" and "\\" separators, unlike
        # splitting the path on "\\" (which only works on Windows).
        name = os.path.basename(filename)
        # The output folder is the part of the file name before the first "."
        genre = name.split('.')[0]
        target_dir = target_root + genre

        try:
            os.makedirs(target_dir)
            print("Directory " , target_dir ,  " Created ")
        except FileExistsError:
            pass

        # Decode the file once and slice it, instead of re-reading it per piece.
        audio = AudioSegment.from_wav(filename)
        for index in range(segments_per_file):
            start = index * segment_ms
            piece = audio[start:start + segment_ms]
            # Pieces are named "1_<name>", "2_<name>", "3_<name>".
            piece.export(
                os.path.join(target_dir, str(index + 1) + "_" + name),
                format="wav",
            )

Directory  ../data/genres_split_test/  Created 
Directory  ../data/genres_split_test/blues  Created 
Directory  ../data/genres_split_test/classical  Created 
Directory  ../data/genres_split_test/country  Created 
Directory  ../data/genres_split_test/disco  Created 
Directory  ../data/genres_split_test/hiphop  Created 
Directory  ../data/genres_split_test/jazz  Created 
Directory  ../data/genres_split_test/metal  Created 
Directory  ../data/genres_split_test/pop  Created 
Directory  ../data/genres_split_test/reggae  Created 
Directory  ../data/genres_split_test/rock  Created 


# 4. Define augmentation functions and path to folders

In [8]:
# Settings used by the augmentation cells below.
n_songs = 100
rootdir_glob = "../data/genres_split_train/**"  # clips that get augmented
save_path = "../data/genres_split_train_aug/"   # root folder for the results

# Make sure the output root exists; only announce it when it was just created.
try:
    os.makedirs(save_path)
except FileExistsError:
    pass
else:
    print("Directory " , save_path ,  " Created ")

def manipulate(data, noise_factor):
    """Return `data` with scaled standard-normal noise added.

    Parameters
    ----------
    data : array-like
        Audio samples. Converted with ``np.asarray``; not modified in place.
    noise_factor : float
        Multiplier applied to the noise before it is added. ``0`` returns an
        unchanged copy.

    Returns
    -------
    numpy.ndarray
        ``data + noise_factor * noise`` cast back to the dtype of ``data``.

    Notes
    -----
    Noise is drawn from NumPy's global RNG (``np.random.randn``), so call
    ``np.random.seed`` beforehand for reproducible output.
    """
    samples = np.asarray(data)
    # One noise value per sample; using the full shape also covers
    # multi-dimensional input, and draws the same values for 1-D input.
    noise = np.random.randn(*samples.shape)
    augmented_data = samples + noise_factor * noise
    # Cast back to the input dtype. Reading it from the array (rather than
    # from data[0]) also works when the input is empty.
    return augmented_data.astype(samples.dtype)

def shift(data, sampling_rate, shift_max, shift_direction):
    """Circularly roll `data` and pad it with zeros on one side.

    The roll distance is ``sampling_rate * shift_max`` samples. It is negated
    when `shift_direction` is ``'right'``, and negated with probability 1/2
    (via ``np.random.randint``) when it is ``'both'``; any other value keeps
    it positive.

    After the roll, a block of zeros as long as the roll distance is appended
    to the end (positive roll) or put in front (otherwise), so the result is
    longer than the input by ``sampling_rate * shift_max`` samples.
    """
    n_pad = sampling_rate * shift_max
    padding = [0] * n_pad

    # Decide whether the roll goes in the negative direction.
    negate = shift_direction == 'right'
    if not negate and shift_direction == 'both':
        negate = np.random.randint(0, 2) == 1
    offset = -n_pad if negate else n_pad

    rolled = np.roll(data, offset)

    # Zeros go after the rolled signal for a positive roll, before it otherwise.
    parts = (rolled, padding) if offset > 0 else (padding, rolled)
    return np.concatenate(parts, axis=0)

def pitch_shift(data, sampling_rate, pitch_factor):
    """Pitch-shift `data` with ``librosa.effects.pitch_shift``.

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples.
    sampling_rate : int
        Sampling rate of `data`, forwarded as ``sr``.
    pitch_factor : float
        Forwarded as ``n_steps`` (librosa interprets it as the number of
        steps to shift; fractional values are allowed).

    Returns
    -------
    The value returned by ``librosa.effects.pitch_shift``.
    """
    # Pass sr / n_steps by keyword: newer librosa releases make them
    # keyword-only, and older releases accept the keywords as well.
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

def speed_change(data, speed_factor, sampleRate, pad_seconds=5):
    """Time-stretch `data`, then roll and zero-pad it with `shift`.

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples.
    speed_factor : float
        Forwarded to ``librosa.effects.time_stretch`` as ``rate``
        (the callers in this notebook use 1.1 to speed up and 0.9 to slow down).
    sampleRate : int
        Sampling rate of `data`; used to convert `pad_seconds` into samples.
    pad_seconds : int, optional
        Seconds of roll/padding applied by `shift` (default 5, the value that
        was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        The stretched signal after ``shift(..., 'both')``.

    Notes
    -----
    The output length is the stretched length plus ``pad_seconds * sampleRate``
    samples; it is NOT normalised to a fixed duration such as 30 seconds.
    """
    # Pass rate by keyword: newer librosa releases make it keyword-only,
    # and older releases accept the keyword as well.
    data = librosa.effects.time_stretch(data, rate=speed_factor)
    # Roll in a random direction and add `pad_seconds` of silence on one side.
    data = shift(data, sampleRate, pad_seconds, 'both')
    return data

Directory  ../data/genres_split_train_aug/  Created 


# 5. Augment data

## Remember to split the train and test dataset before this step, and do the augmentation ONLY on the training data!

Augmentation chosen:
    - add noise
    - add more noise
    - change pitch
    - speed up
    - speed down
    
After augmenting, the dataset will be 6 times larger (each original clip plus its five augmented versions).

In [9]:
# Seed NumPy's global RNG so the random noise levels, pitch steps and shift
# directions drawn below are the same on every run.
np.random.seed(1)


def _write_clip(target_dir, prefix, name, samples, sample_rate):
    """Write `samples` to `<target_dir>/<prefix><name>` as a wav file."""
    save_fileName = target_dir + "/" + prefix + name
    # NOTE(review): librosa.output was removed in librosa 0.8; this call needs
    # an older librosa. Replacing it requires a wav writer that this notebook
    # does not import yet.
    librosa.output.write_wav(save_fileName, samples, sample_rate)


count = 0
for filename in glob.iglob(rootdir_glob, recursive=True):
    if not os.path.isfile(filename):
        continue

    x, sampleRate = librosa.load(filename, duration=30)

    # Genre folder and file name, taken with os.path so that both "/" and "\\"
    # separators work (splitting on "\\" only works on Windows).
    save_folder = os.path.basename(os.path.dirname(filename))
    name = os.path.basename(filename)
    target_dir = save_path + save_folder

    try:
        os.makedirs(target_dir)
        print("Directory " , target_dir ,  " Created ")
    except FileExistsError:
        pass

    # Keep an unmodified copy next to the augmented versions.
    _write_clip(target_dir, "original_", name, x, sampleRate)

    # add noise
    noise_factor = np.random.normal(0,2)/100
    _write_clip(target_dir, "aug1_", name, manipulate(x, noise_factor), sampleRate)

    # add MORE noise!
    noise_factor = np.random.normal(2,4)/100
    _write_clip(target_dir, "aug2_", name, manipulate(x, noise_factor), sampleRate)

    # change pitch a bit
    pitch_factor = np.random.normal(0,2)/100
    _write_clip(target_dir, "aug3_", name, pitch_shift(x, sampleRate, pitch_factor), sampleRate)

    # speed up song
    _write_clip(target_dir, "aug4_", name, speed_change(x, 1.1, sampleRate), sampleRate)

    # slow down song
    _write_clip(target_dir, "aug5_", name, speed_change(x, 0.9, sampleRate), sampleRate)

    count = count + 1
    if(count % 10 == 0):
        print("Completed ",count," songs Augmentation")
print("Done...")


Directory  ../data/genres_split_train_aug/blues  Created 
Completed  10  songs Augmentation
Completed  20  songs Augmentation
Completed  30  songs Augmentation
Completed  40  songs Augmentation
Completed  50  songs Augmentation
Completed  60  songs Augmentation
Completed  70  songs Augmentation
Completed  80  songs Augmentation
Completed  90  songs Augmentation
Completed  100  songs Augmentation
Completed  110  songs Augmentation
Completed  120  songs Augmentation
Completed  130  songs Augmentation
Completed  140  songs Augmentation
Completed  150  songs Augmentation
Completed  160  songs Augmentation
Completed  170  songs Augmentation
Completed  180  songs Augmentation
Completed  190  songs Augmentation
Completed  200  songs Augmentation
Completed  210  songs Augmentation
Completed  220  songs Augmentation
Completed  230  songs Augmentation
Completed  240  songs Augmentation
Directory  ../data/genres_split_train_aug/classical  Created 
Completed  250  songs Augmentation
Completed  260

Completed  2170  songs Augmentation
Completed  2180  songs Augmentation
Completed  2190  songs Augmentation
Completed  2200  songs Augmentation
Completed  2210  songs Augmentation
Completed  2220  songs Augmentation
Completed  2230  songs Augmentation
Completed  2240  songs Augmentation
Completed  2250  songs Augmentation
Completed  2260  songs Augmentation
Completed  2270  songs Augmentation
Completed  2280  songs Augmentation
Completed  2290  songs Augmentation
Completed  2300  songs Augmentation
Completed  2310  songs Augmentation
Completed  2320  songs Augmentation
Completed  2330  songs Augmentation
Completed  2340  songs Augmentation
Completed  2350  songs Augmentation
Completed  2360  songs Augmentation
Completed  2370  songs Augmentation
Completed  2380  songs Augmentation
Completed  2390  songs Augmentation
Completed  2400  songs Augmentation
Done...
