In [199]:
import numpy as np
import matplotlib.pyplot as plt
import os, librosa

import IPython.display as ipd
import librosa.display

import shutil

from pydub import AudioSegment

In [206]:
# function to split audio file into 5 second segments
def split_audio(path):
    interval = 5*1000  # 5 seconds
    segments = []
    audio = AudioSegment.from_file(path)
    
    for i in range(0, len(audio), interval):
        # find end of current segment
        end = i + interval

        # avoid exceeding the file length
        if end >= len(audio): end = len(audio)
        
        # extract segment
        if len(audio[i:end]) > 4500:
            segments.append(audio[i:end])
    return segments

# function to convert an audio file into a mel spectogram
def make_melspec(audio_path):
    # load the audio file
    audio_signal, sample_rate = librosa.load(audio_path)

    # compute mel-spectogram
    melSpec = librosa.feature.melspectrogram(y = audio_signal, sr = sample_rate, n_mels = 128)
    # convert to logarithmic scale
    melSpec_dB = librosa.power_to_db(melSpec, ref=np.max)

    return melSpec_dB, sample_rate

In [208]:
''' 
Split 30 second audio files from original data folder into 5 second segments
and save them to a new data folder called 'splitdata', which has subfolders
for each genre.
'''
ogdir = 'data/genres_original/' 
splitdir = 'splitdata/original'

for genre_dir, _, files in os.walk(ogdir):
    # make genre folders
    genre = os.path.basename(genre_dir)
    new_genre_dir = os.path.join(splitdir, genre)
    os.makedirs(new_genre_dir, exist_ok=True)

    for file in files:
        if file != 'jazz.00054.wav':
            # split audio file into segments
            segmented_file = split_audio(os.path.join(genre_dir, file))
            # save segments to new data directory
            for i in range(len(segmented_file)-1):
                segment = segmented_file[i]
                
                # new file name
                genre, num, id = file.split(sep='.')
                segment_name = genre + num + '_{}'.format(i) + '.wav'
                segment_dir = os.path.join(new_genre_dir, segment_name)

                #  export to dataset folder
                segment.export(segment_dir, format='wav')            

In [209]:
''' 
Convert each 5 second audio clip in the splitdata directory into a 
mel spectogram and save the images to the 'spectogram' subfolder (also 
split into genres).
'''
imgdir = 'splitdata/spectograms'

for genre_dir, _, files in os.walk(splitdir):
    genre = os.path.basename(genre_dir)
    
    aud_genre_dir = os.path.join(splitdir, genre)
    img_genre_dir = os.path.join(imgdir, genre)

    os.makedirs(img_genre_dir, exist_ok=True)
    print(genre)

    for file in files:
        filepath = os.path.join(img_genre_dir, file)  
        img_filepath = filepath.split(sep='.')[0] + '.png'
        aud_filepath = os.path.join(aud_genre_dir, file)

        # get mel spectogram
        melspec, sample_rate = make_melspec(aud_filepath)

        # save spectogram to figure and export
        plt.figure(figsize=(10, 5))
        librosa.display.specshow(melspec, sr = sample_rate, fmax=8000)

        plt.savefig(img_filepath)
        plt.close()


original
blues
classical
country
disco
hiphop
jazz
metal
pop
reggae
rock


In [212]:
''' 
Split each genre in the splitdata spectogram folder 80/20 and
save them to new training and testing directories.
'''
# directories for original datset and training/testing sets
og_dir = 'splitdata/spectograms'
train_dir, test_dir = 'splitdata/training', 'splitdata/testing'

# creating training and testing directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

testlabels, trainlabels = test_dir + '/labels.txt', train_dir + '/labels.txt'
ftest, ftrain = open(testlabels, 'w'), open(trainlabels, 'w')

# iterate through each genre subfolder
for genre_dir, _, images in os.walk(og_dir):
    # get genre label
    genre = os.path.basename(genre_dir)
    
    # split 80/20 for training/testing
    split_point = round(len(images) * 0.20)
    test_imgs, train_imgs = images[:split_point], images[split_point:]

    for img in test_imgs:
        shutil.copy(os.path.join(genre_dir, img), test_dir)
        ftest.write('{}, {}\n'.format(img, genre))
    for img in train_imgs: 
        shutil.copy(os.path.join(genre_dir, img), train_dir)
        ftrain.write('{}, {}\n'.format(img, genre))

ftest.close()
ftrain.close()