In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import librosa
import librosa.display
import math
from sklearn.model_selection import train_test_split
import seaborn as sns
from glob import glob
import IPython.display as ipd
from itertools import cycle
import os
import gc

In [2]:
# parameters needed to correctly load an process the audio files

path = "../input/gtzan-dataset-music-genre-classification/"

path_audio_files = path + "Data/genres_original/"

path_imgs = "./mel_spectrogram_imgs/"

hop_length = 256

n_fft = 512

sample_rate = 16000

genre_dict = {"blues":0,"classical":1,"country":2,"disco":3,"hiphop":4,"jazz":5,"metal":6,"pop":7,"reggae":8,"rock":9}

In [3]:
def split_in_clips(audio):
    
    audio_length = len(audio)
    
    if (audio_length % 12 == 0):
        sample_length = audio_length/12
    else:
        sample_length = math.floor(audio_length/12)
    
    audio_samples = []
    for i in range(1, 12):
        start = (i-1)*length
        audio_samples.append(audio[start: start+(2*length)-1])
    
    return audio_samples

In [4]:
def split_in_clips_fixed(audio):
    
    length = 40000
    audio_samples = []
    
    for i in range(1, 12):
        start = (i-1)*length
        audio_samples.append(audio[start: start+(2*length)-1])
        # print( start, start+(2*length)-1)
    
    return audio_samples
    
                          

In [5]:
print("Transforming the Audio Files into Mel Spectrograms:")
mel_spectogram_data = {}
for genre in genre_dict.keys():
    print("\t",genre)
    
    mel_spectogram_data[genre] = []

    for name in glob(path_audio_files + genre + "/*"):
            if(name != "../input/gtzan-dataset-music-genre-classification/Data/genres_original/jazz/jazz.00054.wav"):
                
                # load the audio file from the path and save the original sampling rate 22050 Hz
                audio, sr = librosa.load(name, mono = True)
                
                # resample the audio so that it fits with the sampling rate stated in the paper
                audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
                
                
                # what we have to do now is split the audio in 5 seconds clips
                # upon inspecting the length of the audio what we notice is that
                # not all of them are exactly 30s long, we opted to split each
                # audio file in 11 clips as stated in the paper with 50% overlap
                # which produces clips of approximately 5s 
                
                for clip in split_in_clips_fixed(audio):
                    S_mel = librosa.feature.melspectrogram(y = clip, sr=sample_rate ,hop_length = hop_length, n_fft = n_fft)
                    S_mel_db = librosa.amplitude_to_db(S_mel, ref=np.max)
                    mel_spectogram_data[genre].append(S_mel_db)
                    

Transforming the Audio Files into Mel Spectrograms:
	 blues
	 classical
	 country
	 disco
	 hiphop
	 jazz
	 metal
	 pop
	 reggae
	 rock


In [6]:
                             
print("Saving the Mel Spectrogram Images:")

os.mkdir(path_imgs)
for genre in genre_dict.keys():
    print("\t",genre)
    try:
        os.mkdir(path_imgs + genre)
    except:
        pass
    

Saving the Mel Spectrogram Images:


FileExistsError: [Errno 17] File exists: './mel_spectrogram_imgs/'

In [7]:
genres = list(genre_dict.keys())[:5]
for genre in genres:
    print("\t",genre)

    for i in range(len(mel_spectogram_data[genre])):

        fig, ax = plt.subplots(1, figsize=(4.32,2.82))

        img = librosa.display.specshow(mel_spectogram_data[genre][i], sr = sample_rate, hop_length = hop_length,ax=ax)

        fig.savefig(path_imgs + genre + "/" + genre + "_" + str(i) + ".png", bbox_inches='tight', pad_inches=0)

        plt.close()


	 blues
	 classical
	 country
	 disco
	 hiphop


In [8]:
genres = list(genre_dict.keys())[5:]
for genre in genres:
    print("\t",genre)

    for i in range(len(mel_spectogram_data[genre])):

        fig, ax = plt.subplots(1, figsize=(4.32,2.82))

        img = librosa.display.specshow(mel_spectogram_data[genre][i], sr = sample_rate, hop_length = hop_length,ax=ax)

        fig.savefig(path_imgs + genre + "/" + genre + "_" + str(i) + ".png", bbox_inches='tight', pad_inches=0)

        plt.close()


	 jazz
	 metal
	 pop
	 reggae
	 rock
