In [44]:
from pathlib import Path
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os,random
import numpy as np
import augmentation
import collections
import warnings
warnings.simplefilter('ignore')

In [2]:
path=Path("genres")

def scan_genres(root):
    """Index the ``*.au`` clips stored under ``root``.

    Every sub-directory of ``root`` is treated as one genre and receives the
    next integer label; every ``*.au`` file directly inside it is recorded as
    one clip of that genre. Entries of ``root`` that are not directories are
    ignored so they do not consume a label number.

    Directories and files are visited in sorted order. Plain ``glob`` order
    depends on the filesystem, which would make both the label numbering and
    the later seeded shuffle differ from machine to machine.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory containing one sub-directory per genre.

    Returns
    -------
    tuple of (list, list, dict)
        ``(file_names, labels, name_to_label)`` where ``file_names[k]`` is the
        bare file name of clip ``k``, ``labels[k]`` its integer genre label and
        ``name_to_label`` maps each genre directory name to its label.
    """
    file_names=[]
    labels=[]
    name_to_label={}
    genre_dirs=sorted(entry for entry in Path(root).glob("*") if entry.is_dir())
    for label,folder in enumerate(genre_dirs):
        #.name is separator independent, unlike splitting str(folder) on "/"
        name_to_label[folder.name]=label
        for file in sorted(folder.glob("*.au")):
            file_names.append(file.name)
            labels.append(label)
    return file_names,labels,name_to_label

audio_data,genre_class,cat_2_num=scan_genres(path)
numeric_category=len(cat_2_num)-1#highest label assigned (-1 when no genre folder was found)


In [3]:
#shuffle the (file name, label) pairs with a fixed seed, then cut them 80/10/10
zip_list=[pair for pair in zip(audio_data,genre_class)]
random.seed(8)
random.shuffle(zip_list)

n_clips=len(zip_list)
train_end=int(0.8*n_clips)
test_end=int(0.9*n_clips)

train_set=zip_list[:train_end]#first 80%
test_set=zip_list[train_end:test_end]#next 10%
val_set=zip_list[test_end:]#remaining 10%


In [4]:
#locate a clip on disk and decode it
def audio_clip(clip):
    """Load the waveform for one ``(file_name, label)`` entry.

    The genre folder is the text before the first "." of the file name and
    the file is read from ``path/<folder>/<file_name>``.

    Returns ``(samples, folder_name)``; the sampling rate reported by
    ``librosa.load`` is discarded.
    """
    file_name=clip[0]
    folder_name,_,_=file_name.partition(".")
    #no sr is passed, so librosa resamples to its default rate (22050)
    samples,_rate=librosa.load(path/folder_name/file_name)
    return samples,folder_name

In [22]:
def create_images(data,target_folder):
    """Save mel-spectrogram images for every clip in ``data``.

    Images are written to ``<target_folder>/<genre folder>/<clip name><i>.jpg``
    where ``i`` is the index of the signal rendered for that clip. Clips are
    augmented only when ``target_folder`` is ``"train set"``, giving one image
    per augmented signal; for any other folder each clip yields a single image
    with index 0.

    Parameters
    ----------
    data : iterable of (file_name, label)
        Clips to render, e.g. ``train_set``, ``val_set`` or ``test_set``.
    target_folder : str
        Root directory for the images; missing directories are created.
    """
    augment=target_folder=="train set"
    for clip in data:
        audio,folder_name=audio_clip(clip)
        #augmentation.augmentation is expected to return an iterable of waveforms
        #(extract_features iterates it the same way) - TODO confirm
        signals=augmentation.augmentation(audio) if augment else [audio]

        image_name=os.path.splitext(clip[0])[0]
        dir_path=Path(target_folder)/folder_name
        dir_path.mkdir(parents=True,exist_ok=True)

        for i,signal in enumerate(signals):
            mel=librosa.feature.melspectrogram(y=signal)#compute melspectrogram
            librosa.display.specshow(librosa.power_to_db(mel,ref=np.max),y_axis="off",x_axis="off")#plot melspectrogram
            plt.axis("off")
            plt.savefig(dir_path/(image_name+str(i)+".jpg"),bbox_inches='tight',pad_inches = 0)
            #close after every save so spectrograms are not drawn on top of each other
            plt.close()

#render the spectrogram images of each split into its own folder
for split_data,split_dir in ((train_set,"train set"),(val_set,"val set"),(test_set,"test set")):
    create_images(split_data,split_dir)

In [5]:
from librosa.feature import zero_crossing_rate,rmse,mfcc,chroma_stft,spectral_centroid,spectral_bandwidth,spectral_rolloff,spectral_contrast
from librosa.beat import tempo

In [116]:
#per-clip feature vector for the machine learning models
def create_features(audio):
    """Summarise one audio signal as a list of feature arrays.

    The returned list holds, in order: tempo, zero crossing rate, rms energy,
    spectral centroid, spectral bandwidth, spectral rolloff, spectral
    contrast, mfcc and chroma. Tempo is passed through as returned by
    ``tempo``; every other entry is the mean over axis 1 of the extractor's
    output, i.e. one value per output row.
    """
    per_frame=[
        zero_crossing_rate(audio),#time domain: fraction of zero crossings
        rmse(audio),#time domain: root mean square energy
        spectral_centroid(audio),#centroid frequencies
        spectral_bandwidth(audio,p=2),#2nd order moment about the spectral centroid
        spectral_rolloff(audio),#roll off frequency
        spectral_contrast(audio),#contrast between frequency bands
        mfcc(audio,n_mfcc=20),#20 mel-frequency cepstral coefficients
        chroma_stft(audio),#chroma bins
    ]
    #collapse the frame axis so each feature contributes one mean per row
    averaged=[np.mean(values,axis=1) for values in per_frame]
    return [tempo(audio)]+averaged#tempo (beats per minute) goes first

In [117]:
def extract_features(dataset,augment=True):
    """Build one flat feature row per audio signal for the clips in ``dataset``.

    Parameters
    ----------
    dataset : iterable of (file_name, label)
        Clips to process; each one is loaded with ``audio_clip``.
    augment : bool, optional
        When True (the default) every clip is expanded with
        ``augmentation.augmentation`` and each resulting signal produces its
        own row. When False only the original signal is used, which is what
        held-out data normally needs.

    Returns
    -------
    list of list
        Rows of the form ``[label, value, value, ...]`` where ``label`` is the
        folder name returned by ``audio_clip`` and the values are the
        flattened output of ``create_features``.
    """
    rows=[]
    for clip in dataset:
        signal,label=audio_clip(clip)
        variants=augmentation.augmentation(signal) if augment else [signal]
        for variant in variants:
            #create_features returns one array per feature; flatten them into a single row
            row=[label]
            for group in create_features(variant):
                row.extend(group)
            rows.append(row)
    return rows

In [None]:
#extract features of all examples
#zip_list is the full shuffled list built before the train/test/val slicing, so the
#rows cover every clip, including every augmented signal extract_features generates
features=extract_features(zip_list)

In [121]:
#column names for the dataframe: label and single-valued features first, then the multi-valued groups
contrast=["spec_cont{}".format(band) for band in range(7)]
mfcc_col=["mfcc{}".format(coef) for coef in range(20)]
chroma_col=["chroma{}".format(pitch) for pitch in range(12)]
column=["label","tempo_","zcr","rms","spec_cent","spec_band","spec_rolloff"]+contrast+mfcc_col+chroma_col

In [129]:
#create dataframe
#each entry of features is one row ([label] followed by the flattened feature values);
#column supplies the matching names in the same order
df1=pd.DataFrame(features,columns=column)
df1.head()#preview the first rows

Unnamed: 0,label,tempo_,zcr,rms,spec_cent,spec_band,spec_rolloff,spec_cont0,spec_cont1,spec_cont2,...,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,chroma8,chroma9,chroma10,chroma11
0,metal,117.453835,0.162327,0.038921,2936.132557,2479.833573,5822.957829,16.848865,13.887696,18.251308,...,0.360513,0.286562,0.329242,0.313275,0.489064,0.4961,0.421641,0.479768,0.410471,0.417285
1,metal,117.453835,0.150923,0.037377,2926.178141,2504.680892,5890.480158,16.506949,13.835501,19.554635,...,0.312121,0.260155,0.29622,0.291122,0.453535,0.45696,0.385108,0.439804,0.347381,0.387353
2,metal,117.453835,0.123358,0.055793,2474.533343,2374.290222,5245.533246,18.233169,13.458521,17.939224,...,0.417746,0.420817,0.508065,0.412398,0.504167,0.497188,0.403637,0.403637,0.406446,0.53062
3,metal,117.453835,0.120436,0.066293,2450.428345,2348.388663,5207.95502,19.395257,13.875947,16.829902,...,0.389502,0.414442,0.61197,0.430375,0.528246,0.510681,0.565934,0.649207,0.429904,0.418972
4,metal,117.453835,0.221166,0.039193,4010.675765,3196.851258,8097.183829,15.68869,13.183054,17.842758,...,0.344272,0.297801,0.330139,0.325012,0.481001,0.482537,0.408336,0.46025,0.374414,0.416105


In [132]:
#write the feature table to features.csv in the working directory
#NOTE(review): to_csv writes the dataframe index by default, which shows up as an
#unnamed first column when the file is read back; consider index=False, but confirm
#that no downstream reader relies on that column before changing it
df1.to_csv("features.csv")