In [4]:
from pathlib import Path
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os,random
import numpy as np
import augmentation
import collections

In [5]:
path=Path("genres")
genre_class=[]
audio_data=[]
cat_2_num={}
numeric_category=-1

for folder in path.glob("*"):
    numeric_category+=1
    category=str(folder).split("/")[-1]
    cat_2_num[category]=numeric_category

    for file in folder.glob("*.au"):
        data=str(file).split("/")[-1]
        audio_data.append(data)
        genre_class.append(cat_2_num[category])

#num_2_cat={value:key for key,value in cat_2_num.items()}        

In [6]:
#split datset
zip_list=list(zip(audio_data,genre_class))
random.seed(8)
random.shuffle(zip_list)

train_set=zip_list[:int(0.8*(len(zip_list)))]
test_set=zip_list[int(0.8*(len(zip_list))):int(0.9*(len(zip_list)))]
val_set=zip_list[int(0.9*(len(zip_list))):]


In [30]:
def audio_clip(clip):
    folder_name=clip[0].split(".")[0]
    file_path=path/folder_name/clip[0]
    audio_file,sr=librosa.load(file_path)#load file as time series in numpy array.sampling rate=22050
    return audio_file,folder_name

In [22]:
def create_images(data,target_folder):
    #save training set images into the target directory
    for clip in train_set:
        audio_file=audio_clip(clip)
        if target_folder=="train set":
            audio_file=augmentation.augmentation(audio_file)
            mel=list(map(librosa.feature.melspectrogram,audio_file))#compute melspectrogram
            for i in range(len(mel)):
                librosa.display.specshow(librosa.power_to_db(mel[i],ref=np.max),y_axis="off",x_axis="off")#plot melspectrogram
        else:
            mel=librosa.feature.melspectrogram(audio_file)
        
        #save melspectrograms in target folders
        image_name=os.path.splitext(clip[0])[0]
        dir_path=Path(target_folder)/folder_name
        dir_path.mkdir(parents=True,exist_ok=True)
        plt.axis("off")
        #plt.gca().xaxis.set_major_locator(plt.NullLocator())
        #plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.savefig(dir_path/(image_name+str(i)+".jpg"),bbox_inches='tight',pad_inches = 0)
        plt.close()


In [None]:
create_images(train_set,"train set")
create_images(val_set,"val set")
create_images(test_set,"test set")

In [12]:
from librosa.feature import zero_crossing_rate,rmse,mfcc,chroma_stft,spectral_centroid,spectral_bandwidth,spectral_rolloff,spectral_contrast
from librosa.beat import tempo

In [58]:
#extract features for machine learning models
def extract_features(audio,label):
    #time domain features
    zcr=zero_crossing_rate(audio)#array of fractions of zero crossing of each frame
    rms=rmse(audio)#array of rmse of each frame
    tempo_=tempo(audio)#beats per minute (scaler value)
    
    #frequency domain features
    mfcc_=mfcc(audio,n_mfcc=20)#computes mel-freq cepstral coefficients
    chroma=chroma_stft(audio)#computes chroma bins(12) for each frame
    spec_cent=spectral_centroid(audio)#centroid frequencies
    spec_band=spectral_bandwidth(audio,p=2)#pth order moment about spectral centroid
    spec_cont=spectral_contrast(audio)#min. max. difference between frequency bands
    spec_rolloff=spectral_rolloff(audio)#roll off frequency
    print(mfcc_)
    
    
    

In [59]:
x,y=audio_clip(train_set[0])
c=extract_features(x,y)


  hop_length=hop_length))


[[-185.75844174 -185.08224408 -205.88324486 ... -186.4421659
  -186.74646656 -186.11255643]
 [  81.69084719   80.81920988   78.60971069 ...   77.04790722
    77.62412022   73.99956339]
 [ -26.14461526  -25.88990293  -25.05485059 ...  -13.56072724
   -16.26752282  -14.77464918]
 ...
 [  14.86819169   10.38473117    3.13663396 ...    9.86211682
     6.45083576    0.28265808]
 [  -8.6967483   -12.21011708  -13.06366112 ...    9.15883259
     9.90078226    5.65830886]
 [  10.44503383    8.63962071    6.01686028 ...    9.00672282
     5.23834012    1.70440875]]


In [55]:
#zcr-(1,1293)
#rms-(1,1293)
#tempo=(1,)
#mfcc=(20,1293)
#chroma=(12,1293)
#cent=(1,1293)
#band=(1,1293)
#cont(7,1293)
#roll(1,1293)