In [1]:
import warnings
# Silence every warning for the rest of the session. This also hides
# deprecation notices from the libraries used below, so drop the filter
# when debugging or upgrading dependencies.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import os
from tqdm import tqdm
import numpy as np
import librosa
from scipy.stats import kurtosis
from scipy.stats import skew

In [3]:
def extract_feat(y,sr=22050,n_fft=1024,hop_length=512):
    """Summarise an audio signal as a flat dict of per-feature statistics.

    Frame-level descriptors (spectral centroid, onset strength, RMS energy,
    zero-crossing rate, spectral contrast/bandwidth/flatness/rolloff and 20
    MFCCs) are computed with librosa, each is flattened to a 1-D vector, and
    every vector is reduced to six statistics.

    Parameters
    ----------
    y : audio time series, as accepted by the librosa feature functions.
    sr : sampling rate of ``y``.
    n_fft : analysis window length (also used as ``frame_length`` for RMS
        and zero-crossing rate).
    hop_length : hop between successive frames.

    Returns
    -------
    dict
        Keys ``'<feature>_<stat>'`` for ``stat`` in max, min, mean, std,
        kurtosis, skew (in that order for each feature), plus ``'tempo'``.
    """
    # Every librosa call passes the signal as ``y=``: librosa >= 0.10 rejects
    # positional audio arguments, and the keyword form also works on older
    # releases.
    features = {
        'centroid': librosa.feature.spectral_centroid(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        # No n_fft/hop_length is passed for onset strength and spectral
        # contrast, so librosa's own defaults apply to these two.
        'flux': librosa.onset.onset_strength(y=y, sr=sr).ravel(),
        'rmse': librosa.feature.rms(
            y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(
            y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        # ravel() merges all contrast bands into one vector, so the
        # statistics below are taken across bands and frames together.
        'contrast': librosa.feature.spectral_contrast(y=y, sr=sr).ravel(),
        'bandwidth': librosa.feature.spectral_bandwidth(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        'flatness': librosa.feature.spectral_flatness(
            y=y, n_fft=n_fft, hop_length=hop_length).ravel(),
        'rolloff': librosa.feature.spectral_rolloff(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
    }

    # One entry per MFCC coefficient. ``sr`` is forwarded so the mel filter
    # bank matches the signal's real sampling rate instead of the 22050 Hz
    # default.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=20)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Reduce each vector to summary statistics; the (feature, stat) order is
    # what determines the column order of any DataFrame built from the result.
    stat_funcs = (
        ('max', np.max),
        ('min', np.min),
        ('mean', np.mean),
        ('std', np.std),
        ('kurtosis', kurtosis),
        ('skew', skew),
    )
    dict_agg_features = {
        '{}_{}'.format(name, stat): func(values)
        for name, values in features.items()
        for stat, func in stat_funcs
    }

    # librosa 0.10 moved the tempo estimator to librosa.feature.tempo and
    # deprecated librosa.beat.tempo; use the new location when it exists.
    estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo
    dict_agg_features['tempo'] = estimate_tempo(y=y, sr=sr, hop_length=hop_length)[0]

    return dict_agg_features


In [None]:
# NOTE(review): looks like an earlier draft of train_data(); left commented out (dead code).
# def make_train_data():
#     arr_features=[]
#     os.chdir("C:\\Users\\manuj\\OneDrive\\Desktop\\Data\\Train")
#     genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
#     for idx,genre in tqdm(enumerate(genres),total=len(genres)):
#         for fname in os.listdir(genre):
#             y, sr = librosa.load(genre+'/'+fname, duration=30)
#             dict_features=extract_features(y=y,sr=sr)
#             dict_features['label']=idx
#             arr_features.append(dict_features)

#     df=pd.DataFrame(data=arr_features)
#     print(df.head())
#     print(df.shape)
#     os.chdir('..')
#     ## os.chdir('..')
#     df.to_csv('train_data.csv',index=False)


In [4]:
def train_data(data_dir="C:\\Users\\manuj\\OneDrive\\Desktop\\FData\\Train"):
    """Extract features for every clip under ``data_dir`` and save ``train.csv``.

    ``data_dir`` must contain one sub-directory per genre. Each file found
    there is loaded with librosa (first 30 seconds), passed to
    ``extract_feat`` and labelled with the index of its genre in the genre
    list below.

    A file that cannot be loaded or processed is reported and skipped; the
    remaining files are still processed. A genre directory that cannot be
    listed is reported and skipped as well.

    Side effects: writes ``train.csv`` into the parent of ``data_dir``,
    prints the head and shape of the table, and leaves the process working
    directory at that parent (later code may rely on opening ``train.csv``
    by relative name).

    Parameters
    ----------
    data_dir : directory holding the per-genre sub-directories.
    """
    root = os.path.abspath(data_dir)
    parent = os.path.dirname(root)
    genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

    features = []
    for idx, genre in tqdm(enumerate(genres), total=len(genres)):
        genre_dir = os.path.join(root, genre)
        try:
            fnames = os.listdir(genre_dir)
        except OSError as e:
            print("skipping genre {!r}: {}".format(genre, e))
            continue
        for fname in fnames:
            path = os.path.join(genre_dir, fname)
            # Guard one file at a time so a single bad clip does not discard
            # the features already collected from every other file.
            try:
                y, sr = librosa.load(path, duration=30)
                dict_features = extract_feat(y=y, sr=sr)
            except Exception as e:
                print("skipping {}: {!r}".format(path, e))
                continue
            dict_features['label'] = idx
            features.append(dict_features)

    if not features:
        print("no features extracted from {}; nothing written".format(root))
        return

    df = pd.DataFrame(data=features)
    print(df.head())
    print(df.shape)
    df.to_csv(os.path.join(parent, 'train.csv'), index=False)
    os.chdir(parent)  # end in the parent directory, next to train.csv
        


In [None]:

def test_data():
    try:
        features=[]
        os.chdir("C:\\Users\\manuj\\OneDrive\\Desktop\\FData\\Test")
        genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
        for idx,genre in tqdm(enumerate(genres),total=len(genres)):
            for fname in os.listdir(genre):
                y, sr = librosa.load(genre+'/'+fname, duration=30)
                dict_features=extract_feat(y=y,sr=sr)
                dict_features['label']=idx
                features.append(dict_features)
        df=pd.DataFrame(data=features)
        print(df.head())
        print(df.shape)
        os.chdir('..')  # changing to one parent directory
        df.to_csv('test.csv',index=False)
    except Exception as e:
        print("got an exception with a jazz file, hence skipping it.")
        



In [5]:

# Build the training feature table; train_data() writes it to train.csv.
train_data()

100%|██████████| 10/10 [03:55<00:00, 23.56s/it]

   centroid_max  centroid_min  centroid_mean  centroid_std  centroid_kurtosis  \
0   4053.985673    925.934083    1727.609343    376.860919           6.100739   
1   5430.122461    372.728961    1450.537368    615.182106           4.766406   
2   4709.894987    356.735645    1493.682380    419.421785          11.343224   
3   3692.033659    261.951363     996.731305    428.075756           2.700194   
4   4484.855104    783.165978    1749.556720    599.337692           1.670335   

   centroid_skew   flux_max  flux_min  flux_mean  flux_std  ...  \
0       1.660491  10.981943       0.0   1.392348  1.481918  ...   
1       1.475166  17.791128       0.0   1.445780  2.003516  ...   
2       1.900785  14.719387       0.0   1.639342  1.950413  ...   
3       1.111396  14.619912       0.0   1.248535  1.750339  ...   
4       1.118022  12.954987       0.0   1.646354  1.964184  ...   

   mfcc_18_kurtosis  mfcc_18_skew  mfcc_19_max  mfcc_19_min  mfcc_19_mean  \
0          0.954317      0.303927




In [6]:
# The cell that defines test_data() must be executed in this kernel first;
# otherwise this call raises NameError.
test_data()

NameError: name 'test_data' is not defined