# Feature Classes

In [1]:
# environments
import pandas as pd
import os
from tqdm import tqdm
import numpy as np
import librosa
from scipy.stats import kurtosis
from scipy.stats import skew

# Feature engineering

In [27]:
# Utils
def get_feature_stats(features):
    """
    Collapse every feature vector into six summary statistics.

    For each ``name -> values`` entry of ``features`` the returned dict holds
    ``<name>_max``, ``<name>_min``, ``<name>_mean``, ``<name>_std``,
    ``<name>_kurtosis`` and ``<name>_skew`` (inserted in that order), computed
    with numpy and scipy.stats on the values as given.
    """
    # (key suffix, reducer) pairs; the order fixes the key order of the output
    reducers = (
        ('max', np.max),
        ('min', np.min),
        ('mean', np.mean),
        ('std', np.std),
        ('kurtosis', kurtosis),
        ('skew', skew),
    )

    summary = {}
    for name, values in features.items():
        for suffix, reduce_fn in reducers:
            summary[f'{name}_{suffix}'] = reduce_fn(values)
    return summary

In [72]:
# Extract features
def extract_features(y, sr=22050, n_fft=1024, hop_length=512, n_mfcc=20):
    """
    Compute aggregated spectral, rhythmic and MFCC features for one signal.

    Parameters
    ----------
    y : audio time series handed to the librosa feature functions
    sr : sampling rate of ``y``
    n_fft : FFT / frame length used for the frame-based features
    hop_length : hop between successive frames
    n_mfcc : number of MFCC coefficients to compute

    Returns
    -------
    dict
        Output of ``get_feature_stats`` applied to the per-frame features:
        centroid, flux, rmse, zcr, contrast, bandwidth, flatness, rolloff,
        tempo and ``mfcc_0`` .. ``mfcc_{n_mfcc - 1}``.
    """
    # librosa >= 0.10 makes the signal argument keyword-only for most feature
    # functions, so everything is passed by keyword (also valid on older releases).
    stft_args = {'n_fft': n_fft, 'hop_length': hop_length}
    frame_args = {'frame_length': n_fft, 'hop_length': hop_length}

    # Tempo estimation moved to librosa.feature in 0.10; fall back to the
    # librosa.beat location on older releases.
    estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo

    features = {
        'centroid': librosa.feature.spectral_centroid(y=y, sr=sr, **stft_args).ravel(),
        'flux': librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length).ravel(),
        'rmse': librosa.feature.rms(y=y, **frame_args).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(y=y, **frame_args).ravel(),
        # n_fft is deliberately left at librosa's default here so results for
        # the default arguments stay identical to earlier runs.
        'contrast': librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=hop_length).ravel(),
        'bandwidth': librosa.feature.spectral_bandwidth(y=y, sr=sr, **stft_args).ravel(),
        'flatness': librosa.feature.spectral_flatness(y=y, **stft_args).ravel(),
        'rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr, **stft_args).ravel(),
        'tempo': estimate_tempo(y=y, sr=sr, hop_length=hop_length)[0],
    }

    # MFCC treatment: one feature vector per coefficient.
    # sr is forwarded so a non-default sampling rate is honoured.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, **stft_args)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    return get_feature_stats(features)

In [184]:
# Data cleaning

def envelope(y, sr, threshold):
    """
    Build a boolean keep-mask from the amplitude envelope of a signal.

    The envelope is the centred rolling mean of ``|y|`` over a window of
    ``int(sr / 10)`` samples (at least one sample). A position is marked
    ``True`` when its envelope value is strictly greater than ``threshold``.

    Parameters
    ----------
    y : sequence of samples
    sr : sampling rate, used only to size the rolling window
    threshold : envelope level a sample must exceed to be kept

    Returns
    -------
    list of bool
        One entry per sample of ``y``.
    """
    # pandas rejects a zero-length window, which int(sr / 10) yields for sr < 10
    window = max(1, int(sr / 10))
    amplitude = pd.Series(y).abs()
    smoothed = amplitude.rolling(window=window, min_periods=1, center=True).mean()
    # Vectorised comparison instead of a per-sample Python loop; the result is
    # still returned as a plain list of Python bools.
    return (smoothed > threshold).tolist()

In [80]:
# Segmentation constants for a 30-second track sampled at 22050 Hz
sr = 22050
track_duration = 30  # seconds
num_segments = 5  # 30 s / 5 -> 6-second segments
samples_per_track = track_duration * sr
samples_per_segment = samples_per_track // num_segments
samples_per_segment

In [212]:
#track_duration = 30 # measured in seconds

def get_data(save_locally=False, sub_segments=True, num_segments=5, cleaning=True, threshold=0.005,
             max_genres=2, max_files_per_genre=2):
    """
    Build the feature dataset from the audio files under ``../data``.

    Every sub-directory of ``<cwd>/../data`` is treated as one genre; its
    position in the sorted directory listing becomes the integer label.

    Parameters
    ----------
    save_locally : when true, write the dataframe to
        ``<cwd>/../dataset/train_data.csv``
    sub_segments : when true, split every track into ``num_segments`` pieces
        and emit one row per piece; otherwise emit one row per track
    num_segments : number of pieces per track when ``sub_segments`` is true
    cleaning : when true, drop the samples rejected by ``envelope``
    threshold : threshold forwarded to ``envelope``
    max_genres : maximum number of genres to process (``None`` for all);
        the default of 2 keeps the original sampling behaviour
    max_files_per_genre : maximum number of files per genre (``None`` for
        all); the default of 2 keeps the original sampling behaviour

    Returns
    -------
    pandas.DataFrame
        One row per track or per segment with the aggregated features plus
        ``label`` and ``id`` (and ``total_duration`` / ``sub_duration`` when
        ``sub_segments`` is true).
    """
    # Final list of features
    arr_features = []

    # Get data path
    path = os.path.join(os.getcwd(), '..', 'data')
    # Sorted so label ids are reproducible across runs and machines; stray
    # files next to the genre folders are ignored.
    genres = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )[:max_genres]

    for idx, genre in tqdm(enumerate(genres), total=len(genres)):
        genre_path = os.path.join(path, genre)

        for fname in sorted(os.listdir(genre_path))[:max_files_per_genre]:
            file_path = os.path.join(genre_path, fname)
            y, sr = librosa.load(file_path, duration=30)

            # Cleaning option
            if cleaning:
                y = y[np.asarray(envelope(y, sr, threshold), dtype=bool)]

            # Feature extraction cannot run on an empty (sub-)signal
            min_samples = num_segments if sub_segments else 1
            if len(y) < min_samples:
                print(f'# Skipped {fname}: too few samples left to extract features')
                continue

            if sub_segments:
                # Split the track into equally sized segments
                track_duration = round(len(y) / sr)
                # Rounding up must not push the segment grid past the end of
                # the signal, otherwise the last segment comes out shorter.
                samples_per_track = min(len(y), sr * track_duration)
                samples_per_segment = int(samples_per_track / num_segments)

                for d in range(num_segments):
                    start = samples_per_segment * d
                    finish = start + samples_per_segment
                    dict_features = extract_features(y=y[start:finish], sr=sr)
                    dict_features['label'] = idx
                    dict_features['id'] = f"{fname}_{d}"
                    dict_features['total_duration'] = track_duration
                    dict_features['sub_duration'] = round((finish - start) / sr)
                    arr_features.append(dict_features)
            else:
                # One row for the whole track
                dict_features = extract_features(y=y, sr=sr)
                dict_features['label'] = idx
                dict_features['id'] = f"{fname}"
                arr_features.append(dict_features)

    # Create a dataframe with the features
    df = pd.DataFrame(data=arr_features)

    # ----------------------------------------------------------
    # Print final output details
    # ----------------------------------------------------------

    if cleaning:
        print('# Envelope used for data cleaning')

    if sub_segments:
        print(f'# Split each sound into {num_segments} sub segments')
    else:
        print('# Sounds not split into sub segments')

    if not save_locally:
        print('# Dataset generated but not saved')
        print('# Shape of the dataset', df.shape)
    else:
        print('# Dataset generated')
        print('# Shape of the dataset', df.shape)
        out_dir = os.path.join(os.getcwd(), '..', 'dataset')
        # Make sure the target folder exists before writing
        os.makedirs(out_dir, exist_ok=True)
        df.to_csv(os.path.join(out_dir, 'train_data.csv'), index=False)
        print('# Dataset saved')

    return df

In [213]:
# Smoke run: build the dataset in memory, with cleaning and sub-segments
df_test = get_data(
    save_locally=False,
    sub_segments=True,
    cleaning=True,
    threshold=0.005,
)
df_test

100%|██████████| 2/2 [00:03<00:00,  1.68s/it]

# Envelope used for data cleaning
# Split each sound into 5 sub segments
# Dataset generated but not saved
# Shape of the dataset (20, 178)





Unnamed: 0,centroid_max,centroid_min,centroid_mean,centroid_std,centroid_kurtosis,centroid_skew,flux_max,flux_min,flux_mean,flux_std,...,mfcc_19_max,mfcc_19_min,mfcc_19_mean,mfcc_19_std,mfcc_19_kurtosis,mfcc_19_skew,label,id,total_duration,sub_duration
0,4053.985673,991.341998,1728.781762,369.483901,10.528528,2.361501,7.882843,0.0,1.412956,1.40329,...,26.860922,-15.932612,2.09078,8.337629,-0.291359,0.395366,0,blues.00000.wav_0,30,6
1,3030.717258,925.832373,1658.654534,351.213524,1.045425,0.633993,9.596346,0.0,1.341359,1.559991,...,17.833252,-16.320734,2.600236,6.429453,-0.138773,-0.13677,0,blues.00000.wav_1,30,6
2,3964.565091,1067.787885,1757.72731,451.102013,4.646342,1.585787,8.330933,0.0,1.366644,1.458756,...,20.262171,-18.04121,0.870194,6.641301,0.135161,0.286205,0,blues.00000.wav_2,30,6
3,3754.865359,1184.838861,1783.879894,370.250979,5.515787,1.897505,7.089076,0.0,1.345603,1.411011,...,19.205822,-16.667665,1.577898,6.720253,-0.40037,-0.111567,0,blues.00000.wav_3,30,6
4,3584.4998,1013.633703,1702.469318,325.851244,5.202042,1.500669,9.406673,0.0,1.411663,1.558614,...,33.518887,-22.348665,1.635591,9.132927,0.702181,0.4521,0,blues.00000.wav_4,30,6
5,3367.213639,359.587911,1318.424259,577.432152,0.069351,0.623232,12.539949,0.0,1.547621,2.235441,...,23.588488,-16.82774,-1.219462,6.586344,0.214954,0.204908,0,blues.00001.wav_0,30,6
6,4193.612493,478.931537,1324.658722,638.531669,4.022084,1.646162,13.008527,0.0,1.368322,2.166972,...,32.69582,-16.229574,-0.608171,9.331887,1.482329,1.196855,0,blues.00001.wav_1,30,6
7,5443.710004,520.933379,1524.868586,636.759018,12.686876,2.726991,16.973648,0.0,1.423859,1.973039,...,18.592968,-18.697273,2.156039,7.073184,-0.31599,0.028503,0,blues.00001.wav_2,30,6
8,4230.283347,588.119751,1669.3875,656.984103,2.159935,1.268268,11.288823,0.0,1.458563,1.745786,...,26.096724,-22.842854,-0.498263,7.438746,0.081563,0.07886,0,blues.00001.wav_3,30,6
9,2882.512051,463.056342,1418.946906,492.791796,0.056643,0.497271,9.968738,0.0,1.332767,1.738239,...,25.724041,-17.037224,3.196162,6.947302,0.181847,-0.116061,0,blues.00001.wav_4,30,6


# Playground

### For a single song

In [203]:
# Playground area
def get_style_songs(style, music_folder=os.path.join('..', 'data')):
    """
    List the ``.wav`` files available for one music style.

    Parameters
    ----------
    style : name of the style sub-folder inside ``music_folder``
    music_folder : folder holding one sub-folder per style
        (defaults to ``../data``)

    Returns
    -------
    tuple
        ``(style_path, file_names)`` where ``style_path`` is the folder of the
        style and ``file_names`` the sorted names of its ``.wav`` files.
    """
    style_path = os.path.join(music_folder, style)
    # os.listdir order is arbitrary; sort so indexing into the list is reproducible
    file_names = sorted(f for f in os.listdir(style_path) if f.endswith('.wav'))
    return style_path, file_names

# Load the first listed blues track at 22050 Hz for manual experiments
style_path, file_names = get_style_songs('blues')
blues101 = os.path.join(style_path, file_names[0])
signal, sample_rate = librosa.load(blues101, sr=22050)
