In [145]:
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os
import fma_modules.utils as fma_utils
import IPython.display as ipd
import regex as re

read_files: takes a path to one folder in FMA dataset and returns a list of paths to songs in that folder

get_name: takes a path to one song and extracts track name using regex

calc_features: takes a path to one song and returns a list of features for one song

create_features_df: take a path to one folder in FMA dataset and returns df with the name of the track and extractd features 

In [146]:
#returns a list of paths to auidio files from a folder 

def read_files(directory):
    files = []
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        files.append(path)
    return files

#read_files("project_data_source/free_music_archive/fma_small/001")

In [147]:
#extracts name of the track from path 
def get_name(path):
    match = re.search(r'([^\/]+)(?=(\.mp3))', path)
    
    if match:
        matched_str = match.group()
        return matched_str


In [148]:
path = "project_data_source/free_music_archive/fma_small/001"

song = "project_data_source/free_music_archive/fma_small/001/001039.mp3"

In [149]:
ipd.Audio(song)

https://towardsdatascience.com/extract-features-of-music-75a3f9bc265d

Zero Crossing Rate
The zero crossing rate is the rate of sign-changes along a signal, i.e., the rate at which the signal changes from positive to negative or back. This feature has been used heavily in both speech recognition and music information retrieval. It usually has higher values for highly percussive sounds like those in metal and rock.

Spectral Centroid
It indicates where the ”centre of mass” for a sound is located and is calculated as the weighted mean of the frequencies present in the sound. If the frequencies in music are same throughout then spectral centroid would be around a centre and if there are high frequencies at the end of sound then the centroid would be towards its end.

Spectral Rolloff
Spectral rolloff is the frequency below which a specified percentage of the total spectral energy, e.g. 85%, lies.

MFCC — Mel-Frequency Cepstral Coefficients
This feature is one of the most important method to extract a feature of an audio signal and is used majorly whenever working on audio signals. The mel frequency cepstral coefficients (MFCCs) of a signal are a small set of features (usually about 10–20) which concisely describe the overall shape of a spectral envelope.


In [150]:
#extracts features from a song and return a list of features for a given song

def calc_features(song):
    y, sr = librosa.load(song)
    y, _ = librosa.effects.trim(y)
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_centroids_delta = librosa.feature.delta(spectral_centroids)
    spectral_centroids_accelerate = librosa.feature.delta(spectral_centroids, order=2)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y,sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    rms = librosa.feature.rms(y=y)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    
    spectral_centroids_mean = np.mean(spectral_centroids)
    spectral_centroids_delta_mean = np.mean(spectral_centroids_delta)
    spectral_centroids_accelerate_mean = np.mean(spectral_centroids_accelerate)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth)
    spectral_rolloff_mean = np.mean(spectral_rolloff)
    zero_crossing_rate_mean = np.mean(zero_crossing_rate) 
    rms_mean = np.mean(rms) 
    chroma_stft_mean = np.mean(chroma_stft)
    mfccs_mean = np.mean(mfccs)
    
    return spectral_centroids_mean, spectral_centroids_delta_mean, spectral_centroids_accelerate_mean, spectral_bandwidth_mean,spectral_rolloff_mean, zero_crossing_rate_mean,rms_mean, chroma_stft_mean, mfccs_mean
    
    
calc_features(song)

(2259.2244579671465,
 0.5579229289832527,
 -0.4056371028254535,
 2284.0718907963037,
 4556.524190156613,
 0.10049856881777691,
 0.2724961,
 0.56794655,
 9.163696)

In [151]:
#returns a dataframe with the name of the track and extracted features 

def create_features_df(path):
    
    #read the audio files from 1 folder
    lis = read_files(path)
    
    #calc features for each song in the folder 
    features = [calc_features(item) for item in lis]
    features_df = pd.DataFrame(features)
    features_df.columns = ['spectral_centroids_mean' , 'spectral_centroids_delta_mean',
                      'spectral_centroids_accelerate_mean', 'spectral_bandwidth_mean', 
                      'spectral_rolloff_mean', 'zero_crossing_rate_mean', 'rms_mean', 'chroma_stft_mean',
                          'mfccs_mean']
    
    #get the song name and append to features
    name = [get_name(item) for item in lis]
    features_df['name'] = name 
    return features_df 

In [152]:
create_features_df(path)

Unnamed: 0,spectral_centroids_mean,spectral_centroids_delta_mean,spectral_centroids_accelerate_mean,spectral_bandwidth_mean,spectral_rolloff_mean,zero_crossing_rate_mean,rms_mean,chroma_stft_mean,mfccs_mean,name
0,2566.60682,0.917992,0.035971,2639.047874,5500.282284,0.114069,0.153798,0.268328,-3.687658,1681
1,2882.418658,1.260231,-0.39989,2743.565598,6149.746147,0.146213,0.119182,0.38047,-1.935596,1642
2,2130.992996,1.053612,-0.047981,2105.078599,4294.56671,0.0917,0.329319,0.444307,7.302248,1087
3,1829.630483,0.166565,0.678582,1929.607904,3710.024119,0.0935,0.10611,0.396603,-1.266372,1736
4,1683.853799,-0.547695,0.116832,1997.641437,3439.299549,0.073715,0.12655,0.245533,-6.897472,1680
5,723.566604,1.146576,0.591164,1506.881505,1181.448962,0.015566,0.278789,0.370954,3.484584,1482
6,1310.787217,0.112707,-0.147734,1661.448033,2456.90345,0.06008,0.250894,0.358733,5.604302,1443
7,2246.558959,-0.168964,-0.106003,2388.715147,4557.17469,0.092043,0.142182,0.281526,-5.617181,1682
8,2751.392558,0.984446,-0.407044,2520.707814,5727.390025,0.142598,0.242257,0.421861,1.151544,1735
9,1549.968271,-0.15285,0.487361,1875.977179,3054.242552,0.069012,0.133195,0.354782,-1.23537,1278
