### This notebook extracts audio features from the Medley-solos-DB dataset.

##### Information on the public dataset we use:
**Name**: Medley-solos-DB

**DOI**: 10.5281/zenodo.3464194

**Publication date**: September 29, 2019

**URL**: [medley-solos-DB](https://zenodo.org/record/3464194#.Ymyk9ugza3D)

**Version**: 1.2

In [1]:
# Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn

# import Librosa
import librosa
import librosa.display
import IPython.display as ipd

# Silence every Python warning for the rest of the session. Note that this also
# hides deprecation warnings emitted by the libraries imported above.
import warnings
warnings.filterwarnings('ignore')
import csv

In [2]:
import os
general_path = 'E:/datasets/medley/Medley-solos-DB' # the filepath to the dataset folder

# os.listdir returns entries in arbitrary order; sort them so the files are
# always processed (and written to the CSV) in the same order on every run.
fileList = sorted(os.listdir(general_path))

In [29]:
# Row buffer reused for every file: filename, length, 16 summary statistics,
# tempo, 40 MFCC statistics and the label (60 slots in total).
featureList = [None for _ in range(60)]

In [30]:
# Placeholders for every per-file feature. Each name is reassigned inside the
# extraction loop before its value is used.
filename = length = 0
chroma_stft_mean = chroma_stft_var = 0
rms_mean = rms_var = 0
spectral_centroid_mean = spectral_centroid_var = 0
spectral_bandwidth_mean = spectral_bandwidth_var = 0
rolloff_mean = rolloff_var = 0
zero_crossing_rate_mean = zero_crossing_rate_var = 0
harmony_mean = harmony_var = 0
perceptr_mean = perceptr_var = 0
tempo = 0
mfcc = [None for _ in range(20)]
label = 0

In [31]:
# Extract one row of summary features per audio file and append it to test.csv.
#
# Row layout (60 values): filename, trimmed length in samples, mean/variance of
# eight descriptors, tempo, mean/variance of 20 MFCCs, and the instrument label
# parsed from the filename.

# CSV column names, in the same order as the values stored in featureList.
columnNames = [
    'filename', 'length',
    'chroma_stft_mean', 'chroma_stft_var',
    'rms_mean', 'rms_var',
    'spectral_centroid_mean', 'spectral_centroid_var',
    'spectral_bandwidth_mean', 'spectral_bandwidth_var',
    'rolloff_mean', 'rolloff_var',
    'zero_crossing_rate_mean', 'zero_crossing_rate_var',
    'harmony_mean', 'harmony_var',
    'perceptr_mean', 'perceptr_var',
    'tempo',
]
for k in range(1, 21):
    columnNames += ['mfcc%d_mean' % k, 'mfcc%d_var' % k]
columnNames.append('label')

# Class digit embedded in each filename -> instrument name.
instrumentNames = {
    0: 'clarinet',
    1: 'guitar',
    2: 'singer',
    3: 'flute',
    4: 'piano',
    5: 'saxophone',
    6: 'trumpet',
    7: 'violin',
}

# Rows are appended, so only emit the header when the file is new or empty;
# otherwise a second header line would land in the middle of existing data.
# NOTE: re-running this cell appends the same rows again. Delete test.csv
# first if a clean file is wanted.
writeHeader = not os.path.exists('test.csv') or os.path.getsize('test.csv') == 0

# Open the CSV once for the whole run instead of once per audio file.
with open('test.csv', "a+", newline='') as csvfile:
    writer = csv.writer(csvfile)
    if writeHeader:
        writer.writerow(columnNames)

    for filename in fileList:
        y, sr = librosa.load(os.path.join(general_path, filename))
        # Strip leading/trailing silence; all features below use the trimmed signal.
        audio_file, _ = librosa.effects.trim(y)

        # 1. length of the trimmed signal, in samples
        length = np.shape(audio_file)[0]

        # Audio and sample rate are passed by keyword throughout: recent librosa
        # releases reject them as positional arguments.

        # 2-3. chroma_stft (computed on the trimmed signal, like every other feature)
        ch = librosa.feature.chroma_stft(y=audio_file, sr=sr)
        chroma_stft_mean = np.mean(ch)
        chroma_stft_var = np.var(ch)

        # 4-5. rms
        rms = librosa.feature.rms(y=audio_file)
        rms_mean = np.mean(rms)
        rms_var = np.var(rms)

        # 6-7. spectral_centroid
        spectral_centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0]
        spectral_centroid_mean = np.mean(spectral_centroids)
        spectral_centroid_var = np.var(spectral_centroids)

        # 8-9. spectral_bandwidth
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
        spectral_bandwidth_mean = np.mean(spectral_bandwidth)
        spectral_bandwidth_var = np.var(spectral_bandwidth)

        # 10-11. rolloff
        rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)
        rolloff_mean = np.mean(rolloff)
        rolloff_var = np.var(rolloff)

        # 12-13. zero_crossing_rate
        zero_crossing_rate = librosa.feature.zero_crossing_rate(
            y=audio_file, frame_length=2048, hop_length=512, center=True
        )
        zero_crossing_rate_mean = np.mean(zero_crossing_rate)
        zero_crossing_rate_var = np.var(zero_crossing_rate)

        # 14-17. harmony / perceptr: statistics of the two signals returned by hpss
        y_harm, y_perc = librosa.effects.hpss(y=audio_file)
        harmony_mean = np.mean(y_harm)
        harmony_var = np.var(y_harm)
        perceptr_mean = np.mean(y_perc)
        perceptr_var = np.var(y_perc)

        # 18. tempo
        tempo, _ = librosa.beat.beat_track(y=audio_file, sr=sr)
        # Some librosa releases return tempo as a one-element array rather than
        # a scalar; normalise it so the CSV cell holds a bare number.
        tempo = float(np.atleast_1d(tempo)[0])

        # 19-58. mfcc (20 coefficients, mean and variance of each)
        mfcc = librosa.feature.mfcc(y=audio_file, sr=sr, n_mfcc=20)

        # 59. label
        # Filenames look like 'Medley-solos-DB_test-0_003d41a8-...': the character
        # just before the first underscore at or after index 18 is the class digit.
        num = int(filename[filename.index('_', 18) - 1])
        # Any digit outside 0-7 falls back to 'violin', as the original
        # if/elif chain did.
        label = instrumentNames.get(num, 'violin')

        featureList[:19] = [
            filename, length,
            chroma_stft_mean, chroma_stft_var,
            rms_mean, rms_var,
            spectral_centroid_mean, spectral_centroid_var,
            spectral_bandwidth_mean, spectral_bandwidth_var,
            rolloff_mean, rolloff_var,
            zero_crossing_rate_mean, zero_crossing_rate_var,
            harmony_mean, harmony_var,
            perceptr_mean, perceptr_var,
            tempo,
        ]

        # mfccs: slots 19..58 hold (mean, var) pairs for coefficients 1..20
        for i in range(20):
            featureList[2*i+19] = np.mean(mfcc[i])
            featureList[2*i+20] = np.var(mfcc[i])

        featureList[59] = label

        writer.writerow(featureList)
        # Push each row to disk right away so progress survives an interrupted run.
        csvfile.flush()
        

In [3]:
# Load the feature table from test.csv (the file the extraction loop writes to)
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='test.csv')
data.head(n=5)

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,Medley-solos-DB_test-0_003d41a8-afad-501f-fbc3...,65536,0.182081,0.095443,0.147271,0.017241,1094.313914,179691.875796,1475.468772,201494.086305,...,303.29822,15.995791,186.1982,7.788494,69.82964,-4.385258,71.37893,-4.615896,180.1503,clarinet
1,Medley-solos-DB_test-0_007a6103-bf60-528a-f36f...,65536,0.2334,0.100379,0.021083,0.000483,909.431098,224640.877652,1470.42397,402956.36672,...,68.047874,-9.915801,55.444805,-3.300354,37.68224,2.397617,13.716176,1.861213,13.179912,clarinet
2,Medley-solos-DB_test-0_012639dd-3f06-57f8-f61c...,65536,0.141079,0.079732,0.059619,0.001054,975.621702,63550.597607,1149.886563,117716.763246,...,182.40906,16.87218,311.93726,30.601374,309.70828,38.97951,445.83133,28.731062,501.72363,clarinet
3,Medley-solos-DB_test-0_023737df-a7c6-5741-fa57...,65536,0.168623,0.084713,0.062023,0.001152,1814.772456,91530.238797,1331.823918,27289.079131,...,65.68341,5.982809,91.10795,6.496395,76.64593,-9.803253,408.77176,6.004927,130.09984,clarinet
4,Medley-solos-DB_test-0_024f9a79-e881-550d-fcb2...,65536,0.131231,0.078331,0.010482,6e-06,521.215878,13631.568173,926.265569,34679.129126,...,62.80104,-4.019889,15.816593,7.639528,197.40657,26.543125,153.67769,37.17821,70.997604,clarinet
