This script reads the single note saxophone audiofiles and from the goodsounds database and extracts al the essentia low level features into data.csv in the same location as the files.

In [1]:
import sqlite3
import pandas as pd
import numpy as np
from pickle import dump
import os 
import essentia.standard as es
from utilities import *

In [2]:
# Create a SQL connection to our SQLite database
con = sqlite3.connect("good-sounds_Sax_SingleNotes/database.sqlite")
df = pd.read_sql_query("SELECT * from sounds", con)
con.close()

In [3]:
#Show df
df = df.loc[df['instrument'] == 'sax_alto']
df.head()

Unnamed: 0,id,instrument,note,octave,dynamics,recorded_at,location,player,bow_velocity,bridge_position,...,attack,decay,sustain,release,offset,reference,klass,comments,semitone,pitch_reference
4639,4641,sax_alto,C#,3.0,,2014-10-17 14:00:00.000000,upf studio,raul,,,...,,,,,,1,good-sound,,37.0,442.0
4640,4642,sax_alto,D,3.0,,2014-10-17 14:00:00.000000,upf studio,raul,,,...,,,,,,1,good-sound,,38.0,442.0
4641,4643,sax_alto,D#,3.0,,2014-10-17 14:00:00.000000,upf studio,raul,,,...,,,,,,1,good-sound,,39.0,442.0
4642,4644,sax_alto,E,3.0,,2014-10-17 14:00:00.000000,upf studio,raul,,,...,,,,,,1,good-sound,,40.0,442.0
4643,4645,sax_alto,F,3.0,,2014-10-17 14:00:00.000000,upf studio,raul,,,...,,,,,,1,good-sound,,41.0,442.0


In [4]:
#Get only the relevant parameters of the df
df = df.loc[df['id'] < 6500] #get only the single note saxphone notes
df = df[['id','pack_filename', 'klass']]
df.head()

Unnamed: 0,id,pack_filename,klass
4639,4641,0000.wav,good-sound
4640,4642,0001.wav,good-sound
4641,4643,0002.wav,good-sound
4642,4644,0003.wav,good-sound
4643,4645,0004.wav,good-sound


In [5]:
#Load the path of the audio files
file = 4
audio = df['pack_filename'][4639]
path = "good-sounds_Sax_SingleNotes/saxo_raul_recordings"
microphone = ["iphone", "neumann"]
segments_dir = os.path.join(path, microphone[0])
print(segments_dir)

good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone


In [6]:
#Create a list with all the files path
segment_files = []
for index, row in df.iterrows():
    filename = row['pack_filename']
    filename = os.path.join(segments_dir, filename)
    segment_files.append(filename)
len(segment_files)

324

In [7]:
#Running music extractor for one file:
file = segment_files[0]
features, features_frames = es.MusicExtractor(lowlevelSilentFrames='drop',
                                                  lowlevelFrameSize = 2048,
                                                  lowlevelHopSize = 1024,
                                                  lowlevelStats = ['mean', 'stdev'])(file)

#Show all the features that will be extracted
scalar_lowlevel_descriptors = [descriptor for descriptor in features.descriptorNames() if 'lowlevel' in descriptor and isinstance(features[descriptor], float)]
#print("Subset of features to be considered:\n",scalar_lowlevel_descriptors)


In [8]:
#Extracting features and writing in data.csv file in the segments folder
descriptors = ['Bad-attack', 'Good-sound']

for i in range(len(descriptors)):
    data_file = os.path.join(segments_dir, descriptors[i]+'.csv')
    file_count = 0
    print("Processing", descriptors[i])
    with open(data_file, 'w') as writer:
        #adding column names as the first line in csv
        line2write = ','.join(scalar_lowlevel_descriptors + ['klass']).replace('lowlevel.','') + '\n'
        writer.write(line2write)
        for file in segment_files:
            if '.wav' in file:
                file_count +=1
                if file_count % 20 == 0:#print name of a file every 20 files
                    print(file_count, "files processed, current file: ",file)
                #Detect the attack and release of the signal
                #Load audio
                fs = 44100
                x = es.MonoLoader(filename=file)()

                #Attack detection
                attack_start, attack_end = attack_detection(x, M = 5)

                #Release detection (attack but flipped)
                x_flip = np.array(np.flip(x))
                release_start_flip, release_end_flip = attack_detection(x_flip, M = 1.5)
                release_start = len(x)-release_end_flip
                release_end = len(x)-release_start_flip
                
                #Post processing conditions
                if attack_end-attack_start<fs/10:
                    attack_end = attack_start+int(len(x)*0.05)
                
                #If we want to extract the features from the attack or not, select the right segments
                if descriptors[i] == 'Bad-attack':
                    startTime = attack_start/fs
                    endTime = attack_end/fs
                else:
                    startTime = attack_end/fs
                    endTime = release_start/fs
                
                if startTime>endTime:
                    print(file)
        
                #Extract the features
                features, features_frames = es.MusicExtractor(lowlevelSilentFrames='drop',
                                                              lowlevelFrameSize = 2048,
                                                              lowlevelHopSize = 1024,
                                                              lowlevelStats = ['mean', 'stdev'], 
                                                              startTime = startTime,
                                                              endTime = endTime)(file)
                selected_features = [features[descriptor] for descriptor in scalar_lowlevel_descriptors]
                klass = df['klass'][4639+file_count-1]
                line2write = str(selected_features)[1:-1] + ',' + klass + '\n'
                writer.write(line2write)
    print("A total of ",file_count, "files processed")
    print(descriptors[i], "processed")

Processing Bad-attack
20 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0019.wav
40 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0039.wav
60 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0059.wav
80 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0079.wav
100 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0099.wav
120 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0119.wav
140 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0139.wav
160 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0159.wav
180 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul_recordings/iphone/0179.wav
200 files processed, current file:  good-sounds_Sax_SingleNotes/saxo_raul

In [9]:

def csv_post_processing(csv_path, data_name):
    '''This function does some post processing and organizes and saves the extracted featrues from the 
    raw CSV file into an organised CSV file.'''
    
    #Load the data
    data = pd.read_csv(csv_path)
    #Scale the data
    data_modif = data.copy()
    #Let's use sklearn's preprocessing tools for applying normalisation to features
    from sklearn import preprocessing
    standard_scaler = preprocessing.StandardScaler()
    #Apply scaling to all but the last column
    standard_scaler.fit(data.iloc[:,:-1].values)
    data_modif.iloc[:,:-1] = standard_scaler.transform(data.iloc[:,:-1].values)
    # save the scaler
    dump(standard_scaler, open('scalers/'+data_name+'_scaler.pkl', 'wb'))

    #Put a single classifier into a different column
    data_test = data_modif.copy()
    data_test = pd.concat([data_test,data_test['klass'].str.split(' ', expand=True)], axis=1)

    #Put each classifier in a column and a 1 or  a 0 if the classifier is present for each audio
    goodsound = np.zeros_like(data_test.klass)
    baddynamics = np.zeros_like(data_test.klass)
    badattack = np.zeros_like(data_test.klass)
    badpitch = np.zeros_like(data_test.klass)
    badtimber = np.zeros_like(data_test.klass)
    badrichness = np.zeros_like(data_test.klass)
    n_goodsounds = 0
    n_baddynamics = 0
    n_badattack = 0
    n_badpitch = 0
    n_badtimber = 0
    n_badrichness = 0

    for index, row in data_test.iterrows():
        if (row[0] == "good-sound" or row[1] == "good-sound" or row[2] == "good-sound"):
            goodsound[index] = 1
            n_goodsounds +=1
        if (row[0] == "bad-dynamics" or row[1] == "bad-dynamics" or row[2] == "bad-dynamics"):
            baddynamics[index] = 1
            n_baddynamics += 1
        if (row[0] == "bad-attack" or row[1] == "bad-attack" or row[2] == "bad-attack"):
            badattack[index] = 1
            n_badattack += 1
        if (row[0] == "bad-pitch" or row[1] == "bad-pitch" or row[2] == "bad-pitch"):
            badpitch[index] = 1
            n_badpitch += 1
        if (row[0] == "bad-timbre" or row[1] == "bad-timbre" or row[2] == "bad-timbre"):
            badtimber[index] = 1
            n_badtimber += 1
        if (row[0] == "bad-richness" or row[1] == "bad-richness" or row[2] == "bad-richness"):
            badrichness[index] = 1
            n_badrichness += 1

    data_test['Good-sound'] = goodsound
    data_test['Bad-attack'] = badattack
    data_test['Bad-dynamics'] = baddynamics
    data_test['Bad-pich'] = badpitch
    data_test['Bad-timber'] = badtimber
    data_test['Bad-richness'] = badrichness

    data_test2 = data_test.drop(['klass', 0, 1, 2], axis=1)

    csvfilename = 'extracted_features/'+data_name+'_SingleNotes_LowLevelFeatures.csv'
    data_test2.to_csv(csvfilename, index=False)
    
    return csvfilename

In [10]:
#Load the CSV filenames and paths
data_files = []
for i in range(len(descriptors)):
    data_files.append(os.path.join(segments_dir, descriptors[i]+'.csv'))

In [11]:
#Raw data
bad_attack_data = pd.read_csv(data_files[0])
bad_attack_data.head()

Unnamed: 0,average_loudness,barkbands_crest.mean,barkbands_crest.stdev,barkbands_flatness_db.mean,barkbands_flatness_db.stdev,barkbands_kurtosis.mean,barkbands_kurtosis.stdev,barkbands_skewness.mean,barkbands_skewness.stdev,barkbands_spread.mean,...,spectral_rolloff.stdev,spectral_skewness.mean,spectral_skewness.stdev,spectral_spread.mean,spectral_spread.stdev,spectral_strongpeak.mean,spectral_strongpeak.stdev,zerocrossingrate.mean,zerocrossingrate.stdev,klass
0,0.990684,10.095438,1.135923,0.301427,0.074924,2.368462,1.616232,0.887191,0.451137,4.234679,...,129.898117,4.90124,1.480423,1930921.125,1067698.25,1.112072,0.596121,0.027945,0.007966,good-sound
1,0.990684,9.383596,1.993729,0.321588,0.086324,2.84207,2.429902,0.552658,0.823363,4.300984,...,183.006378,6.55545,2.208831,2099988.0,2040041.125,2.081831,1.406446,0.031006,0.012992,good-sound
2,0.990684,10.897723,2.365383,0.357475,0.109467,5.067965,5.651997,0.908066,0.550125,6.194994,...,2169.557373,6.666208,2.393615,2527176.0,3821892.25,3.15428,1.612562,0.03598,0.028082,good-sound
3,0.990684,12.656359,3.588907,0.354818,0.12589,2.938108,2.490078,0.387514,0.268859,9.092329,...,2686.112793,6.613125,2.361167,2055754.25,2120556.0,2.690359,1.63322,0.049479,0.050679,good-sound
4,0.990684,11.3467,2.728733,0.325424,0.114785,6.439334,6.597914,1.259283,0.773463,6.616688,...,2292.158936,5.525723,1.9869,3432943.5,6225082.0,1.757006,1.073749,0.03595,0.035617,good-sound


In [12]:
#Processed data
csvfilename =[]
for i in range(len(descriptors)):
    name = csv_post_processing(data_files[i], descriptors[i])
    csvfilename.append(name)
print(csvfilename)
test = pd.read_csv(csvfilename[0])
test.head()

['extracted_features/Bad-attack_SingleNotes_LowLevelFeatures.csv', 'extracted_features/Good-sound_SingleNotes_LowLevelFeatures.csv']


Unnamed: 0,average_loudness,barkbands_crest.mean,barkbands_crest.stdev,barkbands_flatness_db.mean,barkbands_flatness_db.stdev,barkbands_kurtosis.mean,barkbands_kurtosis.stdev,barkbands_skewness.mean,barkbands_skewness.stdev,barkbands_spread.mean,...,spectral_strongpeak.mean,spectral_strongpeak.stdev,zerocrossingrate.mean,zerocrossingrate.stdev,Good-sound,Bad-attack,Bad-dynamics,Bad-pich,Bad-timber,Bad-richness
0,0.0,-1.537955,-1.54798,-0.928237,-1.6957,-0.488244,-0.433398,-0.807756,-0.908618,-0.509257,...,-1.624232,-1.191804,-0.434584,-0.839516,1,0,0,0,0,0
1,0.0,-1.694546,-1.124391,-0.698603,-1.398426,-0.482126,-0.427291,-0.931961,-0.713257,-0.499426,...,-0.414803,-0.265128,-0.314412,-0.650184,1,0,0,0,0,0
2,0.0,-1.361467,-0.940866,-0.289844,-0.794922,-0.453375,-0.403107,-0.800005,-0.856664,-0.21862,...,0.922696,-0.029416,-0.119134,-0.081685,1,0,0,0,0,0
3,0.0,-0.9746,-0.336684,-0.320109,-0.366667,-0.480886,-0.426839,-0.993276,-1.004285,0.210939,...,0.34412,-0.005792,0.410794,0.769599,1,0,0,0,0,0
4,0.0,-1.2627,-0.761442,-0.654911,-0.656255,-0.435661,-0.396007,-0.669606,-0.739447,-0.1561,...,-0.819906,-0.645595,-0.120332,0.202176,1,0,0,0,0,0


In [15]:
#used for saving all the feature names to a text file (usefull for later)
feature_names = list(test.columns.values)

#Write selected features to txt file
folder = 'extracted_features'
filename = 'feature_names'
path = os.path.join(folder, filename)

if os.path.exists(path+".txt"):
    os.remove(path+".txt")

f = open(path+".txt","a")
for feature_name in feature_names:
    f.write(feature_name)
    f.write(",")    
f.close()

## Old way, not useful anymore

In [14]:
bad_attack_data.isnull().sum().sum()#sums a matrix of True/False values obtained by checking if each value is Nan
good_sound_data.isnull().sum().sum()#sums a matrix of True/False values obtained by checking if each value is Nan

NameError: name 'good_sound_data' is not defined

#Drop Low variance columns
low_var_features = []
for column in data.columns:
  if column != 'klass':
    variance = np.var(data[column])
    mean = np.mean(data[column])
    if variance == mean * 0.001:
      print(column, 'variance =',np.var(data[column]))
      low_var_features.append(column)

print('Data shape before droppping features:', data.shape)
data = data.drop(columns=low_var_features)
scalar_lowlevel_descriptors = list(data.columns)[:-1]
print('Data shape after droppping features:', data.shape)

In [None]:
#Read data
data = pd.read_csv(r'good-sounds/sound_files/saxo_raul_recordings/neumann/data.csv')
data.head()

In [None]:
data_modif = data.copy()
#Let's use sklearn's preprocessing tools for applying normalisation to features
from sklearn import preprocessing
standard_scaler = preprocessing.StandardScaler()
#Apply scaling to all but the last column
standard_scaler.fit(data.iloc[:,:-1].values)
data_modif.iloc[:,:-1] = standard_scaler.transform(data.iloc[:,:-1].values)
# save the scaler
dump(standard_scaler, open('standard_scaler.pkl', 'wb'))

In [None]:
#Not able to count it "correctly"
data_modif.klass.value_counts()

In [None]:
#Put a single classifier into a different column
data_test = data_modif.copy()
data_test = pd.concat([data_test,data_test['klass'].str.split(' ', expand=True)], axis=1)
data_test.head()

In [None]:
#Put each classifier in a column and a 1 or  a 0 if the classifier is present for each audio
goodsound = np.zeros_like(data_test.klass)
baddynamics = np.zeros_like(data_test.klass)
badattack = np.zeros_like(data_test.klass)
badpitch = np.zeros_like(data_test.klass)
badtimber = np.zeros_like(data_test.klass)
badrichness = np.zeros_like(data_test.klass)
n_goodsounds = 0
n_baddynamics = 0
n_badattack = 0
n_badpitch = 0
n_badtimber = 0
n_badrichness = 0

for index, row in data_test.iterrows():
    if (row[0] == "good-sound" or row[1] == "good-sound" or row[2] == "good-sound"):
        goodsound[index] = 1
        n_goodsounds +=1
    if (row[0] == "bad-dynamics" or row[1] == "bad-dynamics" or row[2] == "bad-dynamics"):
        baddynamics[index] = 1
        n_baddynamics += 1
    if (row[0] == "bad-attack" or row[1] == "bad-attack" or row[2] == "bad-attack"):
        badattack[index] = 1
        n_badattack += 1
    if (row[0] == "bad-pitch" or row[1] == "bad-pitch" or row[2] == "bad-pitch"):
        badpitch[index] = 1
        n_badpitch += 1
    if (row[0] == "bad-timbre" or row[1] == "bad-timbre" or row[2] == "bad-timbre"):
        badtimber[index] = 1
        n_badtimber += 1
    if (row[0] == "bad-richness" or row[1] == "bad-richness" or row[2] == "bad-richness"):
        badrichness[index] = 1
        n_badrichness += 1
           
data_test['Good-sound'] = goodsound
data_test['Bad-attack'] = badattack
data_test['Bad-dynamics'] = baddynamics
data_test['Bad-pich'] = badpitch
data_test['Bad-timber'] = badtimber
data_test['Bad-richness'] = badrichness

data_test2 = data_test.drop(['klass', 0, 1, 2], axis=1)

data_test2.head()

In [None]:
#Nubers of descriptors
print("Good-Sounds:\t", n_goodsounds,
      "\nBad-Attack:\t", n_badattack,
      "\nBad-Dynamics:\t", n_baddynamics,
      "\nBad-Pich:\t", n_badpitch,
      "\nBad-Timber:\t", n_badtimber,
      "\nBad-Richness:\t", n_badrichness)

In [None]:
csvfilename = 'SingleNotes_LowLevelFeatures.csv'
data_test2.to_csv(csvfilename, index=False)

In [None]:
data = pd.read_csv(csvfilename)
data.head()