## Feature Extraction, New and Improved

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
from pathlib import Path
from sklearn.model_selection import train_test_split
from pydub import AudioSegment
from scipy.fftpack import fft, dct

## Using audio data 

In [None]:
samples, sample_rate = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1.wav')

**Visualize audio files with librosa**

In [None]:
librosa.display.waveplot(samples, sr=sample_rate);

---

### Feature: 
BPM

Let's try it with a longer sample with a known BPM.

In [None]:
# Estimate a single (static) tempo for the whole recording.
y, sr = librosa.load('../../../workout.wav')
# Pass the signal by keyword: recent librosa releases no longer accept it positionally.
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
tempo.item()  # according to the song's published BPM, the correct tempo is 174 BPM

In [None]:
# The tempo may not be constant over the recording, so request a dynamic
# estimate instead of a single aggregated value (aggregate=None).
# Reuses `onset_env` and `sr` computed in the previous cell.
dtempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)
dtempo

Now with a short tap sample.

In [None]:
# Estimate a static tempo for a short tap clip.
y, sr = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1.wav')
# Pass the signal by keyword: recent librosa releases no longer accept it positionally.
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
tempo

In [None]:
# Estimate a static tempo for the time-stretched version of the same tap clip.
y, sr = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1_stretch.wav')
# Pass the signal by keyword: recent librosa releases no longer accept it positionally.
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
tempo

**This could be a promising feature.**

### Other (maybe useful) stuff from librosa

In [None]:
y, sr = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1.wav', mono=False)
#y, sr = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1_stretch.wav')
y.shape # Check mono or stereo

In [None]:
librosa.core.get_duration(y =y, sr=sr) # Get duration in seconds

## Trying out new features

**Short-time Fourier transform (STFT)**

In [None]:
n_fft = 256

In [None]:
def get_features_stft(path):
    """Return the time-averaged STFT magnitude of the audio file at `path`.

    The file is loaded with librosa, the magnitude of its short-time Fourier
    transform is taken using the notebook-level `n_fft`, and the result is
    averaged so that one value per row of the STFT matrix is returned.
    """
    signal, _ = librosa.load(path)
    magnitude = np.abs(librosa.stft(signal, n_fft=n_fft))
    averaged = np.mean(magnitude.T, axis=0)
    return averaged

In [None]:
# Load the first Shuffle clip and plot its waveform.
path = '../../../Source/Clean_train_clips/Shuffle/1/1.wav'
y, sr = librosa.load(path)
# get_features_stft(path)
librosa.display.waveplot(y, sr=sr);

In [None]:
get_features_stft('../../../Source/Clean_train_clips/Shuffle/6/1.wav').shape

In [None]:
y, sr = librosa.load('../../../Source/Clean_train_clips/Shuffle/1/1.wav')
D = np.abs(librosa.stft(y, n_fft=n_fft))

librosa.display.specshow(D);
D.shape

In [None]:
# Using left-aligned frames
D_left = np.abs(librosa.stft(y, n_fft=n_fft,center=False))
D_left.shape
#librosa.display.specshow(D_left);

In [None]:
# Using a shorter hop length
D_short = np.abs(librosa.stft(y, n_fft=n_fft,hop_length=64))
D_short.shape
#librosa.display.specshow(D_short);

In [None]:
# Spectrogram of the clip at `path`, using librosa's default STFT settings.
y, sr = librosa.load(path)
S = np.abs(librosa.stft(y))
plt.figure(figsize=(10,8))
# Only the first slot of a 3-row subplot grid is drawn.
plt.subplot(3, 1, 1)
# Show amplitudes in dB relative to the peak, with a log-scaled y axis and time on x.
librosa.display.specshow(librosa.amplitude_to_db(S,ref=np.max),y_axis='log', x_axis='time')
plt.title('Input spectrogram')
plt.colorbar(format='%+2.0f dB');

**Fast Fourier Transform**

In [None]:
# Load one clip and inspect the real part of its FFT (the imaginary part is discarded).
# NOTE(review): this path has no `Clean_train_clips` directory, unlike the other
# cells in this notebook -- confirm the file exists at this location.
y, sr = librosa.load('../../../Source/Shuffle/1/1.wav')
fftrans = fft(y).real
fftrans.shape, fftrans

In [None]:
def get_features_fft(path):
    """Return the mean FFT magnitude of the audio file at `path`.

    Parameters
    ----------
    path : str or path-like
        Audio file to load with librosa.

    Returns
    -------
    A single numpy float: the average of |FFT(y)| over all frequency bins.
    """
    y, sr = librosa.load(path)
    # Average the magnitude spectrum. Averaging only the real part is degenerate:
    # the DFT bins always sum to N * y[0], so mean(fft(y).real) is just the
    # first audio sample and carries no spectral information.
    return np.mean(np.abs(fft(y)))

**Discrete Cosine Transform**

In [None]:
# Compare the spectrogram of the raw signal with the spectrogram of its DCT.
# NOTE(review): this path has no `Clean_train_clips` directory, unlike the other
# cells in this notebook -- confirm the file exists at this location.
y, sr = librosa.load('../../../Source/Shuffle/1/1.wav')
dctrans = dct(y, axis=0) #dct(y, 1)

# Top panel: spectrogram of the original samples.
plt.subplot(211)
plt.specgram(y, Fs=sr, NFFT=n_fft);
# Bottom panel: spectrogram of the DCT coefficients, same NFFT.
plt.subplot(212)
plt.specgram(dctrans, Fs=sr, NFFT=n_fft);

# Alternative view (disabled): plot the DCT and the raw signal as plain curves.
#plt.subplot(221)
#plt.plot(dctrans)
#plt.subplot(222)
#plt.plot(y);

In [None]:
def get_features_dct(path):
    """Load the audio file at `path` and return the DCT of its samples."""
    signal, _ = librosa.load(path)
    coefficients = dct(signal)
    return coefficients

### Feature:
Mel-frequency cepstral coefficients 

In [2]:
n_mfcc = 20   #Remember to change file path at end of nb

In [3]:
# Extract the MFCC feature vector from the example clip: compute n_mfcc
# coefficients per frame, then average each coefficient over all frames.
# NOTE: relies on `samples` and `sample_rate` from the librosa.load cell near the
# top of the notebook; if that cell has not been run this raises NameError.
mfccs = np.mean(librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc).T,axis=0) 

NameError: name 'samples' is not defined

In [4]:
len(mfccs)
#librosa.display.specshow(librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc))

NameError: name 'mfccs' is not defined

---

### Repeat for all data (Organize in a DataFrame)

In [5]:
def build_list(step, folder, length):
    """Build the clip paths ``<step>/<folder>/<n>.wav`` for n = 1..length.

    `step` must be a string; `folder` is converted with str(). Returns an
    empty list when `length` is less than 1.
    """
    return [
        step + "/" + str(folder) + "/" + str(number) + ".wav"
        for number in range(1, length + 1)
    ]

def get_label(path):
    """Return 1 for a Shuffle clip and 0 for any other clip.

    The class is read from the third-from-last path component, i.e. the
    directory two levels above the file (``.../<class>/<folder>/<file>.wav``).

    Parameters
    ----------
    path : pathlib.Path or str
        Clip location. Plain strings (for example paths read back from a CSV)
        are accepted as well as Path objects.

    Raises
    ------
    IndexError
        If the path has fewer than three components.
    """
    # Path(...) is a no-op for Path inputs and lets string paths work too.
    parts = Path(path).parts
    return 1 if parts[-3] == 'Shuffle' else 0

In [6]:
shuffle_col, bc_col, path_col = [], [], []

In [7]:
# Collect every re-augmented Shuffle clip. Sort once after globbing (the list
# used to be re-sorted after every append) and assign rather than append, so
# re-running this cell does not duplicate entries.
pathlist = Path('../../../Source/Clean_train_clips/Re_augmented/Shuffle').glob('**/*.wav')
shuffle_col = sorted(pathlist)

In [8]:
# Collect every re-augmented Ball_change clip. Sort once after globbing (the
# list used to be re-sorted after every append) and assign rather than append,
# so re-running this cell does not duplicate entries.
pathlist = Path('../../../Source/Clean_train_clips/Re_augmented/Ball_change').glob('**/*.wav')
bc_col = sorted(pathlist)

In [9]:
# Paths of the pre-augmentation training clips, read from the saved CSV and
# converted to Path objects so get_label() can inspect their components.
train_untransform = pd.read_csv('../../../Source/Data/X_train_preAugmented.csv')
untrans_path_col = [Path(entry) for entry in train_untransform['Path']]

In [10]:
path_col = shuffle_col + bc_col + untrans_path_col

In [11]:
len(path_col)

912

**Add file paths**

In [12]:
tap = pd.DataFrame({'Path':path_col})
tap.shape

(912, 1)

**Add labels**

In [13]:
# Label every clip from its path: 1 for Shuffle, 0 otherwise.
tap['Labels'] = [get_label(clip_path) for clip_path in tap['Path']]

In [14]:
tap.head()

Unnamed: 0,Path,Labels
0,../../../Source/Clean_train_clips/Re_augmented...,1
1,../../../Source/Clean_train_clips/Re_augmented...,1
2,../../../Source/Clean_train_clips/Re_augmented...,1
3,../../../Source/Clean_train_clips/Re_augmented...,1
4,../../../Source/Clean_train_clips/Re_augmented...,1


**Add Features: MFCCs**

In [15]:
def get_features_mfcc(path):
    """Return the frame-averaged MFCC vector of the audio file at `path`.

    Computes `n_mfcc` (notebook-level setting) coefficients per frame with
    librosa and averages each coefficient over all frames.
    """
    signal, rate = librosa.load(path)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    return np.mean(coefficients.T, axis=0)

In [None]:
# TOO SLOW!
#for i in range (n_mfcc):
#    tap[str(i)] = [get_features_mfcc(tap.loc[idx, 'Path'])[i] for idx in range (len(tap))]

In [16]:
# Create a zero-filled block with one column per MFCC coefficient and append it
# to `tap`. The shape is derived from the data (rows) and `n_mfcc` (columns)
# instead of hard-coded 912 x 20, so it stays correct if either one changes.
d = pd.DataFrame(np.zeros((len(tap), n_mfcc)))
tap = pd.concat([tap, d], axis=1)

In [20]:
# FASTER: compute each clip's MFCC vector once, then copy it into that row.
# The feature columns start at position 2, after Path and Labels.
for row, clip_path in enumerate(tap['Path']):
    coefficients = get_features_mfcc(clip_path)
    for k in range(n_mfcc):
        tap.iat[row, k + 2] = coefficients[k]

In [21]:
tap.head()

Unnamed: 0,Path,Labels,0,1,2,3,4,5,6,7,...,10,11,12,13,14,15,16,17,18,19
0,../../../Source/Clean_train_clips/Re_augmented...,1,-111.605465,39.188962,-81.679966,20.393887,4.074106,-11.102911,-18.30051,-4.375751,...,-14.746165,-13.217085,2.010731,-9.008981,-11.552828,1.225305,-3.679972,-13.073083,-2.380595,-2.002468
1,../../../Source/Clean_train_clips/Re_augmented...,1,-124.765927,59.099908,-107.706609,34.986722,-7.126354,-4.114656,-28.8561,0.194913,...,-19.339707,-13.854616,-1.720148,-9.82881,-14.683264,-0.436352,-6.158605,-14.315085,-3.680532,-3.380997
2,../../../Source/Clean_train_clips/Re_augmented...,1,-157.899262,62.348967,-104.69015,33.359436,-9.649191,-4.82344,-30.225424,0.160945,...,-22.051224,-11.060202,1.349207,-10.08947,-13.73011,-0.723506,-5.207173,-16.210524,-4.518669,-3.49228
3,../../../Source/Clean_train_clips/Re_augmented...,1,-112.563192,43.32204,-77.893877,23.408783,9.349165,-9.172222,-12.264922,-10.973006,...,-24.674963,-12.264014,5.147175,-9.701486,-10.931069,-0.785393,-1.715562,-19.159819,-5.605177,-2.166103
4,../../../Source/Clean_train_clips/Re_augmented...,1,-127.778268,59.036205,-98.709749,36.489147,-5.924282,-0.904265,-23.447733,-5.196082,...,-32.384483,-10.076653,-0.175095,-9.560018,-14.250261,-1.962245,-4.320603,-20.652914,-8.085718,-2.686323


**Add Features: Tempo**

In [None]:
tap = pd.read_csv('../../../Source/Data/train_data_augmented_mfcc.csv')

In [None]:
tap.head()

In [None]:
def get_features_bpm(path):
    """Estimate a single tempo for the audio file at `path`.

    Parameters
    ----------
    path : str or path-like
        Audio file to load with librosa.

    Returns
    -------
    The tempo estimate from librosa.beat.tempo as a plain Python scalar.
    """
    samples, sample_rate = librosa.load(path)
    # Pass the signal by keyword: recent librosa releases no longer accept it positionally.
    onset_env = librosa.onset.onset_strength(y=samples, sr=sample_rate)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sample_rate)
    return tempo.item()

In [None]:
get_features_bpm('../../../Source/Clean_train_clips/Augmented/Shuffle/1/1_stretch.wav')

In [None]:
get_features_bpm('../../../Source/Clean_train_clips/Shuffle/1/1.wav')

In [None]:
# One tempo estimate per clip, in row order.
tap['BPM'] = [get_features_bpm(clip_path) for clip_path in tap['Path']]

In [None]:
tap.head()

**Add features: zero-crossing rate (pad with zeros to make a consistent length)**

In [None]:
# Number of samples in every clip; the maximum is the target length used for padding.
length_list = [len(librosa.load(clip_path)[0]) for clip_path in path_col]
print(max(length_list))

In [None]:
# Target length (in samples) for padding: the longest clip found in `path_col`.
length = max(length_list)
def get_features_ZCR(path):
    """Return the framewise zero-crossing rate of the audio file at `path`.

    Clips shorter than the notebook-level `length` are centre-padded to
    `length` so every clip yields the same number of frames. Clips that are
    already at least `length` samples long are used as they are (they are not
    truncated).

    Returns the array produced by librosa.feature.zero_crossing_rate with
    frame_length=250 and hop_length=125; callers take element [0] of it to get
    the per-frame values.
    """
    samples, sample_rate = librosa.load(path)
    if len(samples) < length:
        # Pass `size` by keyword: recent librosa releases no longer accept it positionally.
        y = librosa.util.pad_center(samples, size=length, axis=0)
    else:
        y = samples
    return librosa.feature.zero_crossing_rate(y, frame_length=250, hop_length=125)

In [None]:
# Create a zero-filled block of 165 ZCR columns and append it to `tap`.
# The row count comes from `tap` itself instead of a hard-coded 912.
# NOTE: the new columns get default integer labels 0..164, which overlap the
# labels of the MFCC columns when those are present.
d = pd.DataFrame(np.zeros((len(tap), 165)))
tap = pd.concat([tap, d], axis=1)

In [None]:
# Fill the ZCR columns row by row. The ZCR block is the last 165 columns of
# `tap` (appended by the previous cell), so compute its start position instead
# of using a fixed offset of 2, which overwrites the MFCC columns whenever they
# are present. With only Path and Labels ahead of it the start is still 2.
zcr_start = tap.shape[1] - 165
for j in range(len(tap)):
    s = get_features_ZCR(tap.loc[j, 'Path'])[0]
    for i in range(165):
        tap.iat[j, zcr_start + i] = s.item(i)

In [None]:
tap.head()

**Add features: root mean square energy value**

In [None]:
def get_features_rmse(path):
    """Load the clip at `path` and return its samples centre-padded to 54000.

    NOTE: despite the name, no RMS energy is computed yet. Both energy
    computations are still commented out below, so the padded waveform itself
    is what gets returned.
    """
    samples, sample_rate = librosa.load(path)
    # Pass `size` by keyword: recent librosa releases no longer accept it positionally.
    samples = librosa.util.pad_center(samples, size=54000, axis=0)
    #return np.mean(librosa.feature.rmse(y=samples).T,axis=0).item()
    #return librosa.feature.rmse(samples, frame_length=512, hop_length=256)
    return samples

In [None]:
#tap['RMSE'] = [get_features_rmse(tap.loc[idx, 'Path']) for idx in range (len(tap))]
get_features_rmse('../../../Source/Clean_train_clips/Shuffle/1/2.wav').shape
plt.plot(get_features_rmse('../../../Source/Clean_train_clips/Shuffle/1/2.wav'))

**Add features: short term energy**

In [None]:
def get_features_ste(path):
    """Return the short-term energy of the audio file at `path`.

    The signal is cut into frames of 250 samples taken every 125 samples
    (the last frames may be shorter), and the energy of a frame is the sum of
    its squared samples.

    Returns
    -------
    numpy.ndarray with one energy value per frame.
    """
    hop_length = 125
    frame_length = 250
    samples, sample_rate = librosa.load(path)
    # np.sum is vectorised, whereas the builtin sum() walked each frame element
    # by element in Python. Squares are already non-negative, so the former
    # abs() around them was a no-op.
    energy = np.array([np.sum(samples[i:i + frame_length] ** 2)
                       for i in range(0, len(samples), hop_length)])
    #return np.mean(energy.T)
    return energy

In [None]:
get_features_ste('../../../Source/Clean_train_clips/Augmented/Ball_change/5/1_stretch.wav').shape

In [None]:
y, sr = librosa.load('../../../Source/Clean_train_clips/Augmented/Ball_change/5/1_stretch.wav')
plt.plot(y)

In [None]:
#tap['STE'] = [get_features_ste(tap.loc[idx, 'Path']) for idx in range (len(tap))]

**Add features: Short-time Fourier Transform**

In [None]:
#tap['STFT'] = [get_features_stft(tap.loc[idx, 'Path']) for idx in range (len(tap))]


In [None]:
#for i in range (int(n_fft/2+1)):
#    tap[str(i)] = [get_features_stft(tap.loc[idx, 'Path'])[i] for idx in range (len(tap))]

**Add features: Fast Fourier Transform**

In [None]:
#tap['FFT'] = [get_features_fft(tap.loc[idx, 'Path']) for idx in range (len(tap))]

**Add features: Discrete cosine transform**

In [None]:
#tap['DCT'] = [get_features_dct(tap.loc[idx, 'Path']) for idx in range (len(tap))]

### Pimped out DataFrame

In [22]:
tap.head()

Unnamed: 0,Path,Labels,0,1,2,3,4,5,6,7,...,10,11,12,13,14,15,16,17,18,19
0,../../../Source/Clean_train_clips/Re_augmented...,1,-111.605465,39.188962,-81.679966,20.393887,4.074106,-11.102911,-18.30051,-4.375751,...,-14.746165,-13.217085,2.010731,-9.008981,-11.552828,1.225305,-3.679972,-13.073083,-2.380595,-2.002468
1,../../../Source/Clean_train_clips/Re_augmented...,1,-124.765927,59.099908,-107.706609,34.986722,-7.126354,-4.114656,-28.8561,0.194913,...,-19.339707,-13.854616,-1.720148,-9.82881,-14.683264,-0.436352,-6.158605,-14.315085,-3.680532,-3.380997
2,../../../Source/Clean_train_clips/Re_augmented...,1,-157.899262,62.348967,-104.69015,33.359436,-9.649191,-4.82344,-30.225424,0.160945,...,-22.051224,-11.060202,1.349207,-10.08947,-13.73011,-0.723506,-5.207173,-16.210524,-4.518669,-3.49228
3,../../../Source/Clean_train_clips/Re_augmented...,1,-112.563192,43.32204,-77.893877,23.408783,9.349165,-9.172222,-12.264922,-10.973006,...,-24.674963,-12.264014,5.147175,-9.701486,-10.931069,-0.785393,-1.715562,-19.159819,-5.605177,-2.166103
4,../../../Source/Clean_train_clips/Re_augmented...,1,-127.778268,59.036205,-98.709749,36.489147,-5.924282,-0.904265,-23.447733,-5.196082,...,-32.384483,-10.076653,-0.175095,-9.560018,-14.250261,-1.962245,-4.320603,-20.652914,-8.085718,-2.686323


### Split into train and test sets

In [23]:
# Features are every column of `tap` except the label; note that `Path` stays in X.
# `y` rebinds the name that earlier cells used for audio samples.
X = tap.drop(['Labels'], axis =1)
y = tap[['Labels']]

In [24]:
#X_train, X_test, y_train, y_test = train_test_split(X, y,
#                                                    stratify=y, 
#                                                    test_size=0.25)

In [25]:
X_train = pd.DataFrame(X)
#X_test = pd.DataFrame(X_test)
y_train = pd.DataFrame(y)
#y_test = pd.DataFrame(y_test)

In [26]:
X_train.to_csv('../../../Source/Data/X_train_audio_reaugmented_mfcc.csv', index=None)
y_train.to_csv('../../../Source/Data/y_train_audio_reaugmented_mfcc.csv', index=None)
#X_test.to_csv('../../../Source/Data/X_test_audio_augmented_stft.csv', index=None)
#y_test.to_csv('../../../Source/Data/y_test_audio_augmented_stft.csv', index=None)
#X_train.to_csv('../../../Source/Data/X_train_audio_augmented_mfcc_tempo.csv', index=None)
#y_train.to_csv('../../../Source/Data/y_train_audio_augmented_mfcc_tempo.csv', index=None)



In [27]:
tap.to_csv('../../../Source/Data/train_data_reaugmented_mfcc.csv', index=None)

### Repeat feature extraction on validation/test set

In [None]:
#y_test = pd.read_csv('../../../Source/Data/y_test_preAugmented_stft.csv')

In [None]:
#X_test_path_col = []
#X_test = pd.read_csv('../../../Source/Data/X_test_preAugmented_stft.csv', names=['Path'])
#for i in range ( len(X_test)):
#    X_test_path_col.append(Path(X_test.loc[i, 'Path']))

In [None]:
#test_data = pd.DataFrame({'Path':X_test_path_col})

In [None]:
#test_data['Labels'] = [get_label(test_data.loc[idx,'Path']) for idx in range(len(test_data))]

In [28]:
test_data = pd.read_csv('../../../Source/Data/X_test_reserved.csv')
test_labels = pd.read_csv('../../../Source/Data/y_test_reserved.csv')

In [33]:
test_data.shape

(115, 21)

In [None]:
#for i in range (int(n_fft/2+1)):
#    test_data[str(i)] = [get_features_stft(test_data.loc[idx, 'Path'])[i] for idx in range (len(test_data))]

In [None]:
#for i in range (668):
#    test_data[str(i)] = [get_features_ZCR(path)[0][i] for idx in range (len(test_data))]

In [30]:
# Create a zero-filled block with one column per MFCC coefficient and append it
# to `test_data`. The shape is derived from the data (rows) and `n_mfcc`
# (columns) instead of hard-coded 115 x 20.
d = pd.DataFrame(np.zeros((len(test_data), n_mfcc)))
test_data = pd.concat([test_data, d], axis=1)

In [35]:
# FASTER: compute each test clip's MFCC vector once, then copy it into that row.
# The feature columns start at position 1 because test_data has a Path column
# but no Labels column.
for row, clip_path in enumerate(test_data['Path']):
    coefficients = get_features_mfcc(clip_path)
    for k in range(n_mfcc):
        test_data.iat[row, k + 1] = coefficients[k]

In [None]:
#test_data['BPM'] = [get_features_bpm(test_data.loc[idx, 'Path']) for idx in range (len(test_data))]

In [None]:
# Create a zero-filled block of 165 ZCR columns and append it to `test_data`.
# The row count comes from `test_data` itself instead of a hard-coded 115.
# NOTE: the new columns get default integer labels 0..164, which overlap the
# labels of the MFCC columns when those are present.
d = pd.DataFrame(np.zeros((len(test_data), 165)))
test_data = pd.concat([test_data, d], axis=1)

In [None]:
# Fill the ZCR columns row by row. The ZCR block is the last 165 columns of
# `test_data` (appended by the previous cell), so compute its start position
# instead of using a fixed offset of 1, which overwrites the MFCC columns
# whenever they are present. With only the Path column ahead of it (there is no
# Labels column here) the start is still 1.
zcr_start = test_data.shape[1] - 165
for j in range(len(test_data)):
    s = get_features_ZCR(test_data.loc[j, 'Path'])[0]
    for i in range(165):
        test_data.iat[j, zcr_start + i] = s.item(i)

In [None]:
#X = test_data.drop(['Labels'], axis =1)
#y = test_data[['Labels']]

In [36]:
#X_test = pd.DataFrame(X)
#y_test = pd.DataFrame(y)
X_test = test_data
y_test = test_labels

In [39]:
#X_test.head()
#y_test.head()

In [40]:
X_test.to_csv('../../../Source/Data/X_test_audio_mfcc_2.csv', index=None)
y_test.to_csv('../../../Source/Data/y_test_audio_mfcc_2.csv', index=None)