In [3]:
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import numpy as np
import librosa

In [4]:
filename = 'data/happy/happy02.wav'

In [5]:
sr, signal = read(filename)

In [6]:
# Convert to 8kHz
sr_objective = 8000
sr_ratio = int(sr/sr_objective)

signal = signal[::sr_ratio,0]
sr = sr_objective

# Normalise
signal = signal.astype(np.float32)
signal = signal / np.abs(signal).max() / 2

In [7]:
# calculate length and define segments

length = len(signal)
length_s = length/sr # length of segment in seconds

short_term_length = 0.020 # s 
short_term_overlap = 0 # s

medium_term_length = 1 # s 
medium_term_overlap = 0.020 # s

In [8]:
# convert to samples per segment

n_fft_st = int(length_s // (short_term_length - short_term_overlap))
hop_length_st = n_fft_st # no overlap

In [9]:
# calculate Energy
def energy_calc(signal, segment_length):
    energy = []
    for i in range(int(len(signal)/segment_length)):
        segment = signal[i*segment_length:(i+1)*segment_length]# try except error ...
        energy.append(np.sum(np.square(segment)) / segment_length)
        if energy[-1] < 0:
            print(i)
    return energy

In [10]:
segment_length = n_fft_st
energy = np.array(energy_calc(signal, n_fft_st))
energy.shape

(160,)

### Short Term Analysis

In [11]:
# Calculate MFCCs for short term
mfcc_st = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft_st, n_mfcc=13, hop_length=hop_length_st)
mfcc_st = mfcc_st[:,:len(energy)]
mfcc_st.shape

(13, 160)

In [12]:
coefficients_st = np.vstack((mfcc_st, energy))
coefficients_st.shape

(14, 160)

In [13]:
coefficients_st

array([[-8.07894165e+02, -8.11137451e+02, -8.06580505e+02, ...,
        -6.87594055e+02, -5.13701904e+02, -6.27787354e+02],
       [-5.44695473e+00, -6.19074440e+00,  2.85001493e+00, ...,
         2.98954678e+01, -8.63910217e+01, -2.51684494e+01],
       [ 5.04853821e+00,  5.11809015e+00, -3.50555801e+00, ...,
         9.65695000e+00, -4.93669968e+01, -1.65936317e+01],
       ...,
       [ 5.32635736e+00,  4.23884821e+00,  5.74078846e+00, ...,
        -7.54451990e+00,  2.69626212e+00, -7.83749485e+00],
       [ 1.03735762e+01,  1.41846240e-01, -7.77473867e-01, ...,
        -5.26080608e+00,  1.99924514e-01, -3.99134088e+00],
       [ 2.85804729e-08,  3.34533884e-08,  2.38718143e-08, ...,
         1.28669806e-04,  1.47798284e-05,  9.49315170e-07]])

### Medium Term Analysis

In [46]:
# Calculation of segments length for medium term analysis
n_segments_mt = int(length_s // (medium_term_length - medium_term_overlap))

n_fft_mt = int(coefficients_st.shape[1] * medium_term_length / length_s)
hop_length_mt = int(coefficients_st.shape[1] * (medium_term_length - medium_term_overlap) / length_s) 

In [47]:
n_segments_mt

5

In [48]:
hop_length_mt

31

In [49]:
n_fft_mt

32

In [50]:
# Calculation of parameters for medium term analysis
for i in range(n_segments_mt):
    coefficient_i = coefficients_st[:, i*hop_length_mt:i*hop_length_mt+n_fft_mt]
    mean_i = np.mean(coefficient_i, axis=1)
    std_i = np.std(coefficient_i, axis=1)
    if i == 0:
        parameters_mt = np.hstack((mean_i, std_i))
    else:
        parameters_mt = np.row_stack((parameters_mt, np.hstack((mean_i, std_i))))
parameters_mt.shape

(5, 28)

In [52]:
parameters_mt

array([[-6.07647285e+02,  3.42703392e+01,  8.48582909e+00,
         1.71751554e+01,  4.10098400e+00,  8.32038901e+00,
        -7.64753586e+00, -5.26155654e+00, -3.49418551e+00,
        -1.40180118e+01, -8.08093873e+00, -3.23192842e+00,
        -1.33943588e+01,  3.77468249e-03,  2.20332172e+02,
         3.76649446e+01,  2.89429097e+01,  2.75223010e+01,
         1.07261201e+01,  1.72485251e+01,  1.06704773e+01,
         1.06947324e+01,  9.48516939e+00,  1.64844621e+01,
         1.12309209e+01,  8.50363708e+00,  1.82544107e+01,
         5.65030241e-03],
       [-3.71118052e+02,  3.45881971e+01,  2.11987227e+01,
         5.45285952e+01,  1.34892709e+01, -5.20427376e-02,
        -1.53967937e+01, -5.72083364e+00, -5.30284145e+00,
        -3.30707633e+01, -1.20971675e+01,  3.01407081e+00,
        -2.14355341e+01,  7.17405073e-03,  4.49109870e+01,
         8.35935552e+01,  3.38770433e+01,  2.82181202e+01,
         1.62886241e+01,  3.04074890e+01,  1.32405739e+01,
         1.63316635e+01,  1.42

### Long Term Analysis

In [53]:
# Calculation of parameters for long term analysis
parameters_lt = np.mean(parameters_mt, axis=0)
parameters_lt.shape

(28,)

In [54]:
parameters_lt

array([-5.02029923e+02,  4.06016647e+01,  1.91823649e+01,  4.02981212e+01,
        4.33879550e+00, -1.73989189e+00, -1.23199212e+01, -9.10006254e+00,
       -3.68050510e+00, -2.44847812e+01, -1.02009374e+01,  8.72966557e-01,
       -1.33137355e+01,  3.33873711e-03,  1.14688287e+02,  6.44903478e+01,
        2.99756987e+01,  2.77796996e+01,  1.58234617e+01,  2.15549613e+01,
        1.49548466e+01,  1.49445415e+01,  1.36071381e+01,  1.43207310e+01,
        1.19676177e+01,  1.00308679e+01,  1.29424286e+01,  4.78166938e-03])