# Preprocessing of audio files for sentiment analysis

In [1]:
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import numpy as np
import librosa

In [12]:
filename = 'data/happy/happy01.wav'

In [13]:
sr, signal = read(filename)

In [14]:
# Convert to 8kHz
sr_objective = 8000
sr_ratio = int(sr/sr_objective)

try:
    signal = signal[::sr_ratio,0]
except IndexError:
    signal = signal[::sr_ratio]
    
sr = sr_objective    

# Normalise
signal = signal.astype(np.float32)
signal = signal / np.abs(signal).max() / 2

In [15]:
# calculate length and define segments

length = len(signal)
length_s = length/sr # length of segment in seconds

short_term_length = 0.020 # s 
short_term_overlap = 0 # s

medium_term_length = 1 # s 
medium_term_overlap = 0.020 # s

In [16]:
# convert to samples per segment

n_fft_st = int(length_s // (short_term_length - short_term_overlap))
hop_length_st = n_fft_st # no overlap

In [17]:
# calculate Energy
def energy_calc(signal, segment_length):
    energy = []
    for i in range(int(len(signal)/segment_length)):
        segment = signal[i*segment_length:(i+1)*segment_length]# try except error ...
        energy.append(np.sum(np.square(segment)) / segment_length)
        if energy[-1] < 0:
            print(i)
    return energy

In [18]:
segment_length = n_fft_st
energy = np.array(energy_calc(signal, n_fft_st))
energy.shape

(160,)

### Short Term Analysis

In [19]:
# Calculate MFCCs for short term
mfcc_st = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft_st, n_mfcc=13, hop_length=hop_length_st)
mfcc_st = mfcc_st[:,:len(energy)]
mfcc_st.shape



(13, 160)

In [20]:
coefficients_st = np.vstack((mfcc_st, energy))
coefficients_st.shape

(14, 160)

In [21]:
coefficients_st

array([[-8.29161560e+02, -8.30007812e+02, -8.29951111e+02, ...,
        -7.57773987e+02, -7.51710754e+02, -7.55843506e+02],
       [ 9.48702717e+00,  8.27597523e+00,  1.56528730e+01, ...,
         3.49834099e+01,  3.51847916e+01,  2.33186264e+01],
       [-2.70762205e+00, -9.05908203e+00,  2.67609739e+00, ...,
        -1.54895535e+01, -6.56451511e+00,  1.81125200e+00],
       ...,
       [ 2.92121696e+00,  8.31076813e+00,  3.91079521e+00, ...,
        -5.86307144e+00, -5.05378151e+00, -6.24084997e+00],
       [ 4.52248764e+00,  4.69734955e+00,  3.31858993e-01, ...,
         1.00627918e+01, -1.99341130e+00,  2.74989128e-01],
       [ 1.42190369e-08,  1.42190369e-08,  1.45572356e-08, ...,
         2.58677679e-07,  1.94478791e-07,  3.69395023e-06]])

### Medium Term Analysis

In [22]:
# Calculation of segments length for medium term analysis
n_segments_mt = int(length_s // (medium_term_length - medium_term_overlap))

n_fft_mt = int(coefficients_st.shape[1] * medium_term_length / length_s)
hop_length_mt = int(coefficients_st.shape[1] * (medium_term_length - medium_term_overlap) / length_s) 

In [23]:
n_segments_mt

4

In [24]:
hop_length_mt

37

In [25]:
n_fft_mt

38

In [26]:
# Calculation of parameters for medium term analysis
for i in range(n_segments_mt):
    coefficient_i = coefficients_st[:, i*hop_length_mt:i*hop_length_mt+n_fft_mt]
    mean_i = np.mean(coefficient_i, axis=1)
    std_i = np.std(coefficient_i, axis=1)
    if i == 0:
        parameters_mt = np.hstack((mean_i, std_i))
    else:
        parameters_mt = np.row_stack((parameters_mt, np.hstack((mean_i, std_i))))
parameters_mt.shape

(4, 28)

In [27]:
parameters_mt

array([[-6.90725827e+02,  2.86621210e+01,  8.96717091e+00,
         4.48514923e+00, -5.35701460e+00, -5.16059302e+00,
        -3.21534335e+00, -4.71049260e+00, -7.54248108e+00,
        -6.55234593e+00, -4.23017829e+00, -5.14766665e+00,
        -3.60241607e+00,  3.56402518e-03,  1.77122755e+02,
         3.31171986e+01,  2.47380897e+01,  2.78873502e+01,
         1.26878973e+01,  1.60882072e+01,  9.73016763e+00,
         1.26431504e+01,  1.15063497e+01,  7.96397477e+00,
         9.33555395e+00,  7.20955447e+00,  9.33235789e+00,
         6.68123757e-03],
       [-4.07694145e+02,  4.60009475e+01,  1.92441713e+01,
         2.16733828e+01, -4.71479425e+00, -2.08422126e+01,
        -1.05692202e+01, -8.96436686e+00, -1.26417941e+01,
        -2.51157888e+01, -5.24150198e+00, -1.25265109e+01,
        -7.67983967e+00,  7.09392061e-03,  9.47048298e+01,
         5.32814625e+01,  2.62215507e+01,  2.85479604e+01,
         1.45921866e+01,  1.73856190e+01,  1.36901218e+01,
         1.44050707e+01,  1.27

### Long Term Analysis

In [28]:
# Calculation of parameters for long term analysis
if n_segments_mt > 1:
    parameters_lt = np.mean(parameters_mt, axis=0)
else: 
    parameters_lt = parameters_mt
parameters_lt.shape

(28,)

In [29]:
parameters_lt

array([-5.63653785e+02,  5.20675731e+01,  8.89787488e+00,  7.44162645e+00,
       -5.25225910e+00, -1.11777476e+01, -9.70701272e+00, -5.84339502e+00,
       -9.11111698e+00, -1.33537441e+01, -5.46280345e+00, -4.94980163e+00,
       -4.95768085e+00,  3.92568166e-03,  1.44557718e+02,  4.95313284e+01,
        2.51291264e+01,  2.36178286e+01,  1.37604781e+01,  1.67577753e+01,
        1.42047683e+01,  1.31571770e+01,  1.18102456e+01,  1.05672550e+01,
        1.06916349e+01,  1.01569171e+01,  9.74912678e+00,  5.34257932e-03])