In [187]:
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import numpy as np
import librosa
import os

# calculate Energy
def energy_calc(signal, segment_length):
    energy = []
    for i in range(int(len(signal)/segment_length)):
        segment = signal[i*segment_length:(i+1)*segment_length]# try except error ...
        energy.append(np.sum(np.square(segment)) / segment_length)
        if energy[-1] < 0:
            print(i)
    return energy


def preprocess_signal(filename, short_term_length=0.020, short_term_overlap=0,\
                      medium_term_length=1, medium_term_overlap=0.020):
    
    # Import audio signal
    sr, signal = read(filename)
    
    # Convert to 8kHz
    sr_objective = 8000
    sr_ratio = int(sr/sr_objective)

    signal = signal[::sr_ratio,0]
    sr = sr_objective

    # Normalise
    signal = signal.astype(np.float32)
    signal = signal / np.abs(signal).max() / 2
    
    # Calculate length and define segments
    length = len(signal)
    length_s = length/sr # length of segment in seconds
    short_term_length = 0.020 # s 
    short_term_overlap = 0 # s
    medium_term_length = 1 # s 
    medium_term_overlap = 0.020 # s

    # Convert to samples per segment
    n_fft_st = int(length_s // (short_term_length - short_term_overlap))
    hop_length_st = n_fft_st # no overlap
    segment_length = n_fft_st
    energy = np.array(energy_calc(signal, n_fft_st))
    
    # SHORT TERM ANALYSIS
    # Calculate MFCCs for short term
    mfcc_st = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft_st, n_mfcc=13, hop_length=hop_length_st)
    mfcc_st = mfcc_st[:,:len(energy)]
    coefficients_st = np.vstack((mfcc_st, energy))

    
    # MEDIUM TERM ANALYSIS
    # Calculation of segments length for medium term analysis
    n_segments_mt = int(length_s // (medium_term_length - medium_term_overlap))
    n_fft_mt = int(coefficients_st.shape[1] * medium_term_length / length_s)
    hop_length_mt = int(coefficients_st.shape[1] * (medium_term_length - medium_term_overlap) / length_s)     

    # Calculation of parameters for medium term analysis
    for i in range(n_segments_mt):
        coefficient_i = coefficients_st[:, i*hop_length_mt:i*hop_length_mt+n_fft_mt]
        mean_i = np.mean(coefficient_i, axis=1)
        std_i = np.std(coefficient_i, axis=1)
        if i == 0:
            parameters_mt = np.hstack((mean_i, std_i))
        else:
            parameters_mt = np.row_stack((parameters_mt, np.hstack((mean_i, std_i))))

    # LONG TERM ANALYSIS 
    # Calculation of parameters for long term analysis
    parameters_lt = np.mean(parameters_mt, axis=0)

    return parameters_lt

def get_label(filename):
    label = filename.split("/")[-2]
    return label

def add_label(filename):
    coefficients = preprocess_signal(filename)
    label = np.array(get_label(filename))
    return np.hstack((coefficients, label))



In [188]:
filename = 'data/happy/happy01.wav'
preprocess_signal(filename)

array([-5.63653785e+02,  5.20675731e+01,  8.89787488e+00,  7.44162645e+00,
       -5.25225910e+00, -1.11777476e+01, -9.70701272e+00, -5.84339502e+00,
       -9.11111698e+00, -1.33537441e+01, -5.46280345e+00, -4.94980163e+00,
       -4.95768085e+00,  3.92568166e-03,  1.44557718e+02,  4.95313284e+01,
        2.51291264e+01,  2.36178286e+01,  1.37604781e+01,  1.67577753e+01,
        1.42047683e+01,  1.31571770e+01,  1.18102456e+01,  1.05672550e+01,
        1.06916349e+01,  1.01569171e+01,  9.74912678e+00,  5.34257932e-03])

In [191]:
add_label(filename)

array(['-563.6537849024722', '52.06757308934864', '8.89787487724894',
       '7.441626449948863', '-5.252259098777646', '-11.177747606446868',
       '-9.707012723151006', '-5.843395023165565', '-9.111116984957143',
       '-13.353744100583226', '-5.462803449873862', '-4.949801625978005',
       '-4.957680847495794', '0.003925681660364937', '144.55771813941365',
       '49.531328384412205', '25.129126350096566', '23.617828558393263',
       '13.76047811261888', '16.757775283670767', '14.204768263856876',
       '13.157176977638823', '11.810245560730635', '10.567255048223094',
       '10.691634904790398', '10.156917096468524', '9.749126782940861',
       '0.0053425793245134075', 'happy'], dtype='<U32')

In [192]:
path = 'data'
data = np.empty((29, 0))
for i, (dirpath, dirnames, filenames) in enumerate(os.walk(path)):

        # ensure we're processing a genre sub-folder level
        if dirpath is not path:
            for file in filenames:
                data_file = add_label(os.path.join(dirpath, file))
                data = np.append(data, np.expand_dims(data_file, axis=1), axis=1)
data = data.T
np.savez('Sentiment_analysis_data', inputs=data[:,:28], targets=data[:,-1])

## Neural Network

In [193]:
import tensorflow as tf
from sklearn import preprocessing

In [194]:
npz = np.load('Sentiment_analysis_data.npz')

In [195]:
x = npz['inputs']
y = npz['targets']

In [196]:
n = x.shape[0]

In [197]:
# Standarise data
scaled_x = preprocessing.scale(x)

In [198]:
# Shuffle data
shuffled_indices = np.arange(n)
np.random.shuffle(shuffled_indices)

shuffled_x = scaled_x[shuffled_indices]
shuffled_y = y[shuffled_indices]

In [199]:
labels_y, onehot_y = np.unique(shuffled_y, return_inverse=True)

In [200]:
shuffled_y, labels_y, onehot_y

(array(['sad', 'happy', 'happy', 'sad'], dtype='<U32'),
 array(['happy', 'sad'], dtype='<U32'),
 array([1, 0, 0, 1]))

In [201]:
n_classes = labels_y.shape[0]
targets = onehot_y
# onehot_y = np.eye(n_classes)[targets]
onehot_y

array([1, 0, 0, 1])

In [202]:
ratio = 0.75

n_train = int(n*0.75)

x_train = shuffled_x[:n_train]
y_train = onehot_y[:n_train]

x_test = shuffled_x[n_train:]
y_test = onehot_y[n_train:]

In [203]:
hidden_layer_1_size = 50
hidden_layer_2_size = 50
output_size = 2

model = tf.keras.Sequential([
#     tf.keras.layers.Flatten(input_shape=(x_test.shape[0], x_test.shape[1])),
    tf.keras.layers.Dense(hidden_layer_1_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_1_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [204]:
x_train.shape, y_train.shape

((3, 28), (3,))

In [205]:
batch_size = 1
max_epochs = 10

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=max_epochs,
          verbose=2
          )

Epoch 1/10
3/3 - 0s - loss: 0.6896 - accuracy: 0.6667
Epoch 2/10
3/3 - 0s - loss: 0.5380 - accuracy: 0.6667
Epoch 3/10
3/3 - 0s - loss: 0.4172 - accuracy: 0.6667
Epoch 4/10
3/3 - 0s - loss: 0.3202 - accuracy: 1.0000
Epoch 5/10
3/3 - 0s - loss: 0.2402 - accuracy: 1.0000
Epoch 6/10
3/3 - 0s - loss: 0.1976 - accuracy: 1.0000
Epoch 7/10
3/3 - 0s - loss: 0.1461 - accuracy: 1.0000
Epoch 8/10
3/3 - 0s - loss: 0.1223 - accuracy: 1.0000
Epoch 9/10
3/3 - 0s - loss: 0.0969 - accuracy: 1.0000
Epoch 10/10
3/3 - 0s - loss: 0.0742 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7f7f8c722550>

In [206]:
test_loss, test_accuracy = model.evaluate(x_test, y_test)



In [207]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.96. Test accuracy: 0.00%
