# Preprocessing of audio files for sentiment analysis

In [10]:
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import numpy as np
import librosa
import os

# Calculate Energy
def energy_calc(signal: np.array, segment_length: int) -> np.array:
    """
    Calculates energy of the audio segment. Normalised with segment legth.
    """
    energy = []
    for i in range(int(len(signal)/segment_length)):
        segment = signal[i*segment_length:(i+1)*segment_length]# try except error ...
        energy.append(np.sum(np.square(segment)) / segment_length)
        if energy[-1] < 0:
            print(i)
    return energy

# Preprocess signal
def preprocess_signal(filename: str, short_term_length:float=0.020, short_term_overlap:float=0,\
                      medium_term_length:float=1, medium_term_overlap:float=0.020) -> np.array:
    """
    Preprocessing of the audiofile to get 28 coeficients after three steps:
    - Short term analysis: segmentation of audio to get energy and 13 MFCCs per segment. 
    - Medium term analysis: segmentation of audio to get mean and standard deviation per segment.
    - Long term analysis: mean of the medium term values per segment.
    """
    
    # Import audio signal
    sr, signal = read(filename)
    
    # Convert to 8kHz
    sr_objective = 8000
    sr_ratio = int(sr/sr_objective)
    try:
        signal = signal[::sr_ratio,0]
    except IndexError:
        signal = signal[::sr_ratio]
    sr = sr_objective    

    # Normalise
    signal = signal.astype(np.float32)
    signal = signal / np.abs(signal).max() / 2
    
    # Calculate length and define segments
    length = len(signal)
    length_s = length/sr # length of segment in seconds
    short_term_length = 0.020 # s 
    short_term_overlap = 0 # s
    medium_term_length = 1 # s 
    medium_term_overlap = 0.020 # s

    # Convert to samples per segment
    n_fft_st = int(length_s // (short_term_length - short_term_overlap))
    hop_length_st = n_fft_st # no overlap
    segment_length = n_fft_st
    energy = np.array(energy_calc(signal, n_fft_st))
    
    # SHORT TERM ANALYSIS
    # Calculate MFCCs for short term
    mfcc_st = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft_st, n_mfcc=13, hop_length=hop_length_st)
    mfcc_st = mfcc_st[:,:len(energy)]
    coefficients_st = np.vstack((mfcc_st, energy))

    
    # MEDIUM TERM ANALYSIS
    # Calculation of segments length for medium term analysis
    n_segments_mt = int(length_s // (medium_term_length - medium_term_overlap))
    n_fft_mt = int(coefficients_st.shape[1] * medium_term_length / length_s)
    hop_length_mt = int(coefficients_st.shape[1] * (medium_term_length - medium_term_overlap) / length_s)     

    # Calculation of parameters for medium term analysis
    for i in range(n_segments_mt):
        coefficient_i = coefficients_st[:, i*hop_length_mt:i*hop_length_mt+n_fft_mt]
        mean_i = np.mean(coefficient_i, axis=1)
        std_i = np.std(coefficient_i, axis=1)
        if i == 0:
            parameters_mt = np.hstack((mean_i, std_i))
        else:
            parameters_mt = np.row_stack((parameters_mt, np.hstack((mean_i, std_i))))

    # LONG TERM ANALYSIS 
    # Calculation of parameters for long term analysis
    if n_segments_mt > 1:
        parameters_lt = np.mean(parameters_mt, axis=0)
    else: 
        parameters_lt = parameters_mt

    return parameters_lt

# Get labels from directories
def get_label(filename:str) -> str:
    """
    Assign label from directory name.
    """
    label = filename.split("/")[-2]
    return label

# Merge characteristics and labels
def add_label(filename:str) -> np.array:
    """
    Add label to numpy array with 28 characteristics.
    """
    coefficients = preprocess_signal(filename)
    label = np.array(get_label(filename))
    return np.hstack((coefficients, label))

# Merge characteristics and labels from numpy arrays
def add_label_arrays(x:np.array, y:np.array) -> np.array:
    return np.hstack((x, y))

## Preprocessing of one audio file

In [11]:
filename = 'data/happy/OAF_back_happy.wav'
preprocess_signal(filename)

array([-6.93979321e+02, -1.69657067e+00,  1.91588197e+00,  1.73659050e+01,
       -1.09046655e+01, -7.11656473e+00, -7.40737883e+00, -3.93999587e+00,
       -7.61227444e+00, -5.92619336e+00,  1.35270376e+00, -6.45182750e+00,
        7.49836474e-01,  3.15046834e-03,  8.84843322e+01,  5.13459678e+01,
        2.41188476e+01,  1.39825992e+01,  1.71637517e+01,  1.37460917e+01,
        1.33230337e+01,  8.69477242e+00,  1.04948500e+01,  9.86751072e+00,
        1.00342119e+01,  9.80094698e+00,  9.08291644e+00,  4.23519576e-03])

In [12]:
add_label(filename)

array(['-693.979321443582', '-1.696570674075356', '1.9158819657337816',
       '17.365904977804497', '-10.904665450506572', '-7.116564728602579',
       '-7.407378833505172', '-3.9399958698055415', '-7.612274439840377',
       '-5.9261933575514', '1.3527037599418736', '-6.451827502891987',
       '0.7498364737611029', '0.0031504683376025254', '88.48433222051601',
       '51.345967795497096', '24.11884761473756', '13.982599152569499',
       '17.163751667610406', '13.746091660038916', '13.323033704787505',
       '8.694772417788025', '10.49484997040529', '9.867510722897586',
       '10.034211872607441', '9.800946976442336', '9.0829164389685',
       '0.004235195755367474', 'happy'], dtype='<U32')

## Preprocessing of data from directory

In [16]:
path = 'data'
data = np.empty((29, 0))
for i, (dirpath, dirnames, filenames) in enumerate(os.walk(path)):
        print(f'Directory: {dirpath}')
        # ensure we're processing a genre sub-folder level
        if dirpath is not path:
            for file in filenames:
                data_file = add_label(os.path.join(dirpath, file))
                data = np.append(data, np.expand_dims(data_file, axis=1), axis=1)

data = data.T
np.savez('Sentiment_analysis_data', inputs=data[:,:28], targets=data[:,-1])

Directory: data
Directory: data/fear
Directory: data/disgust
Directory: data/happy
Directory: data/sad
Directory: data/neutral
Directory: data/angry
Directory: data/surprise
