In [None]:
import os, time, librosa, librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tensorflow.keras import utils

import audio_data_loader as audioloader
import references as ref

In [None]:
# Build the source table through the project loader, starting from ref.root_path.
# Later cells call .query() and .iterrows() on it, so it is used as a pandas
# DataFrame; the column layout is presumably the one listed in the markdown
# cell below — TODO confirm against audio_data_loader.
source = audioloader.create_source_dataset(ref.root_path)

#### Output data set columns (by position):

0. full file name
1. modality
2. vocal channel
3. emotion
4. emotional intensity
5. statement
6. repetitions
7. actor index
8. gender (1 - female, 0 - male)

In [None]:
def get_features(y, sr):
    """Summarise the spectral centroid of one audio signal.

    Parameters
    ----------
    y
        Audio time series, forwarded unchanged to
        ``librosa.feature.spectral_centroid``.
    sr
        Sampling rate of ``y``, forwarded unchanged as well.

    Returns
    -------
    list
        ``[min, mean, max]`` taken over every value librosa returns for the
        spectral centroid of the signal.
    """
    # The centroid is the expensive part, so compute it once and reuse it for
    # all three statistics instead of re-running librosa for each of them.
    centroid = librosa.feature.spectral_centroid(y = y, sr = sr)

    return [np.min(centroid), np.mean(centroid), np.max(centroid)]

In [None]:
def get_dataset(source):
    """Extract per-file feature rows and split them by the gender column.

    Parameters
    ----------
    source : pandas.DataFrame
        One row per audio file. Only two columns are read, both by position:
        column 0 is handed to ``audioloader.read_audio`` and column 8 decides
        the group (``1`` goes to the first returned array, anything else to
        the second).

    Returns
    -------
    tuple of numpy.ndarray
        ``(f_train, m_train)``, each of shape ``(n_rows, 3)`` holding the three
        values produced by ``get_features``. A group without any rows is
        returned as an empty ``(0, 3)`` array so column slicing keeps working.

    Notes
    -----
    Processing is best-effort: a row that fails to load or to yield features
    is reported on stdout and skipped, it never aborts the whole run.
    """
    f_train = []
    m_train = []

    for i, row in tqdm(source.iterrows(), total = len(source)):

        try:
            # .iloc makes the positional access explicit; plain row[0] on a
            # label-indexed Series relies on a deprecated integer fallback.
            y, sr = audioloader.read_audio(row.iloc[0])
            features = get_features(y, sr)

            bucket = f_train if row.iloc[8] == 1 else m_train
            bucket.append([features[0], features[1], features[2]])

        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C / kernel interrupt
            # still stops the loop; the reason is printed to aid debugging.
            print(f'Invalid object index {i}')
            print(row)
            print(f'Reason: {exc!r}')

    # reshape(-1, 3) keeps the result 2-D even when a group is empty.
    return (np.array(f_train).reshape(-1, 3),
            np.array(m_train).reshape(-1, 3))

In [None]:
def show_diagram(x, y, title):
    """Plot the three feature columns of two groups side by side.

    Parameters
    ----------
    x : numpy.ndarray
        Rows for the group drawn in blue and labelled "man". Columns 0, 1
        and 2 go to the left, middle and right panel respectively.
    y : numpy.ndarray
        Rows for the group drawn in red and labelled "woman", same layout.
    title : str
        Figure title.

    Notes
    -----
    A group with no rows is skipped entirely. Each group is only ever sliced
    inside its own emptiness check, so an empty array of any shape (including
    ``(0,)``) cannot raise when the other group has data.

    NOTE(review): the legend text says "rms", but in this notebook the columns
    come from ``get_features``, which computes spectral-centroid statistics —
    confirm which label is intended.
    """
    fig, ax = plt.subplots(1, 3, figsize = (12, 6))
    fig.tight_layout(pad = 3.0)
    fig.suptitle(title, fontsize = 25, y = 1.1)

    stats = ('min', 'mean', 'max')
    plotted = False

    for data, color, who in ((x, 'blue', 'man'), (y, 'red', 'woman')):
        if data.shape[0] == 0:
            continue
        for col, stat in enumerate(stats):
            ax[col].plot(data[:, col], color = color, label = f'{stat} rms - {who}')
        plotted = True

    # legend() on an axis without labelled artists only produces a warning.
    if plotted:
        for axis in ax:
            axis.legend()

    plt.show()
    # Release the figure explicitly: this function is called once per
    # combination in a loop, and unclosed figures accumulate in memory.
    plt.close(fig)
    

In [None]:
def get_title(emotion_index,
              intensity_index,
              statement_index,
              repetions_index):
    """Build the two-line figure title for one combination of codes.

    Every argument is a 1-based code; it is shifted down by one and used to
    index the matching lookup in ``references``. The first title line is
    "<emotion>, <intensity>", the second "<statement> - <repetition>".
    """
    def _lookup(table, code):
        # Codes start at 1, the reference tables are indexed from 0.
        return table[code - 1]

    emotion_text = _lookup(ref.emotions_ref, emotion_index)
    intensity_text = _lookup(ref.emotional_intensity_ref, intensity_index)
    repetition_text = _lookup(ref.repetition_ref, repetions_index)
    statement_text = _lookup(ref.statement_ref, statement_index)

    first_line = f'{emotion_text}, {intensity_text}'
    second_line = f'{statement_text} - {repetition_text}'
    return first_line + '\n' + second_line

# Data Processing and results population

In [None]:
# Each entry of process_source is [emotion, intensity, statement, repetition]:
#   0 - emotion    [1...8]
#   1 - intensity  [1, 2]
#   2 - statement  [1, 2]
#   3 - repetition [1, 2]
#
# The list is generated instead of typed by hand so that no
# (intensity, statement, repetition) group can be forgotten. The hand-written
# version had no entries for intensity 1 / statement 2 / repetition 2.
# Intensity-2 groups start at emotion 2; the hand-written list followed that
# rule in every group but one, presumably because emotion 1 has no
# intensity-2 recordings in the data set — TODO confirm against the data.
process_source = [
    [emotion, intensity, statement, repetition]
    for intensity, statement, repetition in (
        (1, 1, 1),
        (2, 1, 1),
        (1, 2, 1),
        (1, 1, 2),
        (2, 2, 1),
        (2, 1, 2),
        (1, 2, 2),
        (2, 2, 2),
    )
    for emotion in range(1 if intensity == 1 else 2, 9)
]

In [None]:
# One figure per [emotion, intensity, statement, repetition] combination:
# filter the source table, extract the features, then plot both groups.
for i in process_source:

    emotion_code, intensity_code, statement_code, repetition_code = i

    flt = (f'Emotion == {emotion_code} and EmotionalIntensity == {intensity_code} '
           f'and Statement == {statement_code} and Repetions == {repetition_code}')
    df_source = source.query(flt)

    # get_dataset returns the gender == 1 rows first; show_diagram expects the
    # other group first, hence the swapped argument order below.
    f_train, m_train = get_dataset(df_source)

    show_diagram(m_train, f_train, get_title(*i))