# Imports

In [1]:
# Set current working directory to parent folder
import os
os.chdir(os.path.abspath(".."))

import pandas as pd
import numpy as np
import json
from transformers import WhisperTokenizer
from src.traductores import obtener_emocion
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, Embedding, GlobalAveragePooling1D
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Archivos importantes y paths
df_annotations = pd.read_excel('data/annotations.xlsx')

# Omitimos todos los audios en development
df_annotations = df_annotations[df_annotations['Type'] != 'Development'].reset_index(drop = True)

  from .autonotebook import tqdm as notebook_tqdm


# Preparación dataset

## Tamaño vocabulario

In [2]:
# Obtener tamaño total del vocabulario
all_tokens = WhisperTokenizer.from_pretrained("openai/whisper-large").get_vocab()
max_value = max(zip(all_tokens.values(), all_tokens.keys()))[0]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Creación dataset de texto

In [3]:
# Creación del dataset de texto
df_texto = pd.DataFrame()

df_texto.loc[0, 'id'] = 0

for file in os.listdir('data/TRANSCRIPCIONES/WHISPER'):
    with open(f'data/TRANSCRIPCIONES/WHISPER/{file}', 'r') as f: 
        data = json.load(f)
    
    start = max(df_texto.index)
    
    for i in range(len(data['segments'])):
        df_texto.at[i + start, 'Audio'] = file[:-5]
        df_texto.at[i + start, 'id'] = int(data['segments'][i]['id'])
        df_texto.at[i + start, 'Text'] = data['segments'][i]['text']
        df_texto.at[i + start, 'Tokens'] = str(data['segments'][i]['tokens'][1:-1])


# Agregamos cantidad de palabras
df_texto['num_words'] = df_texto['Text'].apply(lambda x:len(str(x).split()))

df_texto.head()

Unnamed: 0,id,Audio,Text,Tokens,num_words
0,0.0,MSP-Conversation_0002,It's our honor to talk about important issues...,"[467, 311, 527, 5968, 281, 751, 466, 1021, 266...",16
1,1.0,MSP-Conversation_0002,"The foundation of our show, Heart of the Matt...","[440, 7030, 295, 527, 855, 11, 13569, 295, 264...",13
2,2.0,MSP-Conversation_0002,Important issues and why they should be on ou...,"[42908, 2663, 293, 983, 436, 820, 312, 322, 52...",11
3,3.0,MSP-Conversation_0002,Looking at things as right versus wrong inste...,"[11053, 412, 721, 382, 558, 5717, 2085, 2602, ...",12
4,4.0,MSP-Conversation_0002,And sharing stories with real people's experi...,"[400, 5414, 3676, 365, 957, 561, 311, 5235, 37...",19


## Concatenar Objetivos

In [4]:
# Obtener objetivos
with open(f'data/MODELS/v3/objetivos.json', 'r') as f: objetivos = json.load(f)

df_objetivos = pd.DataFrame()

for _key in objetivos:
    df_temp = pd.DataFrame({'Audio': _key[:21], 'indice': objetivos[_key]['indice'], 'targets': objetivos[_key]['targets']})
    df_objetivos = pd.concat([df_temp, df_objetivos])

In [5]:
# Merge
df_texto['id'] = df_texto['id'].astype(int)
df_final = pd.merge(df_texto, df_objetivos, how = 'left', right_on = ['Audio','indice'], left_on = ['Audio','id'])

In [6]:
# Saco los de development que estan en el texto
df_final = df_final.dropna(subset = 'targets')

# Agrego emoción categoróica
df_final['Target'] = [obtener_emocion(i[0],i[1],i[2], mapping = 'Ekman') for i in df_final['targets']]

## Borrar ids con 0 palabras

In [7]:
df_final[df_final['num_words'] == 0]

Unnamed: 0,id,Audio,Text,Tokens,num_words,indice,targets,Target
83341,726,MSP-Conversation_2281,,[],0,726.0,"[7.339746733985966, 27.070739517689248, 36.032...",neutral


In [8]:
df_final = df_final[df_final['num_words'] > 0]

## Convertir tokens en formato lista

In [9]:
def convertir_strarray_a_array(filas):
    x = []
    for fila in filas:
        fila = fila.replace('[','').replace(']','').split(',')
        x.append([int(i) for i in fila])

    return x

df_final['Tokens'] = convertir_strarray_a_array(df_final['Tokens'])

## Concatenar tipo

In [10]:
df_final['Audio_Name'] = df_final['Audio'] + '.wav'

df_final = pd.merge(df_final, 
                    df_annotations[['Audio_Name','Type']].drop_duplicates(), 
                    how = 'left', 
                    left_on = 'Audio_Name', 
                    right_on = 'Audio_Name')

# Pre Procesado

In [19]:
# Hay que investigar por que hay targets na
df_final = df_final[~df_final['Target'].isna()]

## Train test split

In [11]:
x_train = [i for i in df_final[df_final['Type'] == 'Train']['Tokens'].values]
x_test = [i for i in df_final[df_final['Type'] == 'Test']['Tokens'].values]

x_train = pad_sequences(x_train, padding='post',maxlen = max(df_final['num_words']))
x_test = pad_sequences(x_test, padding='post',maxlen = max(df_final['num_words']))

# Encoder de las emociones
Y = df_final['Target'].values
encoder = OneHotEncoder()
encoder.fit(np.array(Y).reshape(-1,1))

y_train = df_final[df_final['Type'] == 'Train']['Target'].values
y_test = df_final[df_final['Type'] == 'Test']['Target'].values
y_train = encoder.transform(np.array(y_train).reshape(-1,1)).toarray()
y_test = encoder.transform(np.array(y_test).reshape(-1,1)).toarray()

## Modelo todas las emocioens

In [14]:
model = Sequential()
model.add(Embedding(input_dim = max_value, output_dim = 16, input_length=x_train.shape[1]))
model.add(GlobalAveragePooling1D())
model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=5, min_lr=0.0000001)
es = EarlyStopping(monitor='val_loss', patience=50)
model.fit(x_train, y_train, epochs= 50, validation_data=(x_test, y_test), callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
 317/1595 [====>.........................] - ETA: 14s - loss: 0.8133 - accuracy: 0.7048

KeyboardInterrupt: 

## Modelo flag neutral

In [23]:
x_train = [i for i in df_final[df_final['Type'] == 'Train']['Tokens'].values]
x_test = [i for i in df_final[df_final['Type'] == 'Test']['Tokens'].values]

x_train = pad_sequences(x_train, padding='post',maxlen = max(df_final['num_words']))
x_test = pad_sequences(x_test, padding='post',maxlen = max(df_final['num_words']))

# Encoder de las emociones
Y = np.where(df_final['Target'].values == 'neutral', True, False)
encoder = OneHotEncoder()
encoder.fit(np.array(Y).reshape(-1,1))

y_train = np.where(df_final[df_final['Type'] == 'Train']['Target'].values  == 'neutral', True, False)
y_test = np.where(df_final[df_final['Type'] == 'Test']['Target'].values  == 'neutral', True, False)
y_train = encoder.transform(np.array(y_train).reshape(-1,1)).toarray()
y_test = encoder.transform(np.array(y_test).reshape(-1,1)).toarray()

model = Sequential()
model.add(Embedding(input_dim = max_value, output_dim = 16, input_length=x_train.shape[1]))
model.add(GlobalAveragePooling1D())
model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.4, verbose=0, patience=5, min_lr=0.0000001)
es = EarlyStopping(monitor='val_loss', patience=50)
model.fit(x_train, y_train, epochs= 50, validation_data=(x_test, y_test), callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x23511bdf220>