<h1>Extracting labels from the evaluation files</h1>

In [2]:
import re

file_path = '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Ses01F_impro01.txt'

useful_regex = re.compile(r'\[.+\]\n', re.IGNORECASE)

with open(file_path) as f:
    file_content = f.read()

info_lines = re.findall(useful_regex, file_content)

print(info_lines)

['[START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]\n', '[6.2901 - 8.2357]\tSes01F_impro01_F000\tneu\t[2.5000, 2.5000, 2.5000]\n', '[10.0100 - 11.3925]\tSes01F_impro01_F001\tneu\t[2.5000, 2.5000, 2.5000]\n', '[14.8872 - 18.0175]\tSes01F_impro01_F002\tneu\t[2.5000, 2.5000, 2.5000]\n', '[19.2900 - 20.7875]\tSes01F_impro01_F003\txxx\t[2.5000, 3.0000, 3.0000]\n', '[21.3257 - 24.7400]\tSes01F_impro01_F004\txxx\t[2.5000, 3.0000, 2.5000]\n', '[27.4600 - 31.4900]\tSes01F_impro01_F005\tneu\t[2.5000, 3.5000, 2.0000]\n', '[38.9650 - 43.5900]\tSes01F_impro01_F006\tfru\t[2.0000, 3.5000, 3.5000]\n', '[46.5800 - 52.1900]\tSes01F_impro01_F007\tfru\t[2.5000, 3.5000, 3.5000]\n', '[56.1600 - 58.8225]\tSes01F_impro01_F008\tfru\t[2.0000, 3.5000, 3.5000]\n', '[61.8700 - 65.9700]\tSes01F_impro01_F009\tfru\t[2.0000, 3.5000, 3.0000]\n', '[66.4200 - 69.3400]\tSes01F_impro01_F010\txxx\t[1.5000, 3.5000, 3.5000]\n', '[72.4500 - 82.2600]\tSes01F_impro01_F011\tfru\t[2.0000, 3.5000, 3.5000]\n', '[85.2700 - 88.0200]

In [3]:
for l in info_lines[0:100]:
    print(l.strip().split('\t'))

['[START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]']
['[6.2901 - 8.2357]', 'Ses01F_impro01_F000', 'neu', '[2.5000, 2.5000, 2.5000]']
['[10.0100 - 11.3925]', 'Ses01F_impro01_F001', 'neu', '[2.5000, 2.5000, 2.5000]']
['[14.8872 - 18.0175]', 'Ses01F_impro01_F002', 'neu', '[2.5000, 2.5000, 2.5000]']
['[19.2900 - 20.7875]', 'Ses01F_impro01_F003', 'xxx', '[2.5000, 3.0000, 3.0000]']
['[21.3257 - 24.7400]', 'Ses01F_impro01_F004', 'xxx', '[2.5000, 3.0000, 2.5000]']
['[27.4600 - 31.4900]', 'Ses01F_impro01_F005', 'neu', '[2.5000, 3.5000, 2.0000]']
['[38.9650 - 43.5900]', 'Ses01F_impro01_F006', 'fru', '[2.0000, 3.5000, 3.5000]']
['[46.5800 - 52.1900]', 'Ses01F_impro01_F007', 'fru', '[2.5000, 3.5000, 3.5000]']
['[56.1600 - 58.8225]', 'Ses01F_impro01_F008', 'fru', '[2.0000, 3.5000, 3.5000]']
['[61.8700 - 65.9700]', 'Ses01F_impro01_F009', 'fru', '[2.0000, 3.5000, 3.0000]']
['[66.4200 - 69.3400]', 'Ses01F_impro01_F010', 'xxx', '[1.5000, 3.5000, 3.5000]']
['[72.4500 - 82.2600]', 'Ses01F_impro01_F011

<h1>Compile all the informarion in a single file</h1>

In [4]:
import re
import os


info_line = re.compile(r'\[.+\]\n', re.IGNORECASE)

start_times, end_times, wav_file_names, emotions, vals, acts, doms = [], [], [], [], [], [], []

for sess in range(1, 6):
    emo_evaluation_dir = '../../IEMOCAP_full_release/Session{}/dialog/EmoEvaluation/'.format(sess)
    evaluation_files = [l for l in os.listdir(emo_evaluation_dir) if 'Ses' in l]
    for file in evaluation_files:
        with open(emo_evaluation_dir + file) as f:
            content = f.read()
        info_lines = re.findall(info_line, content)
        for line in info_lines[1:]:  # the first line is a header
            start_end_time, wav_file_name, emotion, val_act_dom = line.strip().split('\t')
            start_time, end_time = start_end_time[1:-1].split('-')
            val, act, dom = val_act_dom[1:-1].split(',')
            val, act, dom = float(val), float(act), float(dom)
            start_time, end_time = float(start_time), float(end_time)
            start_times.append(start_time)
            end_times.append(end_time)
            wav_file_names.append(wav_file_name)
            emotions.append(emotion)
            vals.append(val)
            acts.append(act)
            doms.append(dom)

In [5]:
import pandas as pd

df_iemocap = pd.DataFrame(columns=['start_time', 'end_time', 'wav_file', 'emotion', 'val', 'act', 'dom'])

df_iemocap['start_time'] = start_times
df_iemocap['end_time'] = end_time
df_iemocap['wav_file'] = wav_file_names
df_iemocap['emotion'] = emotions
df_iemocap['val'] = vals
df_iemocap['act'] = acts
df_iemocap['dom'] = doms

df_iemocap.tail()

Unnamed: 0,start_time,end_time,wav_file,emotion,val,act,dom
10034,195.67,221.9,Ses05M_impro03_M029,exc,3.6667,3.6667,2.6667
10035,203.19,221.9,Ses05M_impro03_M030,hap,3.6667,3.6667,2.6667
10036,210.58,221.9,Ses05M_impro03_M031,hap,4.0,3.6667,2.6667
10037,215.3,221.9,Ses05M_impro03_M032,hap,4.3333,3.6667,3.0
10038,219.75,221.9,Ses05M_impro03_M033,hap,4.6667,3.3333,3.0


In [6]:
df_iemocap.to_csv('../../IEMOCAP_full_release/dialog_EmoEvaluation_labels.csv', index=False)