In [1]:
import pandas as pd

# IEMOCAP

In [2]:
# Load the per-modality IEMOCAP tables. The video, AU and bio tables drop
# their `emotion` / `text` columns (plus a few extras) right after reading.
audio = pd.read_csv('../data/IEMOCAP/modified_audio_data.csv')

visual = (
    pd.read_csv('../data/IEMOCAP/modified_video_data.csv')
    .drop(columns=['emotion', 'text'])
)
au = (
    pd.read_csv('../data/IEMOCAP/modified_au_data.csv')
    .drop(columns=['Unnamed: 0', 'emotion', 'text'])
)
biography = (
    pd.read_csv('../data/IEMOCAP/modified_bio_data.csv')
    .drop(columns=['emotion', 'text', 'audio_caption'])
)

# Display (rows, columns) for each table.
audio.shape, visual.shape, au.shape, biography.shape

((10020, 7), (10020, 5), (10020, 5), (10038, 5))

In [3]:
# Join every modality table onto the audio table using the same four key
# columns (default inner join), then count missing cells in the result.
join_keys = ['session', 'fn', 'idx', 'sex']
merged_df = (
    audio
    .merge(visual, on=join_keys)
    .merge(au, on=join_keys)
    .merge(biography, on=join_keys)
)
merged_df.isna().sum().sum()

0

In [4]:
# Session 5 becomes the test split; every other session is training data.
session_ids = merged_df['session']
IEMOCAP_train = merged_df.loc[session_ids != 5]
IEMOCAP_test = merged_df.loc[session_ids == 5]
IEMOCAP_train.shape, IEMOCAP_test.shape

((7853, 10), (2167, 10))

In [5]:
# Persist both IEMOCAP splits as CSV without the DataFrame index.
for split_df, out_path in (
    (IEMOCAP_train, '../data/IEMOCAP/modified_train_data_v5.csv'),
    (IEMOCAP_test, '../data/IEMOCAP/modified_test_data_v5.csv'),
):
    split_df.to_csv(out_path, index=False)

# MELD

In [6]:
# Read the MELD train/test tables and discard the 'Unnamed: 0' column.
MELD_train, MELD_test = (
    pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])
    for csv_path in (
        '../data/MELD/modified4_train_data.csv',
        '../data/MELD/modified4_test_data.csv',
    )
)
MELD_train.shape, MELD_test.shape

((9989, 15), (2610, 15))

In [7]:
# Read the MELD video tables for the train and test splits.
visual, visual_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/MELD/modified_video_data.csv',
        '../data/MELD/modified_video_test_data.csv',
    )
)
visual.shape, visual_test.shape

((9989, 12), (2610, 12))

In [8]:
# Read the MELD audio tables for the train and test splits.
audio, audio_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/MELD/modified_audio_data.csv',
        '../data/MELD/modified_audio_test_data.csv',
    )
)
audio.shape, audio_test.shape

((9989, 12), (2610, 12))

In [9]:
# Copy the caption columns onto each MELD split. Assignment is aligned on the
# DataFrame index (no key-based join), and the video table's `video_caption`
# is stored under the name `visual_caption`.
for meld_split, audio_src, video_src in (
    (MELD_train, audio, visual),
    (MELD_test, audio_test, visual_test),
):
    meld_split['audio_caption'] = audio_src['audio_caption']
    meld_split['visual_caption'] = video_src['video_caption']

# Total number of missing cells in each split after the assignment.
MELD_train.isna().sum().sum(), MELD_test.isna().sum().sum()

(0, 0)

In [10]:
# Persist both MELD splits as CSV without the DataFrame index.
for split_df, out_path in (
    (MELD_train, '../data/MELD/modified_train_data_v5.csv'),
    (MELD_test, '../data/MELD/modified_test_data_v5.csv'),
):
    split_df.to_csv(out_path, index=False)

# Merged

In [11]:
def merge_data(df1, df2, mapping):
    """Stack an IEMOCAP-style frame and a MELD-style frame into one table.

    `df2` is reshaped to the column layout of `df1` and the two frames are
    concatenated and sorted by ``session``, ``fn`` and ``idx``.

    Neither input frame is modified: the reshaping works on a new frame, so
    the function can be called repeatedly with the same arguments.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Frame that already contains the columns ``session``, ``fn``, ``idx``,
        ``sex``, ``emotion``, ``text``, ``audio_caption``, ``video_caption``,
        ``aus`` and ``bio``.
    df2 : pandas.DataFrame
        Frame containing ``Emotion``, ``Dialogue_ID``, ``Utterance_ID``,
        ``Speaker``, ``Utterance``, ``Bio``, ``audio_caption``,
        ``visual_caption`` and ``aus``.
    mapping : dict
        Maps every value found in ``df2['Emotion']`` to the label used in
        ``df1['emotion']``.

    Returns
    -------
    pandas.DataFrame
        The ten shared columns of both frames, sorted by
        ``['session', 'fn', 'idx']`` with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If ``df2['Emotion']`` holds a value that is not a key of `mapping`,
        or if a required column is missing from either frame.
    """
    cols = ['session', 'fn', 'idx', 'sex', 'emotion', 'text',
            'audio_caption', 'video_caption', 'aus', 'bio']

    # `assign` returns a new frame, so the caller's `df2` keeps its original
    # `Emotion` labels and does not gain the helper columns.
    df2_aligned = df2.assign(
        # Plain dict lookup on purpose: an unmapped label raises KeyError
        # instead of silently turning into NaN.
        Emotion=[mapping[label] for label in df2['Emotion']],
        session=-1,  # every df2 row gets session -1
        fn=df2['Dialogue_ID'],
        idx=df2['Utterance_ID'],
        sex=df2['Speaker'],
    ).rename(columns={'Utterance': 'text', 'visual_caption': 'video_caption',
                      'Emotion': 'emotion', 'Bio': 'bio'})

    df_merged = pd.concat([df1[cols], df2_aligned[cols]], ignore_index=True)
    df_merged = df_merged.sort_values(by=['session', 'fn', 'idx']).reset_index(drop=True)

    return df_merged

In [12]:
# MELD emotion names -> the short emotion labels passed to merge_data.
mapping = {
    'neutral': 'neu',
    'surprise': 'exc',
    'fear': 'fru',
    'sadness': 'sad',
    'joy': 'hap',
    'disgust': 'fru',
    'anger': 'ang',
}

# Build the combined train split first, then the combined test split.
train, test = (
    merge_data(iemocap_split, meld_split, mapping)
    for iemocap_split, meld_split in (
        (IEMOCAP_train, MELD_train),
        (IEMOCAP_test, MELD_test),
    )
)
train.shape, test.shape

((17842, 10), (4777, 10))

In [13]:
# Show five randomly chosen rows of the combined training table.
train.sample(n=5)

Unnamed: 0,session,fn,idx,sex,emotion,text,audio_caption,video_caption,aus,bio
2911,-1,307,7,Ross,neu,"We said it was only going to be two weeks, yk...",The speaker's voice has a slightly deep tone w...,The image depicts two individuals seated close...,Nan,"Ross seems inquisitive and detail-oriented, as..."
8848,-1,930,10,Ross,neu,Die Hard,The speaker's voice possesses a bright quality...,The image depicts a scene from the popular TV ...,"Inner Brow Raiser, Outer Brow Raiser, Upper Li...","Based on the given conversation, it seems that..."
10722,1,Ses01F_script03_1,31,M,exc,Oh of course. We are figures of fun. All right.,The speaker's voice possesses a medium pitch w...,The image appears to be a split-screen video c...,"Brow Lowerer, Upper Lid Raiser, Lid Tightener,...",Speaker M appears to be assertive and direct i...
5424,-1,575,2,Julie,neu,"No Cobb, as in cobb salad.",The speaker's voice possesses a light and airy...,The image depicts a scene from what appears to...,Nan,There is no mention or presence of a speaker n...
9396,-1,982,4,Joey,exc,Really?!,The speaker's voice possesses a light and airy...,The image depicts two individuals engaged in c...,"Inner Brow Raiser, Brow Lowerer, Lip Corner De...",There is no mention or reference to a characte...


In [None]:
# Persist the combined train/test tables as CSV without the DataFrame index.
for split_df, out_path in (
    (train, '../data/IEMOCAP/modified_train_data(+MELD)_v5.csv'),
    (test, '../data/IEMOCAP/modified_test_data(+MELD)_v5.csv'),
):
    split_df.to_csv(out_path, index=False)