In [2]:
import pickle
data_path = '../IEMOCAP_DATA/IEMOCAP_features.pkl'

# Use this statement to load the pickled dataset: the file holds a single
# 9-item tuple that must be unpacked in exactly this order.
# NOTE(review): encoding='latin1' is presumably needed because the file was
# pickled under Python 2 -- confirm against the dataset's documentation.
# NOTE(security): pickle.load can execute arbitrary code; only load this file
# from a trusted source.
with open(data_path, 'rb') as features_file:
    (videoIDs, videoSpeakers, videoLabels, videoText,
     videoAudio, videoVisual, videoSentence, trainVid,
     testVid) = pickle.load(features_file, encoding='latin1')

In [3]:
# Quick sanity check of what was loaded: the number of conversations, the
# utterance IDs of one sample conversation, and the size and contents of the
# train and test conversation-ID collections. Each item is printed on its own.
for summary_item in (
    len(videoIDs),
    videoIDs['Ses03M_impro08b'],
    len(trainVid),
    trainVid,
    len(testVid),
    testVid,
):
    print(summary_item)


151
['Ses03M_impro08b_M000', 'Ses03M_impro08b_F001', 'Ses03M_impro08b_M001', 'Ses03M_impro08b_F002', 'Ses03M_impro08b_M002', 'Ses03M_impro08b_F003', 'Ses03M_impro08b_M003', 'Ses03M_impro08b_F004', 'Ses03M_impro08b_M004', 'Ses03M_impro08b_F005', 'Ses03M_impro08b_F006', 'Ses03M_impro08b_F007', 'Ses03M_impro08b_M005', 'Ses03M_impro08b_F008', 'Ses03M_impro08b_M006', 'Ses03M_impro08b_F009', 'Ses03M_impro08b_M007', 'Ses03M_impro08b_F010', 'Ses03M_impro08b_M008', 'Ses03M_impro08b_M009', 'Ses03M_impro08b_F011', 'Ses03M_impro08b_M010', 'Ses03M_impro08b_F012', 'Ses03M_impro08b_M011', 'Ses03M_impro08b_F013', 'Ses03M_impro08b_M012', 'Ses03M_impro08b_F014', 'Ses03M_impro08b_M013', 'Ses03M_impro08b_M014', 'Ses03M_impro08b_F015', 'Ses03M_impro08b_M015', 'Ses03M_impro08b_F016', 'Ses03M_impro08b_M016', 'Ses03M_impro08b_M017', 'Ses03M_impro08b_M018', 'Ses03M_impro08b_M019', 'Ses03M_impro08b_M020', 'Ses03M_impro08b_F021', 'Ses03M_impro08b_M021', 'Ses03M_impro08b_F023', 'Ses03M_impro08b_M022', 'Ses03M_imp

In [38]:
import pandas as pd
# One DataFrame per conversation, keyed by conversation ID and stored in the
# train or test dict depending on membership in trainVid.
conversation_dfs_train = {}
conversation_dfs_test = {}
# Speaker tags as they appear in videoSpeakers, mapped to integer codes.
participant_ID_toInt = {'M':1, 'F':0}

for ID in videoIDs:
    # Each column is taken from the per-conversation sequence stored under
    # this ID; speaker tags are converted to their integer codes.
    df = pd.DataFrame({
        'Utterance': videoSentence[ID],
        'Speaker': [participant_ID_toInt[tag] for tag in videoSpeakers[ID]],
        'Emotion': videoLabels[ID],
        'Utterance_ID': videoIDs[ID],
    })

    # Anything not listed in trainVid goes to the test dict.
    target_split = conversation_dfs_train if ID in trainVid else conversation_dfs_test
    target_split[ID] = df


In [49]:
# Show up to the first 50 utterances of one test conversation as the cell output.
conversation_dfs_test['Ses05M_impro08'].head(n=50)

Unnamed: 0,Utterance,Speaker,Emotion,Utterance_ID
0,"D.S.L. Extreme, can I help you?",1,2,Ses05M_impro08_M000
1,"Hi, I need- I need some help. I've been trans...",0,5,Ses05M_impro08_F000
2,This happens every two weeks?,1,2,Ses05M_impro08_M001
3,Yeah. That my service just goes out.,0,5,Ses05M_impro08_F001
4,You've lost your connection.,1,2,Ses05M_impro08_M002
5,yeah.,0,5,Ses05M_impro08_F002
6,okay.,1,2,Ses05M_impro08_M003
7,And I'll reset the IP address. I will do like...,0,5,Ses05M_impro08_F003
8,Hmm. Mmm.,1,2,Ses05M_impro08_M004
9,"Is not, It it It will work again for like a we...",0,5,Ses05M_impro08_F004


In [40]:
# Total utterances per split = sum of the row counts of that split's DataFrames.
train_utterance_total = sum(len(frame) for frame in conversation_dfs_train.values())
test_utterance_total = sum(len(frame) for frame in conversation_dfs_test.values())

# The labels are plain strings (nothing is interpolated); print() inserts its
# own separator between the label and the number.
print('Number of Conversations in the Training Set: ', len(conversation_dfs_train))
print('Number of Utterances in the Training Set: ', train_utterance_total)
print('Number of Conversations in the Testing Set: ', len(conversation_dfs_test))
print('Number of Utterances in the Testing Set: ', test_utterance_total)

Number of Conversations in the Training Set:  120
Number of Utterances in the Training Set:  5810
Number of Conversations in the Testing Set:  31
Number of Utterances in the Testing Set:  1623


In [41]:
from transformers import BertTokenizer, BertModel
import torch

# Load the pre-trained BERT tokenizer that turns text into model inputs.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load the matching pre-trained encoder and put it in evaluation mode.
# eval() switches training-only layers such as dropout to inference behaviour
# (it does not by itself freeze weights) and returns the module, so it can be
# chained directly onto the load.
model = BertModel.from_pretrained('bert-base-uncased').eval()

def embedding_func(speech_line):
    """
    Encode one line of speech with BERT and return its [CLS] vector.

    Parameters:
    - speech_line: A string holding a single line of speech.

    Returns:
    - The last-layer hidden state at the [CLS] position as a list of floats
      (768 values for the bert-base model loaded in this notebook).
    """
    # Tokenize into PyTorch tensors, truncating anything beyond 512 tokens.
    encoded = tokenizer(
        speech_line,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Inference only: run the forward pass without autograd bookkeeping.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Position 0 of the sequence is the [CLS] token; squeeze() drops the
    # size-1 batch axis before converting to a plain Python list.
    return hidden_states[:, 0, :].squeeze().tolist()

In [46]:
def create_dialogue_graphs(dfs, embed_fn=None, embedding_dim=768):
    """
    Turn each conversation into a list of per-utterance graphs.

    For every row of every conversation one graph dict is produced:

        {'X': {'<speaker>_<row index>': {'embedding': [...], 'edges': [...]},
               ...one entry per participant of the conversation...},
         'Y': <the row's 'Emotion' value>}

    The participant who speaks in that row gets the embedding of the row's
    'Utterance' and edges to every other participant's node for the same row.
    All other participants get a zero placeholder embedding and no edges.

    Parameters:
    - dfs: Mapping of conversation ID to a DataFrame with 'Utterance',
      'Speaker' and 'Emotion' columns. Only the mapping's values are used.
    - embed_fn: Callable mapping an utterance to its embedding. Defaults to
      the notebook-level `embedding_func`.
    - embedding_dim: Length of the zero placeholder given to silent
      participants. Should match the length of what `embed_fn` returns.

    Returns:
    - A list with one entry per conversation (in `dfs` iteration order), each
      entry being the list of graph dicts for that conversation's rows.
    """
    if embed_fn is None:
        # Looked up at call time so the notebook's BERT embedder stays the
        # default without being captured when this function is defined.
        embed_fn = embedding_func

    dialogue_graphs = []

    for dialogue in dfs.values():

        participants = dialogue['Speaker'].unique()
        G = []

        for index, row in dialogue.iterrows():
            speaker = row['Speaker']
            nodes = {}

            # One node per participant for this row, named '<participant>_<index>'.
            for p in participants:
                if p == speaker:
                    nodes[f'{p}_{index}'] = {
                        'embedding': embed_fn(row['Utterance']),
                        'edges': [f'{node}_{index}' for node in participants if node != speaker],
                    }
                else:
                    # Build a fresh placeholder per node: sharing one list
                    # object would let an in-place edit of any silent node's
                    # embedding leak into every other silent node.
                    nodes[f'{p}_{index}'] = {'embedding': [0] * embedding_dim, 'edges': []}

            G.append({'X': nodes, 'Y': row['Emotion']})

        dialogue_graphs.append(G)

    return dialogue_graphs

In [47]:
# Build the per-utterance graph lists for each split. create_dialogue_graphs
# embeds every utterance it is given, so these two calls are where the BERT
# forward passes for the whole dataset happen. They are kept as separate
# statements so the train result is already bound if the test call fails.
IEMOCAP_train_dialogue_graphs = create_dialogue_graphs(conversation_dfs_train)
IEMOCAP_test_dialogue_graphs = create_dialogue_graphs(conversation_dfs_test)

In [48]:
import os
import pickle

# Persist the graphs for both splits, one pickle file per split.
graph_outputs = {
    'data/ERC/IEMOCAP/train_dialogue_graphs.pkl': IEMOCAP_train_dialogue_graphs,
    'data/ERC/IEMOCAP/test_dialogue_graphs.pkl': IEMOCAP_test_dialogue_graphs,
}

for output_path, graphs in graph_outputs.items():
    # Create the target directory if needed, so that a missing folder does not
    # throw away the graphs computed in the previous cell.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        pickle.dump(graphs, f)