In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import numpy as np
import pandas as pd
from tqdm import tqdm

from langchain_ollama import ChatOllama
from langchain_core.messages import SystemMessage, HumanMessage

In [2]:
# Load the IEMOCAP utterance table (includes the per-utterance audio captions) and preview it.
data = pd.read_csv('../data/IEMOCAP/modified_audio_data.csv')
data.head()

Unnamed: 0,session,fn,idx,sex,emotion,text,audio_caption
0,1,Ses01F_impro07,0,M,exc,Did you get the letter?,The speaker's voice is that of an English male...
1,1,Ses01F_impro07,1,F,exc,"Yes. There's a big envelope it says, you're i...",The speaker's voice is high-pitched with a you...
2,1,Ses01F_impro07,2,M,exc,Yeah. That is so awesome.,The speaker's voice has a bright quality with ...
3,1,Ses01F_impro07,4,M,exc,Oh my God. What are you going to do? [LAUGHTER],The speaker's voice has a light and airy quali...
4,1,Ses01F_impro07,5,F,exc,So I have to move back to the ghetto but...I k...,The speaker's voice has a light and airy quali...


In [3]:
# Sorted list of the distinct values of the `fn` column; each one is processed
# as a separate dialogue further down. The trailing expression shows how many there are.
dia_ids = sorted(data['fn'].unique())
len(dia_ids)

151

In [4]:
# Earlier variant of the system prompt, kept for reference only (not executed):
# sys_prompt = SystemMessage(
#     """You are a useful AI assistant that specializes in describing person's behavior on their lines in a conversation. 
#     You get a part of conversation and a name of person you need to describe. Use maximum 100 tokens to describe person. Return ONLY person's description."""
# )

# Active system prompt: sent as the first message of every model call below.
# The length limit is not stated here; it is given in the per-speaker human message instead.
sys_prompt = SystemMessage(
    """You are an expert at analyzing the emotion of utterances among speakers in a conversation. 
    You get a part of conversation and a name of person you need to describe. Return ONLY person's description."""
)

In [5]:
# Ollama-served chat model used to generate the speaker descriptions.
# NOTE(review): num_ctx=2048 caps the context window; a full dialogue transcript
# plus the instructions may not fit for long dialogues — confirm against the data.
model = ChatOllama(model="qwen2.5:7b-instruct", num_ctx=2048)

# Description based on full dialogue

In [6]:
# Collect one model-written description per (dialogue, speaker) pair.
# The three lists are filled in lockstep: entry k of each list belongs to the
# same description, so they can later be turned into DataFrame columns.
bios = {
    'fn': [],
    'sex': [],
    'bio': [],
}

for d_id in tqdm(dia_ids):
    # All utterances of this dialogue, in utterance order.
    dia = data.loc[data['fn'] == d_id].sort_values('idx')

    # Render the dialogue as "<speaker>: <utterance>" entries, one per utterance.
    dia_text = "".join(
        f"{sex}: {text} \n " for sex, text in zip(dia['sex'], dia['text'])
    )

    # Ask the model about each speaker that appears in this dialogue.
    for spk in dia['sex'].unique():
        # The transcript itself must be interpolated here. The previous
        # placeholder was `{slice}`, which resolves to the Python builtin type,
        # so the model was sent "<class 'slice'>" instead of the conversation.
        inp = HumanMessage(
            f"Given this conversation between speakers:\n {dia_text} \n "
            f"In overall above conversation, what do you think about the "
            f"characteristics of speaker {spk}?  "
            f"(Note: provide an answer within 100 words)"
        )
        desc = model.invoke([sys_prompt, inp]).content

        bios['fn'].append(d_id)
        bios['sex'].append(spk)
        bios['bio'].append(desc)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 151/151 [02:47<00:00,  1.11s/it]


In [7]:
# Turn the collected lists into a DataFrame (columns fn, sex, bio), one row per
# generated description, and preview it. Note this rebinds `bios` from a dict to a DataFrame.
bios = pd.DataFrame(bios)
bios.head()

Unnamed: 0,fn,sex,bio
0,Ses01F_impro01,F,Speaker F appears to be a calm and composed in...
1,Ses01F_impro01,M,Speaker M appears confident and direct in the ...
2,Ses01F_impro02,F,Speaker F appears to be a thoughtful and refle...
3,Ses01F_impro02,M,Speaker M appears to be assertive and possibly...
4,Ses01F_impro03,M,Speaker M appears to be assertive and direct i...


In [8]:
# Attach the matching description to every utterance by joining the utterance
# table with the descriptions on the (fn, sex) key pair, then preview the result.
merged = pd.merge(data, bios, on=['fn', 'sex'])
merged.head()

Unnamed: 0,session,fn,idx,sex,emotion,text,audio_caption,bio
0,1,Ses01F_impro07,0,M,exc,Did you get the letter?,The speaker's voice is that of an English male...,Speaker M appears to be articulate and compose...
1,1,Ses01F_impro07,1,F,exc,"Yes. There's a big envelope it says, you're i...",The speaker's voice is high-pitched with a you...,Speaker F appears to be a straightforward and ...
2,1,Ses01F_impro07,2,M,exc,Yeah. That is so awesome.,The speaker's voice has a bright quality with ...,Speaker M appears to be articulate and compose...
3,1,Ses01F_impro07,4,M,exc,Oh my God. What are you going to do? [LAUGHTER],The speaker's voice has a light and airy quali...,Speaker M appears to be articulate and compose...
4,1,Ses01F_impro07,5,F,exc,So I have to move back to the ghetto but...I k...,The speaker's voice has a light and airy quali...,Speaker F appears to be a straightforward and ...


In [9]:
# Sanity check: (rows, columns) of the merged table.
merged.shape

(10038, 8)

In [10]:
# Write the merged table (utterances plus the `bio` column) to disk;
# index=False keeps the DataFrame's row index out of the CSV.
merged.to_csv('../data/IEMOCAP/modified_data.csv', index=False)