In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
import pandas as pd
from tqdm import tqdm

from langchain_ollama import ChatOllama
from langchain_core.messages import SystemMessage, HumanMessage

In [36]:
data = pd.read_csv('modified_test_data.csv')
data.head()

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,audio_caption,visual_caption
0,1,Why do all youre coffee mugs have numbers on ...,Mark,surprise,positive,0,0,3,19,"00:14:38,127","00:14:40,378",Neutral,The image shows a man and woman standing in a ...
1,2,Oh. Thats so Monica can keep track. That way ...,Rachel,anger,negative,0,1,3,19,"00:14:40,629","00:14:47,385",The speaker sounds happy.,The image shows a man and woman standing in a ...
2,3,Y'know what?,Rachel,neutral,neutral,0,2,3,19,"00:14:56,353","00:14:57,520",The speaker sounds like a woman.,The image shows a man and woman sitting at a t...
3,19,"Come on, Lydia, you can do it.",Joey,neutral,neutral,1,0,1,23,"0:10:44,769","0:10:46,146",The speaker sounds angry.,The image shows a woman laying in a hospital b...
4,20,Push!,Joey,joy,positive,1,1,1,23,"0:10:46,146","0:10:46,833","The speaker sounds male, aged between 16-25 ye...",The image shows a woman laying in a hospital b...


In [37]:
dia_ids = sorted(data['Dialogue_ID'].unique())
len(dia_ids)

280

In [38]:
# sys_prompt = SystemMessage(
#     """You are a useful AI assistant that specializes in describing person's behavior on their lines in a conversation. 
#     You get a part of conversation and a name of person you need to describe. Use maximum 100 tokens to describe person. Return ONLY person's description."""
# )

sys_prompt = SystemMessage(
    """You are an expert at analyzing the emotion of utterances among speakers in a conversation. 
    You get a part of conversation and a name of person you need to describe. Return ONLY person's description."""
)

In [39]:
model = ChatOllama(model="qwen2.5:7b-instruct", num_ctx=2048)

# Description based on full dialogue

In [44]:
bios = {
    'Dialogue_ID': [],
    'Speaker': [],
    'Bio': [],
}

for d_id in tqdm(dia_ids):
    dia = data.loc[data['Dialogue_ID'] == d_id].sort_values('Utterance_ID')
    dia_text = ""
    for i, utt in dia.iterrows():
        dia_text += f"{utt['Speaker']}: {utt['Utterance']} \n "
    
    for spk in dia['Speaker'].unique():
        inp = HumanMessage(f"Given this conversation between speakers:\n {slice} \n In overall above conversation, what do you think about the characteristics of speaker {spk}?  (Note: provide an answer within 100 words)")
        desc = model.invoke([sys_prompt, inp]).content
        
        bios['Dialogue_ID'].append(d_id)
        bios['Speaker'].append(spk)
        bios['Bio'].append(desc)


  1%|          | 2/280 [00:10<23:28,  5.07s/it]


In [41]:
bios = pd.DataFrame(bios)
bios.head()

Unnamed: 0,Dialogue_ID,Speaker,Bio
0,0,Mark,"Mark appears curious and observant, likely pay..."
1,0,Rachel,Rachel appears to be casual and humorous. Her ...
2,1,Joey,The given conversation does not include any ut...
3,2,Ross,"Ross appears curious and observant, noting a d..."
4,2,Rachel,Rachel appears to be practical and organized. ...


In [42]:
merged = data.merge(bios, on=['Dialogue_ID', 'Speaker'])
merged.head()

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,audio_caption,visual_caption,Bio
0,1,Why do all youre coffee mugs have numbers on ...,Mark,surprise,positive,0,0,3,19,"00:14:38,127","00:14:40,378",Neutral,The image shows a man and woman standing in a ...,"Mark appears curious and observant, likely pay..."
1,2,Oh. Thats so Monica can keep track. That way ...,Rachel,anger,negative,0,1,3,19,"00:14:40,629","00:14:47,385",The speaker sounds happy.,The image shows a man and woman standing in a ...,Rachel appears to be casual and humorous. Her ...
2,3,Y'know what?,Rachel,neutral,neutral,0,2,3,19,"00:14:56,353","00:14:57,520",The speaker sounds like a woman.,The image shows a man and woman sitting at a t...,Rachel appears to be casual and humorous. Her ...
3,19,"Come on, Lydia, you can do it.",Joey,neutral,neutral,1,0,1,23,"0:10:44,769","0:10:46,146",The speaker sounds angry.,The image shows a woman laying in a hospital b...,The given conversation does not include any ut...
4,20,Push!,Joey,joy,positive,1,1,1,23,"0:10:46,146","0:10:46,833","The speaker sounds male, aged between 16-25 ye...",The image shows a woman laying in a hospital b...,The given conversation does not include any ut...


In [51]:
merged.shape

(2610, 14)

In [52]:
merged.to_csv('modified3_test_data.csv', index=False)

# Description based on past context

In [18]:
bios = []

for d_id in tqdm(dia_ids):
    dia = data.loc[data['Dialogue_ID'] == d_id].sort_values('Utterance_ID')
    dia_slices = []
    dia_text = ""
    for i, utt in dia.iterrows():
        dia_text += f"{utt['Speaker'].upper()}: {utt['Utterance']} \n"
        dia_slices.append((dia_text, utt['Speaker'].upper()))

    for slice, spk in dia_slices:
        # inp = HumanMessage(f"CONVERSATION: \n {slice} DESCRIBE {spk}:")
        inp = HumanMessage(f"Given this conversation between speakers:\n {slice} \n In overall above conversation, what do you think about the characteristics of speaker {spk}?  (Note: provide an answer within 250 words)")
        desc = model.invoke([sys_prompt, inp]).content
        bios.append(desc)
    break

  0%|          | 0/280 [00:01<?, ?it/s]


In [10]:
data = data.sort_values(['Dialogue_ID', 'Utterance_ID'])

data['Bio'] = bios

In [11]:
data.to_csv('modified2_test_data.csv', index=False)