# Using LLMs to annotate dialogue acts

## Processing the data

In [23]:
import pandas as pd


In [24]:
dialogue1 = pd.read_csv('swda\swda\sw12utt\sw_1200_2121.utt.csv')
dialogue2 = pd.read_csv('swda\swda\sw12utt\sw_1201_2131.utt.csv')
dialogue3 = pd.read_csv('swda\swda\sw12utt\sw_1202_2151.utt.csv')

print(dialogue1.head())


              swda_filename ptb_basename  conversation_no  transcript_index  \
0  sw12utt/sw_1200_2121.utt     2/sw2121             2121                 0   
1  sw12utt/sw_1200_2121.utt     2/sw2121             2121                 1   
2  sw12utt/sw_1200_2121.utt     2/sw2121             2121                 2   
3  sw12utt/sw_1200_2121.utt     2/sw2121             2121                 3   
4  sw12utt/sw_1200_2121.utt     2/sw2121             2121                 4   

  act_tag caller  utterance_index  subutterance_index  \
0       o      A                1                   1   
1      qw      A                1                   2   
2      ^h      B                2                   1   
3      sv      B                2                   2   
4      qo      B                2                   3   

                                                text  \
0                                  Okay, {F uh, }  /   
1  could you tell me what you think contributes m...   
2             

In [25]:
dialogue1_targets = dialogue1['act_tag']
dialogue2_targets = dialogue2['act_tag']
dialogue3_targets = dialogue3['act_tag']

dialogue1_res = dialogue1[['caller', 'text', 'act_tag']]
dialogue2_res = dialogue2[['caller', 'text', 'act_tag']]
dialogue3_res = dialogue3[['caller', 'text', 'act_tag']]

dialogue1 = dialogue1[['caller', 'text']]
dialogue2 = dialogue2[['caller', 'text']]
dialogue3 = dialogue3[['caller', 'text']]



In [26]:
print(dialogue1.head())
print(dialogue1_targets.head())

  caller                                               text
0      A                                  Okay, {F uh, }  /
1      A  could you tell me what you think contributes m...
2      B                    {D Well, } it's hard to say.  /
3      B  {E I mean, } while it's certainly the case tha...
4      B                               What do you think? /
0     o
1    qw
2    ^h
3    sv
4    qo
Name: act_tag, dtype: object


## OpenAI Setup

In [27]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Example of accessing an environment variable
openai_key = os.getenv('OPENAI_API_KEY')

In [28]:
from openai import OpenAI
client = OpenAI(api_key=openai_key)
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": "write a haiku about ai"}
    ],
    temperature=0.2
)

print(completion.choices[0].message.content)

Silent circuits hum,  
Thoughts born from electric dreams,  
Mind of code and light.


In [29]:
def test_prompt(prompt):
    print("using prompt", prompt)
    
    print("generating for dialogue 1")
    dialogue1_comp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": dialogue1.to_string()}
        ]
    )

    print("generating for dialogue 2")
    dialogue2_comp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": dialogue2.to_string()}
        ]
    )

    print("generating for dialogue 3")
    dialogue3_comp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": dialogue3.to_string()}
        ]
    )

    d1_res = dialogue1_comp.choices[0].message.content
    
    d2_res = dialogue2_comp.choices[0].message.content
    
    d3_res = dialogue3_comp.choices[0].message.content
    
    def reduce_res(res, dialogue):
        out = []
        print("curr len", len(res.split(",")))
        if len(res.split(",")) < len(dialogue):
            out = res.split(",")
            out.extend([""] * (len(dialogue) - len(res.split(","))))
        elif len(res.split(",")) > len(dialogue):
            out = res.split(",")[:len(dialogue)]
        return out
    
    return reduce_res(d1_res, dialogue1_res), reduce_res(d2_res, dialogue2_res), reduce_res(d3_res, dialogue3_res)

## Task 1, zero-shot no list of dialogue act tags

In [30]:
task1_prompt = '''
You are are a classifier that identifies dialogue act tags for a given dialogue.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.
Output exactly one dialogue act per line.
'''

In [31]:
d1_t1_res, d2_t1_res, d3_t1_res = test_prompt(task1_prompt)

using prompt 
You are are a classifier that identifies dialogue act tags for a given dialogue.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.
Output exactly one dialogue act per line.

generating for dialogue 1
generating for dialogue 2
generating for dialogue 3
curr len 428
curr len 176
curr len 79


In [32]:
print(d1_t1_res)
print(d2_t1_res)
print(d3_t1_res)

['Okay', ' \nQuestion', ' \nAcknowledgment', ' \nStatement', ' \nQuestion', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nQuestion', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nQuestion', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nQuestion', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nAcknowledgment', ' \nQuestion', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nStatement', ' \nAcknowledgment', ' \nStatement', ' \nAcknowledgment', ' \nAcknowledgment'

In [33]:
dialogue1_res['task1_pred'] = d1_t1_res
dialogue2_res['task1_pred'] = d2_t1_res
dialogue3_res['task1_pred'] = d3_t1_res

## Task 2, zero-shot with list of possible dialogue acts

In [34]:
import numpy as np
# find all different types of tags
all_tags = pd.concat([dialogue1_targets, dialogue2_targets, dialogue3_targets]).unique()
print(all_tags)

['o' 'qw' '^h' 'sv' 'qo' 'sd' 'b' '+' 'qy' 'nn' 'sd^e' 'bk' '%' 'bf' 'ny'
 'ny^r' 'bh' 'x' 'ba' 'b^r' 'aa' 'qy^d' 'na' 'b^m' 'h' 'nd' '"' 'qh' 'no'
 'sd^m' 'ad' 'sv^e' 'ng' 'qy^g' 'ba^r' '^2' 'aa^r' 'sd^t' 'fc' 'ft']


In [35]:
task2_prompt = '''
You are are a classifier that identifies dialogue act tags for a given dialogue.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from:
'''

task2_prompt += ', '.join(all_tags)
print(task2_prompt)


You are are a classifier that identifies dialogue act tags for a given dialogue.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from:
o, qw, ^h, sv, qo, sd, b, +, qy, nn, sd^e, bk, %, bf, ny, ny^r, bh, x, ba, b^r, aa, qy^d, na, b^m, h, nd, ", qh, no, sd^m, ad, sv^e, ng, qy^g, ba^r, ^2, aa^r, sd^t, fc, ft


In [36]:
d1_t2_res, d2_t2_res, d3_t2_res = test_prompt(task2_prompt)

using prompt 
You are are a classifier that identifies dialogue act tags for a given dialogue.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from:
o, qw, ^h, sv, qo, sd, b, +, qy, nn, sd^e, bk, %, bf, ny, ny^r, bh, x, ba, b^r, aa, qy^d, na, b^m, h, nd, ", qh, no, sd^m, ad, sv^e, ng, qy^g, ba^r, ^2, aa^r, sd^t, fc, ft
generating for dialogue 1
generating for dialogue 2
generating for dialogue 3
curr len 75
curr len 115
curr len 594


In [37]:
print(d1_t2_res)
print(d2_t2_res)
print(d3_t2_res)

['o', ' qw', ' sd', ' ^h', ' qy', ' sd', ' sd^m', ' sv', ' sd', ' qy', ' sd^e', ' b', ' +', ' nq', ' qh', ' sd^m', ' sv', ' sd^m', ' bk', ' +', ' qy', ' no', ' qw', ' qw', ' qy^g', ' sd^t', ' na', ' sd^m', ' sd', ' sd^e', ' sd', ' qy^d', ' qh', ' ba^r', ' sv', ' sd^m', ' sd^e', ' sd', ' \\', ' qh', ' qy', ' o', ' qw', ' sd', ' sd^t', ' qh', ' sd', ' br', ' qy', ' na', ' sd^m', ' na', ' ba', ' sv', ' sd', ' sv', ' qy', ' sd', ' sv', ' ba^r', ' qy', ' sd', ' sd^m', ' sd^t', ' sd^m', ' na', ' ba', ' ba^r', ' sd', ' sv', ' qy', ' sd', ' sv', ' qy^d', ' %', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '

In [38]:
dialogue1_res['task2_pred'] = d1_t2_res
dialogue2_res['task2_pred'] = d2_t2_res
dialogue3_res['task2_pred'] = d3_t2_res

## Task 3, few-shot providing list of possible dialogue acts

In [39]:
example_dialogue = pd.read_csv('swda\swda\sw12utt\sw_1203_2229.utt.csv')
example_dialogue = example_dialogue[['caller', 'text', 'act_tag']]

In [40]:
task3_prompt = '''
You are are a classifier that identifies dialogue act tags for a given dialouge.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from:
'''

task3_prompt += ', '.join(all_tags)

task3_prompt += "\n\nHere is an example annotated dialogue:\n\n" + example_dialogue.to_string()

In [41]:
d1_t3_res, d2_t3_res, d3_t3_res = test_prompt(task3_prompt)

using prompt 
You are are a classifier that identifies dialogue act tags for a given dialouge.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from:
o, qw, ^h, sv, qo, sd, b, +, qy, nn, sd^e, bk, %, bf, ny, ny^r, bh, x, ba, b^r, aa, qy^d, na, b^m, h, nd, ", qh, no, sd^m, ad, sv^e, ng, qy^g, ba^r, ^2, aa^r, sd^t, fc, ft

Here is an example annotated dialogue:

    caller                                                                                                                                                          text act_tag
0        A                                   

In [42]:
print(d1_t3_res)
print(d2_t3_res)
print(d3_t3_res)

['qy', ' no', ' sv', ' qy', ' qw', ' qw', ' sd', ' qw', ' sd^e', ' sv', ' sv', ' sv', ' sd', ' qy', ' sd', ' sv', ' sd', ' sd', ' sd', ' sd', ' sd', ' sd', ' sv', ' n', ' sv', ' sd', ' sd', ' sd', ' sd', ' sv', ' sd', ' sv', ' xx', ' ng', ' qy^d', ' qy', ' sd', ' qw', ' sd', ' qw', ' sv', ' sd^e', ' sd', ' sd', ' sd', ' sd', ' sd', ' sv^e', ' sd^e', ' sv', ' qa', ' qy', ' qy', ' sd', ' qy^d', ' sv', ' sv', ' sv', ' sd', ' qy', ' sd^e', ' sv', ' qy', ' sv', ' sv', ' sd', ' sd', ' sd', ' sd', ' sv', ' sv', ' sd^e', ' sv', ' sd', ' sv', ' sv', ' sd', ' sd', ' sv', ' sd', ' sd', ' sv', ' sd', ' qy', ' sd', ' sd', ' sd', ' sd', ' sv^d', ' sd^e', ' sv^h', ' sd', ' sv', ' sd', ' sd', ' sv^e', ' sv^e', ' sd^e', ' sv', ' no', ' sv', ' sv', ' sv', ' qy', ' qh', ' sv', ' sv', ' sd', ' sv', ' ad', ' sd', ' sd', ' sv', ' sv', ' sd', ' sv^e', ' sd', ' sd', ' sd', ' sd', ' sv^m', ' sd', ' sd', ' sv', ' sd', ' sd', ' sv', ' sd', ' sv', ' sd', ' sd', ' sv', ' sv', ' sd', ' sd', ' sd', ' sd', ' sd', ' s

In [43]:
dialogue1_res['task3_pred'] = d1_t3_res
dialogue2_res['task3_pred'] = d2_t3_res
dialogue3_res['task3_pred'] = d3_t3_res

## Creative ways to improve performance, 

generating gpt description of each dialogue act

In [44]:
ex_dialogue1 = pd.read_csv('swda\swda\sw12utt\sw_1204_2434.utt.csv')[['caller', 'text', 'act_tag']]
ex_dialogue2 = pd.read_csv('swda\swda\sw12utt\sw_1205_2441.utt.csv')[['caller', 'text', 'act_tag']]
ex_dialogue3 = pd.read_csv('swda\swda\sw12utt\sw_1206_2461.utt.csv')[['caller', 'text', 'act_tag']]


In [45]:
def generate_description(dialogue_act):
    print("generating description for", dialogue_act)
    
    prompt = '''
    Given a dialogue act tag and a dialogue, generate a conscise, prescise, and informative description of the dialogue act tag.
    The dialogue is between two speakers, A and B.
    The dialogue is represented in a table. The table has three columns, 'caller', 'text' and 'act_tag'.
    Return only the description of the dialogue act tag and nothing more.
    Keep your responses to under 25 words.
    
    Here is the dialogue table:
    '''
    
    dialogue = ""
    
    if dialogue_act in dialogue1_targets.unique():
        dialogue = dialogue1.to_string()
    elif dialogue_act in dialogue2_targets.unique():
        dialogue = dialogue2.to_string()
    else:
        dialogue = dialogue3.to_string()
    
    prompt += dialogue
    
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": "write a description of the dialogue act tag " + dialogue_act}
        ]
    )
    
    return completion.choices[0].message.content

In [46]:
tag_descriptions = []
for tag in all_tags:
    tag_descriptions.append(generate_description(tag))

generating description for o
generating description for qw
generating description for ^h
generating description for sv
generating description for qo
generating description for sd
generating description for b
generating description for +
generating description for qy
generating description for nn
generating description for sd^e
generating description for bk
generating description for %
generating description for bf
generating description for ny
generating description for ny^r
generating description for bh
generating description for x
generating description for ba
generating description for b^r
generating description for aa
generating description for qy^d
generating description for na
generating description for b^m
generating description for h
generating description for nd
generating description for "
generating description for qh
generating description for no
generating description for sd^m
generating description for ad
generating description for sv^e
generating description for ng
gener

In [47]:
print(tag_descriptions)

["The tag 'o' indicates an overview or summary statement, typically encapsulating previous dialogue or thoughts succinctly.", 'The "qw" tag represents a question by the speaker seeking information or clarification from the other participant in the dialogue.', 'The dialogue act tag ^h indicates a hesitation or filler sound used by a speaker while they are thinking or searching for words.', 'The "sv" tag indicates a statement that provides information or expresses an opinion.', 'The "qo" tag indicates a question posed by a speaker, seeking information or clarification from the other participant.', 'The "sd" tag indicates a statement that conveys information or an opinion in the dialogue.', "The tag 'B' indicates responses that often involve agreement, acknowledgment, or elaboration by the second speaker in a dialogue.", "The '+' dialogue act tag indicates a continuation or addition to a previous thought or statement made by the speaker.", 'The tag "qy" indicates a question posed by a spe

In [48]:
# run only once:
# pd.DataFrame({"tag": all_tags, "description": tag_descriptions}).to_csv("dialogue_act_tag_descriptions.csv", index=False)

In [49]:
act_tag_descriptions = pd.read_csv("dialogue_act_tag_descriptions.csv")

In [50]:
task4_prompt = '''
You are are a classifier that identifies dialogue act tags for a given dialouge.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from followed by their descriptions:
'''

task4_prompt += act_tag_descriptions.to_string()

In [51]:
d1_t4_res, d2_t4_res, d3_t4_res = test_prompt(task4_prompt)

using prompt 
You are are a classifier that identifies dialogue act tags for a given dialouge.
The dialogue is between two speakers, A and B.
Return only the dialouge acts of speaker A and B in order of appearance.
Separate the dialogue acts of speaker A and B with a comma.
The user will send a table with the columns 'caller' and 'text'.
The caller column will have the values 'A' and 'B' and designates which speaker is speaking.
The text column will have the dialogue that speaker 'A' or 'B' is saying.

Here are possible dialouge act tags you can choose from followed by their descriptions:
     tag                                                                                                                                                                                description
0      o                                                                                                                    The "O" tag indicates an off-topic or unrelated remark in the dialogue.
1     qw    

In [52]:
dialogue1_res['task4_pred'] = d1_t4_res
dialogue2_res['task4_pred'] = d2_t4_res
dialogue3_res['task4_pred'] = d3_t4_res

## Analyze Results

In [53]:
dialogue1_res.to_csv('dialogue1_results.csv', index=False)
dialogue2_res.to_csv('dialogue2_results.csv', index=False)
dialogue3_res.to_csv('dialogue3_results.csv', index=False)

In [54]:
def calculate_match_percentage(df, col1, col2):
    """
    Calculate the percentage of matching values between two columns in a DataFrame.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the columns to compare.
    col1 (str): The name of the first column.
    col2 (str): The name of the second column.

    Returns:
    float: The percentage of matching values between the two columns.
    """
    if col1 not in df.columns or col2 not in df.columns:
        raise ValueError("One or both columns are not in the DataFrame")

    total_rows = len(df)
    if total_rows == 0:
        return 0.0

    matches = df[col1] == df[col2]
    match_count = matches.sum()

    return (match_count / total_rows) * 100


In [55]:
print(dialogue3_res)

    caller                                               text act_tag  \
0        B                                            Okay. /       o   
1        A                                            Okay. /     b^m   
2        B  {D Well } what do you think about the idea of,...      qw   
3        B                Do you think it's a <breathing>, -/      qy   
4        A  {D Well, } [ I, + I  ] think it's a pretty goo...      sv   
..     ...                                                ...     ...   
97       A                                   enjoy the day. /       +   
98       B                                        You, too. /      fc   
99       A                               Thank you, ma'am.  /      ft   
100      A                                         Bye-bye. /      fc   
101      B                                         Bye-bye. /      fc   

    task1_pred task2_pred task3_pred task4_pred  
0        Okay.          o         qw         sv  
1        Okay.         

In [56]:
print("Task 1 Match Percentage:")
print(calculate_match_percentage(dialogue1_res, 'act_tag', 'task1_pred'))
print(calculate_match_percentage(dialogue2_res, 'act_tag', 'task1_pred'))
print(calculate_match_percentage(dialogue3_res, 'act_tag', 'task1_pred'))

print(r"\n[-^10]\n")

print("Task 2 Match Percentage:")
print(calculate_match_percentage(dialogue1_res, 'act_tag', 'task2_pred'))
print(calculate_match_percentage(dialogue2_res, 'act_tag', 'task2_pred'))
print(calculate_match_percentage(dialogue3_res, 'act_tag', 'task2_pred'))

print("Task 3 Match Percentage:")
print(calculate_match_percentage(dialogue1_res, 'act_tag', 'task3_pred'))
print(calculate_match_percentage(dialogue2_res, 'act_tag', 'task3_pred'))
print(calculate_match_percentage(dialogue3_res, 'act_tag', 'task3_pred'))

print("Task 4 Match Percentage:")
print(calculate_match_percentage(dialogue1_res, 'act_tag', 'task4_pred'))
print(calculate_match_percentage(dialogue2_res, 'act_tag', 'task4_pred'))
print(calculate_match_percentage(dialogue3_res, 'act_tag', 'task4_pred'))

Task 1 Match Percentage:
0.0
0.0
0.0
\n[-^10]\n
Task 2 Match Percentage:
0.39215686274509803
0.0
0.9803921568627451
Task 3 Match Percentage:
0.0
0.25773195876288657
0.0
Task 4 Match Percentage:
0.0
0.0
0.0
