In [1]:
import os
import time

import numpy as np
import openai
import pandas as pd
from datasets import load_dataset
from diskcache import Cache
from openai import OpenAI
from tqdm import tqdm

# Disk-backed cache used to memoize LLM responses. The directory can be
# overridden with the LLM_CACHE_DIR environment variable so the notebook also
# runs on machines without the shared volume; the original path is the default.
cache = Cache(os.environ.get("LLM_CACHE_DIR", "/shared_data0/llm_cachedir"))


  from .autonotebook import tqdm as notebook_tqdm


### OpenAI Querying Functions

In [3]:
@cache.memoize()
def query_openai(prompt, model="gpt-4o"):
    """Send a single-turn user prompt to the OpenAI chat API and return the reply text.

    The API key is read from ``../API_KEY.txt`` on every (non-cached) call.
    The request is attempted up to three times, waiting 3 seconds between
    attempts.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the chat model to query.

    Returns:
        The content of the first choice, or the sentinel string ``"ERROR"``
        when every attempt raised.

    NOTE(review): the function is wrapped in ``cache.memoize()``, so the
    ``"ERROR"`` sentinel is presumably cached like any other return value --
    confirm, and evict such entries if transient failures should be retried.
    """
    with open("../API_KEY.txt", "r") as file:
        # Drop surrounding whitespace (e.g. a trailing newline in the key
        # file) so it is not passed on to the client as part of the key.
        api_key = file.read().strip()
    client = OpenAI(api_key=api_key)

    max_tries = 3
    for attempt in range(1, max_tries + 1):
        try:
            completion = client.chat.completions.create(
                messages=[{
                    "role": "user",
                    "content": prompt,
                }],
                model=model,
            )
            return completion.choices[0].message.content
        except Exception as e:
            print("Try {}; Error: {}".format(str(attempt), str(e)))
            # Only wait when another attempt will actually follow.
            if attempt < max_tries:
                time.sleep(3)
    return "ERROR"

### Load Politeness Data

In [4]:
# Load the train split as a DataFrame, keep only the English rows, and draw a
# fixed 3-row sample (random_state=11) re-indexed from 0.
politeness_data = (
    load_dataset("BrachioLab/multilingual_politeness")['train']
    .to_pandas()
    .loc[lambda frame: frame['language'] == "english"]
    .sample(3, random_state=11)
    .reset_index(drop=True)
)

class PolitenessExample:
    """One utterance plus the artifacts each pipeline stage attaches to it.

    Attributes set at construction:
        utterance: The text being rated.
        ground_truth: Reference politeness value supplied by the caller.
        llm_score: Rating produced by the LLM.
        llm_explanation: The LLM's explanation for that rating.

    Attributes filled in by later stages (all start empty):
        claims: Claims extracted from the explanation.
        relevant_claims: Subset of ``claims`` judged relevant.
        alignment_scores: Per-claim alignment ratings.
        alignment_categories: Per-claim category labels.
        final_alignment: Aggregate alignment value, ``None`` until computed.
    """

    def __init__(self, utterance, ground_truth, llm_score, llm_explanation):
        self.utterance = utterance
        self.ground_truth = ground_truth
        self.llm_score = llm_score
        self.llm_explanation = llm_explanation
        self.claims = []
        self.relevant_claims = []
        self.alignment_scores = []
        # Declared here so every instance has the full set of attributes even
        # before the alignment stage has run.
        self.alignment_categories = []
        self.final_alignment = None

In [5]:
# Spot-check the first sampled utterance.
politeness_data['Utterance'][0]

'This talk page is not the 2004 film version, this page is based on the story. But anyway, the film was great. I think some people are too critical about some things.'

### Stage 0: Get LLM Explanations

In [6]:
# Stage 0 prompt template. It has a single positional `{}` slot that
# get_llm_generated_answer fills with the utterance via str.format.
explanation_prompt = """What is the politeness of the following utterance? Use the following 1-5 scale:
1: extremely rude
2: somewhat rude
3: neutral
4: somewhat polite
5: extremely polite

In addition, provide a paragraph explaining why you gave the utterance that rating. Your response should be 2 lines, formatted as follows:
Rating: <rating>
Explanation: <explanation>

Utterance: {}
"""
def get_llm_generated_answer(utterance: str):
    """Ask the LLM for a politeness rating and explanation of ``utterance``.

    Args:
        utterance: Text to be rated; inserted into ``explanation_prompt``.

    Returns:
        A ``(rating, explanation)`` tuple of strings. ``rating`` is the text
        on the same line as the ``Rating:`` label; ``explanation`` is all text
        after the ``Explanation:`` label. When the query fails or the reply
        does not contain both labels, ``(None, None)`` is returned so the
        result can always be tuple-unpacked.
    """
    prompt = explanation_prompt.format(utterance)
    response = query_openai(prompt)
    if not response or response == "ERROR":
        print("Error in querying OpenAI API")
        return None, None

    # Locate the fields by label instead of by line position so that blank
    # lines or a multi-line explanation do not raise IndexError.
    _, rating_found, after_rating = response.partition("Rating:")
    _, explanation_found, after_explanation = response.partition("Explanation:")
    rating_lines = after_rating.splitlines()
    rating = rating_lines[0].strip() if rating_lines else ""
    if not (rating_found and explanation_found and rating):
        print("Could not parse rating/explanation from response: {!r}".format(response))
        return None, None
    explanation = after_explanation.strip()
    return rating, explanation

# Stage 0 driver: query the LLM once per sampled row and collect the results.
politeness_examples = []
# iterrows() has no length, so pass total= to give tqdm a real progress bar.
for _, row in tqdm(politeness_data.iterrows(), total=len(politeness_data)):
    answer = get_llm_generated_answer(row['Utterance'])
    # A failed query may come back as a bare None or as a tuple whose rating
    # is None; skip the row in either case instead of failing on unpacking.
    if answer is None or answer[0] is None:
        continue
    rating, explanation = answer
    politeness_examples.append(PolitenessExample(
        utterance=row['Utterance'],
        # Shift the dataset label by +3 -- presumably it is centred on 0 and
        # this maps it onto the 1-5 scale used in the prompt; verify.
        ground_truth=float(row['politeness']) + 3,
        llm_score=rating,
        llm_explanation=explanation
    ))

3it [00:00, 198.61it/s]


In [7]:
# Rating parsed from the model's reply for the first example (kept as a string).
politeness_examples[0].llm_score

'3'

In [8]:
# Explanation text parsed from the model's reply for the first example.
politeness_examples[0].llm_explanation

'The utterance is neutral as it provides clarification and expresses a personal opinion without using overtly polite or rude language. It acknowledges a difference without disrespect, although it lacks elements of politeness like gratitude or praise.'

### Stage 1: Atomic claim extraction

In [None]:
# Stage 1 prompt template. It has a single positional `{}` slot that
# isolate_individual_features fills with the explanation via str.format.
claim_prompt = """
You will be given a paragraph that explains why a certain level of politeness was attributed to an utterance. Your task is to decompose this explanation into individual claims that are:

Atomic: Each claim should express only one clear idea or judgment.
Standalone: Each claim should be self-contained and understandable without needing to refer back to the paragraph.
Faithful: The claims must preserve the original meaning, nuance, and tone. Do not omit hedging language (e.g., "seems to," "somewhat," "lacks overt markers") or subjective phrasing if present.

Format your output as a list of claims separated by new lines. Do not include any additional text or explanations.

Here is an example of how to format your output:

INPUT: This utterance is formal and professional, with no overtly rude language. The phrasing is neutral-to-polite, as it avoids accusatory or dismissive tones. The use of "I am copying them here" is transparent and non-confrontational, and "seem to constitute" softens any potential imposition by acknowledging some level of subjectivity. However, it lacks explicit politeness markers such as "please" or "thank you," which would elevate it to "extremely polite."

OUTPUT:
The utterance is formal and professional.
The utterance contains no overtly rude language.
The phrasing is neutral-to-polite because it avoids accusatory or dismissive tones.
The phrase "I am copying them here" is transparent and non-confrontational.
The phrase "seem to constitute" softens any potential imposition by acknowledging subjectivity.
The utterance lacks explicit politeness markers such as "please" or "thank you."
The lack of explicit politeness markers prevents the utterance from being considered "extremely polite."

Now decompose the following paragraph into atomic, standalone claims:
INPUT: {}
"""


def isolate_individual_features(explanation: str):
    """Decompose an LLM explanation into a list of atomic claims.

    Args:
        explanation: Explanation paragraph; inserted into ``claim_prompt``.

    Returns:
        A list of non-empty, whitespace-stripped claim strings (one per line
        of the reply, with any ``OUTPUT:`` marker removed), or ``None`` when
        the query failed.
    """
    prompt = claim_prompt.format(explanation)
    response = query_openai(prompt)
    if not response or response == "ERROR":
        print("Error in querying OpenAI API")
        return None
    body = response.replace("OUTPUT:", "")
    # Skip blank lines so empty strings are never treated as claims by the
    # later stages.
    return [line.strip() for line in body.splitlines() if line.strip()]

# Stage 1 driver: store the extracted claims on each example. When extraction
# returns None the example's existing claims are left as they are.
for example in politeness_examples:
    extracted = isolate_individual_features(example.llm_explanation)
    if extracted is not None:
        example.claims = list(map(str.strip, extracted))

In [10]:
# Claims extracted from the first example's explanation.
politeness_examples[0].claims

['The utterance is neutral.',
 'The utterance provides clarification.',
 'The utterance expresses a personal opinion.',
 'The utterance does not use overtly polite language.',
 'The utterance does not use overtly rude language.',
 'The utterance acknowledges a difference without disrespect.',
 'The utterance lacks elements of politeness such as gratitude.',
 'The utterance lacks elements of politeness such as praise.']

### Stage 2: Distill relevant claims

In [11]:
# Stage 2 prompt template. It has three positional `{}` slots that
# is_claim_relevant fills, in order, with the utterance, the rating and the claim.
relevance_prompt = """You will be given an utterance, its politeness rating on a 1-5 scale (where 1: very rude and 5: very polite), and a claim that may or may not be relevant to an explanation of the rating. Your task is to decide whether the claim is relevant to explaining the politeness rating for this specific utterance.

A claim is relevant if and only if:
(1) It is supported by the content of the utterance (i.e., it does not hallucinate or speculate beyond what is said).
(2) It helps explain why the utterance received the given politeness rating (i.e., it directly relates to tone, phrasing, or other aspects relevant to the rating).

Return your answer as:
Relevance: <Yes/No>
Reasoning: <A brief explanation of your judgment, pointing to specific support or lack thereof>

Here are some examples:

[Example 1]
Utterance: "There is no such fact - you are just making things up. There is no reason to believe that any person reading about Bologna would be particularly interested in Kappa Sigma. If they wanted to know about Kappa Sigma, they would read the Kappa Sigma article instead.."
Politeness Rating: 2
Claim: The utterance accuses the other person of fabricating information.
Relevance: Yes
Reasoning: The claim is relevant because it discusses the accusatory tone of the utterance, which contributes to its rudeness classification.

[Example 2]
Utterance: "Deleted reference to REM sleep in the first sentence. It simply is not true. In fact, REM deprivation is a common side effect of antidepressant use (some attribute their effects to REM deprivation)."
Politeness Rating: 3
Claim: The utterance is neutral.
Relevance: No
Reasoning: The claim is not relevant because it simply states the rating, and does not provide information about why the rating was given. Claims that merely state the rating are not relevant.

[Example 3]
Utterance: "Tetra-gram is a compound word as is the penta-gram. Penta refers to the number 5 in Greek, tetra refers to the number 4 and gram refers to the word line in both cases. Obviously a star shape can't be shaped with 4 lines."
Politeness Rating: 3
Claim: The use of "obviously" might suggest the author is an expert in Greek.
Relevance: No
Reasoning: The claim is not relevant because there is nothing in the text to support that the author may be Greek. 

Now, determine whether the following claim is relevant to the given utterance and politeness rating:
Utterance: {}
Politeness Rating: {}
Claim: {}
"""

def is_claim_relevant(utterance: str, rating: str, claim: str):
    """Ask the LLM whether ``claim`` is relevant to the rating of ``utterance``.

    Args:
        utterance: The rated text.
        rating: Politeness rating assigned to the utterance.
        claim: Candidate claim to judge.

    Returns:
        A ``(relevance, reasoning)`` tuple of strings. ``relevance`` is
        normalized to ``"Yes"`` or ``"No"`` when the reply is a recognizable
        yes/no, otherwise the raw text is returned. ``(None, None)`` is
        returned when the query failed, so the result can always be unpacked.
    """
    prompt = relevance_prompt.format(utterance, rating, claim)
    response = query_openai(prompt)
    if not response or response == "ERROR":
        print("Error in querying OpenAI API")
        return None, None

    # Find each field by its label rather than by line position, so blank
    # lines or a multi-line reasoning do not raise IndexError.
    lines = [line.strip() for line in response.splitlines() if line.strip()]
    relevance = None
    reasoning_parts = []
    in_reasoning = False
    for line in lines:
        lowered = line.lower()
        if relevance is None and lowered.startswith("relevance:"):
            relevance = line[len("relevance:"):].strip()
        elif lowered.startswith("reasoning:"):
            in_reasoning = True
            reasoning_parts.append(line[len("reasoning:"):].strip())
        elif in_reasoning:
            reasoning_parts.append(line)
    if relevance is None:
        # No label present: fall back to the first non-empty line.
        relevance = lines[0] if lines else ""

    # Accept variants such as "yes", "Yes." or "**No**".
    verdict = relevance.strip(" .*").lower()
    if verdict in ("yes", "no"):
        relevance = verdict.capitalize()
    reasoning = " ".join(part for part in reasoning_parts if part)
    return relevance, reasoning


def distill_relevant_features(example: PolitenessExample):
    """Return the claims of ``example`` that the LLM judges relevant.

    Each claim in ``example.claims`` is checked with ``is_claim_relevant``
    against the example's utterance and LLM score. Claims whose check failed
    are skipped.

    Args:
        example: Example whose ``claims`` list is filtered.

    Returns:
        A new list with the claims judged "Yes", in their original order.
    """
    relevant_claims = []
    for claim in tqdm(example.claims):
        verdict = is_claim_relevant(example.utterance, example.llm_score, claim)
        # A failed check may come back as a bare None or as a tuple led by
        # None; skip the claim in either case instead of failing on unpacking.
        if verdict is None or verdict[0] is None:
            continue
        relevance = verdict[0]
        # Tolerate case and trailing-period variations of "Yes".
        if relevance.strip().rstrip(".").lower() == "yes":
            relevant_claims.append(claim)
    return relevant_claims

# Stage 2 driver: keep only the relevant claims on each example.
for example in politeness_examples:
    example.relevant_claims = distill_relevant_features(example)

100%|██████████| 8/8 [00:00<00:00, 562.18it/s]
100%|██████████| 6/6 [00:00<00:00, 690.86it/s]
100%|██████████| 5/5 [00:00<00:00, 1134.70it/s]


In [12]:
# Claims that passed the relevance filter for the first example.
politeness_examples[0].relevant_claims

['The utterance provides clarification.',
 'The utterance expresses a personal opinion.',
 'The utterance does not use overtly polite language.',
 'The utterance does not use overtly rude language.',
 'The utterance acknowledges a difference without disrespect.',
 'The utterance lacks elements of politeness such as gratitude.']

### Stage 3: Calculate alignment scores

In [19]:
# Stage 3 prompt template. It has three positional `{}` slots that
# calculate_expert_alignment_score fills, in order, with the utterance, the
# rating and the claim.
alignment_prompt = """You will be given an utterance, its politeness rating on a 1-5 scale (where 1: very rude and 5: very polite), and a claim that relates to why that rating was given. You will also be given a series of lexical categories that relate to politeness.

Your task is as follows:
1. Determine which lexical category is most aligned with the claim. 
2. Rate how strongly the category aligns with the claim on a scale of 1-5 (1 being lowest, 5 being highest). 

Return your answer as:
Category: <category>
Category Alignment Rating: <rating>
Reasoning: <A brief explanation of why you selected the chosen category and why you judged the alignment rating as you did.>

-----
Lexical categories:
1. Apologetic: Words and phrases used to acknowledge mistakes or express regret. (sorry, woops, oops, sry, apologize)
2. Deference: Polite words that convey respect, admiration, or acknowledgment of someone's status or authority. (great, good, nice, interesting, cool)
3. Direct Question: Words commonly used to form explicit questions seeking information or clarification. (what, where, why, who, when)
4. Discourse Marker: Common transition words used to structure speech or writing. (so, then, and, but, or)
5. Emergency: Phrases indicating urgency, immediate attention, or emergency situations. (right now, rn, as soon as possible, asap, immediately)
6. Factuality: Expressions that assert factual information or emphasize reality. (in fact, actually, the point, the reality, the truth)
7. First Person Plural: Sentences that contain a first-person plural pronoun. (we, our, ours, us, ourselves)
8. First Person Singular: Sentences that contain a first-person singular pronoun. (I, my, mine, myself, me)
9. First Person Start: Sentences that begin with a first-person singular pronoun. (I, my, mine, myself)
10. Gratitude: Words and phrases that express appreciation and thankfulness. (thanks, thx, thank you, thank u, i appreciate)
11. Greeting: Words and phrases used to initiate interaction or acknowledge someone’s presence. (hi, hello, hey)
12. Negative Emotion: Words and expressions that convey strong negative emotions or discontent. (bullshit, fuck, fucking, damn, shit)
13. Positive Emotion: Words and expressions that convey happiness, excitement, or approval. (abound, prefer, pride, priceless, pretty)
14. Hedging: Words that soften statements, making them less direct or assertive. (think, usually, unclearly, unclear, uncertainly)
15. Directive Speech Act: Expressions that instruct, command, or request an action from the listener. (can you, will you, can u, will u)
16. Indirectness: Words that introduce indirectness in communication, often for politeness or subtlety. (btw, by the way)
17. Ingroup Identity: Words that signal belonging to a specific social group or community. (mate, bro, homie, dude)
18. Politeness Marker: Words that make a request or instruction more courteous. (please, pls, plz, plse)
19. Polite Start: Sentences that begin with a politeness marker. (please, pls, plz)
20. Praise: Expressions that convey approval, admiration, or compliments. (awesome, outstanding, excellent, great, neat)
21. Commitment Marker: Words that express certainty or a strong commitment to an action or belief. (must, definitely, sure, definite, surely)
22. Second Person: Sentences that contain a second-person pronoun. (you, your, yours, yourself, u)
23. Second Person Start: Sentences that begin with a second-person pronoun. (you, your, yours, yourself)
24. Polite Request: Phrases that express politeness in requests or suggestions using modal verbs. (could you, would you, could u, would u)
25. Togetherness: Words that emphasize unity, collective action, or inclusivity. (together)
26. Direct Address: Words directly addressing the listener in conversation. (you, u)
-----

Here are some examples:
[Example 1]
Utterance: "There is no such fact - you are just making things up. There is no reason to believe that any person reading about Bologna would be particularly interested in Kappa Sigma. If they wanted to know about Kappa Sigma, they would read the Kappa Sigma article instead.."
Politeness Rating: 2
Claim: The utterance accuses the other person of fabricating information.
Category: Negative Emotion
Category Alignment Rating: 4
Reasoning: The accusatory tone and claim of fabrication imply a confrontational or hostile interaction, which strongly aligns with negative emotion. While the emotion is more implicit than explicit profanity or insult, the accusatory framing still carries a strong negative charge.

[Example 2]
Utterance: "Deleted reference to REM sleep in the first sentence. It simply is not true. In fact, REM deprivation is a common side effect of antidepressant use (some attribute their effects to REM deprivation)."
Politeness Rating: 3
Claim: The sentence structure is overly complex and difficult to follow.
Category: Discourse Marker
Category Alignment Rating: 1
Reasoning: The claim is about sentence complexity and structure, which does not relate to any of the listed lexical categories, including Discourse Marker, which refers to specific connecting words like “so” or “but.” The utterance does not exhibit structural markers that would directly contribute to complexity based on the provided categories, making the alignment very weak.

[Example 3]
Utterance: "Tetra-gram is a compound word as is the penta-gram. Penta refers to the number 5 in Greek, tetra refers to the number 4 and gram refers to the word line in both cases. Obviously a star shape can't be shaped with 4 lines."
Politeness Rating: 3
Claim: The use of "obviously" might suggest a slight assumption of common knowledge.
Category: Factuality
Category Alignment Rating: 3
Reasoning: The utterance presents factual information about word origins and geometric logic, and the claim focuses on the use of "obviously," which implies assumed knowledge rather than asserting a fact. This aligns somewhat with Factuality, but not strongly—Factuality is involved, but the assumption of common knowledge is more about tone than fact.

Now, determine the category and alignment rating for the following claim:
Utterance: {}
Politeness Rating: {}
Claim: {}
"""

def calculate_expert_alignment_score(utterance: str, rating: str, claim: str):
    """Ask the LLM for the best-matching lexical category and its alignment rating.

    Args:
        utterance: The rated text.
        rating: Politeness rating assigned to the utterance.
        claim: Claim to align with a lexical category.

    Returns:
        A ``(category, alignment_score, reasoning)`` tuple of strings, where
        ``alignment_score`` is a single digit character. ``(None, None, None)``
        is returned when the query failed or when no category or numeric
        rating could be found in the reply, so the result can always be
        unpacked.
    """
    prompt = alignment_prompt.format(utterance, rating, claim)
    response = query_openai(prompt)
    if not response or response == "ERROR":
        print("Error in querying OpenAI API")
        return None, None, None

    # Look the fields up by label rather than by line position so blank lines
    # or reordered fields do not raise IndexError. The first occurrence wins.
    fields = {"category": None, "category alignment rating": None, "reasoning": None}
    for line in response.splitlines():
        label, sep, value = line.partition(":")
        key = label.strip(" *").lower()
        if sep and key in fields and fields[key] is None:
            fields[key] = value.strip(" *")

    category = fields["category"]
    raw_score = fields["category alignment rating"] or ""
    # The prompt asks for a 1-5 rating, so the first digit is the score even
    # when the reply decorates it (e.g. "4/5").
    alignment_score = next((ch for ch in raw_score if ch.isdigit()), None)
    if not category or alignment_score is None:
        print("Could not parse alignment response: {!r}".format(response))
        return None, None, None
    reasoning = fields["reasoning"] or ""
    return category, alignment_score, reasoning

# Stage 3 driver: score every relevant claim and aggregate per example.
for example in politeness_examples:
    alignment_scores = []
    alignment_categories = []
    for claim in tqdm(example.relevant_claims):
        result = calculate_expert_alignment_score(example.utterance, example.llm_score, claim)
        # A failed call may come back as a bare None or as a tuple led by
        # None; skip the claim in either case instead of failing on unpacking.
        if result is None or result[0] is None:
            continue
        category, alignment_score = result[0], result[1]
        try:
            float(alignment_score)
        except (TypeError, ValueError):
            # A non-numeric rating would otherwise break the mean below.
            print("Skipping non-numeric alignment score: {!r}".format(alignment_score))
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) returns nan and emits a RuntimeWarning; make the
    # "no scored claims" case explicit and silent instead.
    if alignment_scores:
        example.final_alignment = np.mean([float(score) for score in alignment_scores])
    else:
        example.final_alignment = float("nan")
    

100%|██████████| 6/6 [00:00<00:00, 1336.40it/s]
100%|██████████| 4/4 [00:00<00:00, 1679.74it/s]
100%|██████████| 4/4 [00:00<00:00, 1679.23it/s]


In [20]:
# Per-claim alignment ratings for the first example (kept as strings).
politeness_examples[0].alignment_scores

['4', '4', '2', '1', '4', '4']

In [21]:
# Lexical category chosen for each relevant claim of the first example.
politeness_examples[0].alignment_categories

['Factuality',
 'First Person Singular',
 'Hedging',
 'Negative Emotion',
 'Hedging',
 'Gratitude']

In [22]:
# Mean of the per-claim alignment ratings for the first example.
politeness_examples[0].final_alignment

3.1666666666666665