In [7]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from openai import OpenAI
import time
from tqdm import tqdm

import importlib
import sys; sys.path.append("../src")
import politeness
importlib.reload(politeness)
from politeness import PolitenessExample, get_llm_generated_answer, isolate_individual_features, distill_relevant_features, calculate_expert_alignment_score

### Load Politeness Data

In [2]:
# Download the multilingual politeness dataset and turn its train split into a DataFrame.
politeness_dataset = load_dataset("BrachioLab/multilingual_politeness")
politeness_train = politeness_dataset['train'].to_pandas()

# Keep only the English rows, then draw a single seeded sample so re-runs pick the same row.
is_english = politeness_train['language'] == "english"
politeness_data = (
    politeness_train[is_english]
    .sample(1, random_state=11)
    .reset_index(drop=True)
)

In [3]:
# Show the text of the sampled utterance (row label 0 after the index reset).
politeness_data.loc[0, 'Utterance']

'This talk page is not the 2004 film version, this page is based on the story. But anyway, the film was great. I think some people are too critical about some things.'

### Stage 0: Get LLM Explanations

In [8]:
# Stage 0: ask the LLM for a politeness rating and an explanation for every utterance,
# collecting the successful answers as PolitenessExample objects.
politeness_examples = []
# iterrows() is a generator without a length, so tqdm needs an explicit `total`
# to render a real progress bar instead of a bare "1it" counter.
for idx, row in tqdm(politeness_data.iterrows(), total=len(politeness_data)):
    rating, explanation = get_llm_generated_answer(row['Utterance'], "subq")
    if rating is None:
        # No rating came back for this utterance; leave it out of the later stages.
        continue
    politeness_examples.append(PolitenessExample(
        utterance=row['Utterance'],
        # Dataset label shifted by +3 (presumably onto the LLM's rating scale; TODO confirm).
        ground_truth=float(row['politeness']) + 3,
        # NOTE(review): stored exactly as returned; the saved output shows a string ('3'),
        # while ground_truth is a float. Confirm downstream code expects that.
        llm_score=rating,
        llm_explanation=explanation
    ))

1it [00:02,  2.87s/it]


In [5]:
# Rating the LLM gave the first example, as stored in Stage 0.
politeness_examples[0].llm_score

'3'

In [12]:
# Free-text explanation the LLM returned alongside its rating for the first example.
politeness_examples[0].llm_explanation

'The first subquestion to consider is whether the utterance contains any direct forms of rudeness or negative language; in this case, it does not, as it merely expresses an opinion. Next, does the utterance include any positive or polite language, such as compliments or respectful terms? The phrase "the film was great" conveys a positive sentiment, although it lacks any formal markers of politeness. Finally, examine if the utterance maintains a neutral tone without sarcasm or condescension, which it does. These considerations suggest a neutral politeness level.'

### Stage 1: Atomic claim extraction

In [13]:
# Stage 1: split each LLM explanation into individual claims and store them,
# whitespace-trimmed, on the example.
for example in politeness_examples:
    extracted = isolate_individual_features(example.llm_explanation)
    # NOTE(review): when extraction returns None, `claims` is not assigned here;
    # confirm that Stage 2 tolerates an example without it.
    if extracted is not None:
        example.claims = [text.strip() for text in extracted]

In [14]:
# Claims extracted from the first example's explanation in Stage 1.
politeness_examples[0].claims

['The utterance does not contain any direct forms of rudeness or negative language.',
 'The utterance merely expresses an opinion.',
 'The phrase "the film was great" conveys a positive sentiment.',
 'The phrase "the film was great" lacks any formal markers of politeness.',
 'The utterance maintains a neutral tone without sarcasm or condescension.',
 'The considerations suggest a neutral politeness level.']

### Stage 2: Distill relevant claims

In [15]:
# Stage 2: store on each example whatever distill_relevant_features returns for it.
for example in politeness_examples:
    example.relevant_claims = distill_relevant_features(example)

100%|██████████| 6/6 [00:10<00:00,  1.78s/it]


In [16]:
# Claims kept for the first example after the Stage 2 distillation.
politeness_examples[0].relevant_claims

['The utterance does not contain any direct forms of rudeness or negative language.',
 'The utterance merely expresses an opinion.',
 'The phrase "the film was great" lacks any formal markers of politeness.',
 'The utterance maintains a neutral tone without sarcasm or condescension.']

### Stage 3: Calculate alignment scores

In [14]:
# Stage 3: score every relevant claim against the expert categories and
# average the per-claim scores into one alignment value per example.
for example in politeness_examples:
    alignment_scores = []
    alignment_categories = []
    for claim in tqdm(example.relevant_claims):
        category, alignment_score, reasoning = calculate_expert_alignment_score(claim)
        if category is None:
            # No category was returned for this claim; leave it out of the average.
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) emits a RuntimeWarning before returning nan. When every claim was
    # skipped (or there were none), record NaN explicitly and without the warning.
    example.final_alignment = np.mean(alignment_scores) if alignment_scores else np.nan
    

100%|██████████| 3/3 [00:08<00:00,  2.82s/it]
100%|██████████| 6/6 [00:17<00:00,  2.89s/it]
100%|██████████| 3/3 [00:14<00:00,  4.95s/it]


In [15]:
# Per-claim alignment scores collected for the first example in Stage 3.
politeness_examples[0].alignment_scores

[0.2, 0.9, 0.8]

In [16]:
# Category assigned to each scored claim of the first example, collected in Stage 3.
politeness_examples[0].alignment_categories

['Discourse Management with Markers',
 'First-Person Subjectivity Markers',
 'First-Person Subjectivity Markers']

In [28]:
# Mean of the first example's alignment scores, as computed in the Stage 3 loop.
# NOTE(review): the saved value below (0.82) is not the mean of the three saved scores
# shown earlier (about 0.63), so these outputs likely come from different runs.
# Restart the kernel and run all cells to refresh them.
politeness_examples[0].final_alignment

0.8200000000000001