In [40]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from openai import OpenAI
import time
from tqdm import tqdm

import importlib
import sys; sys.path.append("../src")
import politeness
importlib.reload(politeness)
from politeness import PolitenessExample, get_llm_generated_answer, isolate_individual_features, distill_relevant_features, calculate_expert_alignment_score

### Load Politeness Data

In [2]:
# Load the train split as a DataFrame, keep only the English rows, and draw a
# 3-row sample with a pinned random_state; the index is reset to 0..2.
politeness_data = (
    load_dataset("BrachioLab/multilingual_politeness")["train"]
    .to_pandas()
    .loc[lambda frame: frame["language"] == "english"]
    .sample(3, random_state=11)
    .reset_index(drop=True)
)

In [3]:
# Display the first sampled utterance (label 0 after the index reset).
politeness_data['Utterance'][0]

'This talk page is not the 2004 film version, this page is based on the story. But anyway, the film was great. I think some people are too critical about some things.'

### Stage 0: Get LLM Explanations

In [4]:
# Stage 0: ask the LLM for a politeness rating and an explanation per utterance,
# wrapping every successful response in a PolitenessExample.

# Added to the dataset's `politeness` label to form the ground truth.
# NOTE(review): presumably this shifts the label onto the same scale as the
# LLM rating — confirm against the dataset card / prompt.
GROUND_TRUTH_OFFSET = 3

politeness_examples = []
skipped_rows = 0
# iterrows() is a generator with no len(); passing `total` lets tqdm render a
# real progress bar instead of a bare iteration counter.
for _idx, row in tqdm(politeness_data.iterrows(), total=len(politeness_data)):
    rating, explanation = get_llm_generated_answer(row['Utterance'])
    if rating is None:
        # No rating came back for this utterance: leave it out, but keep count
        # so the drop is visible rather than silent.
        skipped_rows += 1
        continue
    politeness_examples.append(PolitenessExample(
        utterance=row['Utterance'],
        ground_truth=float(row['politeness']) + GROUND_TRUTH_OFFSET,
        # NOTE(review): stored exactly as returned; the saved output of the
        # following cell shows a string ('3') while ground_truth is a float.
        llm_score=rating,
        llm_explanation=explanation
    ))

if skipped_rows:
    print(f"Skipped {skipped_rows} of {len(politeness_data)} utterances with no LLM rating")

3it [00:06,  2.03s/it]


In [5]:
# Rating the LLM returned for the first example.
# NOTE(review): the saved output is the string '3', not a number — confirm
# whether downstream code expects a numeric score.
politeness_examples[0].llm_score

'3'

In [6]:
# Explanation text the LLM returned alongside the rating for the first example.
politeness_examples[0].llm_explanation

"The utterance is neutral, as it simply conveys information and personal opinion without any language indicative of politeness or rudeness. The speaker's choice of words does not offend, but also lacks markers of politeness such as gratitude or apology."

### Stage 1: Atomic claim extraction

In [7]:
# Stage 1: break each LLM explanation into individual claims and store them,
# whitespace-stripped, on the example. Examples for which extraction returns
# None are left untouched.
for example in politeness_examples:
    atomic_claims = isolate_individual_features(example.llm_explanation)
    if atomic_claims is not None:
        example.claims = [text.strip() for text in atomic_claims]

In [8]:
# Claims extracted from the first example's explanation (whitespace-stripped).
politeness_examples[0].claims

['The utterance is neutral.',
 'The utterance simply conveys information and personal opinion.',
 'The utterance contains no language indicative of politeness.',
 'The utterance contains no language indicative of rudeness.',
 "The speaker's choice of words does not offend.",
 'The utterance lacks markers of politeness such as gratitude or apology.']

### Stage 2: Distill relevant claims

In [9]:
# Stage 2: store on each example whatever distill_relevant_features returns for it.
for example in politeness_examples:
    example.relevant_claims = distill_relevant_features(example)

100%|██████████| 6/6 [00:13<00:00,  2.31s/it]
100%|██████████| 6/6 [00:12<00:00,  2.04s/it]
100%|██████████| 7/7 [00:16<00:00,  2.32s/it]


In [10]:
# Claims that distill_relevant_features returned for the first example
# (in the saved output, 3 of the 6 extracted claims).
politeness_examples[0].relevant_claims

['The utterance simply conveys information and personal opinion.',
 'The utterance contains no language indicative of rudeness.',
 'The utterance lacks markers of politeness such as gratitude or apology.']

### Stage 3: Calculate alignment scores

In [None]:
# Stage 3: score each relevant claim with calculate_expert_alignment_score and
# aggregate the per-claim scores into a single alignment value per example.
for example in politeness_examples:
    alignment_scores = []
    alignment_categories = []
    for claim in tqdm(example.relevant_claims):
        # The third return value is not used in this notebook.
        category, alignment_score, _reasoning = calculate_expert_alignment_score(claim)
        if category is None:
            # No category was returned for this claim, so it contributes
            # neither a score nor a category.
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
    # The two lists are parallel: entry i of each refers to the same claim.
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) emits a RuntimeWarning and yields nan; make the
    # "no claim was scored" case explicit (still nan) without the warning.
    example.final_alignment = np.mean(alignment_scores) if alignment_scores else np.nan
    

100%|██████████| 3/3 [00:00<00:00, 1106.77it/s]
100%|██████████| 5/5 [00:00<00:00, 1428.97it/s]
100%|██████████| 5/5 [00:00<00:00, 1522.32it/s]


In [42]:
# Per-claim alignment scores for the first example, in the order the claims were scored.
politeness_examples[0].alignment_scores

[0.8, 0.8, 0.8]

In [43]:
# Category returned for each scored claim of the first example
# (appended in step with alignment_scores, so the two lists are parallel).
politeness_examples[0].alignment_categories

['First-Person Subjectivity Markers',
 'Avoidance of Profanity or Negative Emotion',
 'Gratitude Expressions']

In [14]:
# Mean of the first example's per-claim alignment scores.
# NOTE(review): the saved output below (0.6999...) is not the mean of the saved
# alignment_scores output ([0.8, 0.8, 0.8]), and this cell's execution count (14)
# is older than the cells that display the scores (42, 43). The output looks
# stale; restart the kernel and run all cells before trusting it.
politeness_examples[0].final_alignment

0.6999999999999998