In [None]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from openai import OpenAI
import json
from tqdm import tqdm

import importlib
import sys; sys.path.append("../src")
import politeness
importlib.reload(politeness)
from politeness import PolitenessExample, get_llm_generated_answer, isolate_individual_features, distill_relevant_features, calculate_expert_alignment_score, run_pipeline, load_politeness_data

### Load Politeness Data

In [None]:
# Pull the "train" split of the multilingual politeness dataset into pandas,
# keep only the English rows, and draw a single reproducible sample (seed 11).
politeness_data = load_dataset("BrachioLab/multilingual_politeness")['train'].to_pandas()
politeness_data = (
    politeness_data
    .loc[lambda frame: frame['language'] == "english"]
    .sample(1, random_state=11)
    .reset_index(drop=True)
)

In [None]:
# Peek at the sampled utterance (row label 0 after the index reset).
politeness_data.loc[0, 'Utterance']

### Stage 0: Get LLM Explanations

In [None]:
# Stage 0: ask the LLM for a rating plus explanation for every sampled utterance
# and collect the successful answers as PolitenessExample records.
politeness_examples = []
# iterrows() is a generator without len(), so tqdm needs an explicit `total`;
# without it only a running count is shown instead of a progress bar / ETA.
for idx, row in tqdm(politeness_data.iterrows(), total=len(politeness_data)):
    # NOTE(review): "subq" presumably selects a prompt variant inside
    # get_llm_generated_answer — confirm against ../src/politeness.py.
    rating, explanation = get_llm_generated_answer(row['Utterance'], "subq")
    if rating is None:
        # No rating came back for this utterance; leave it out of the pipeline.
        continue
    politeness_examples.append(PolitenessExample(
        utterance=row['Utterance'],
        # NOTE(review): the +3 presumably shifts the dataset's politeness scale
        # onto the scale the LLM rates on — confirm the ranges of both.
        ground_truth=float(row['politeness']) + 3,
        llm_score=rating,
        llm_explanation=explanation
    ))

In [None]:
# Show the rating stored as `llm_score` on the first collected example.
# Raises IndexError if Stage 0 skipped every row (rating was None).
politeness_examples[0].llm_score

In [None]:
# Show the LLM explanation stored on the first example; this text is what
# Stage 1 passes to isolate_individual_features.
politeness_examples[0].llm_explanation

### Stage 1: Atomic claim extraction

In [None]:
# Stage 1: split each explanation into individual claims and store them,
# whitespace-stripped, on the example. Examples for which the extractor
# returns None are left untouched.
for example in politeness_examples:
    claims = isolate_individual_features(example.llm_explanation)
    if claims is not None:
        example.claims = [text.strip() for text in claims]

In [None]:
# Inspect the whitespace-stripped claims extracted for the first example.
# Stage 1 only assigns this attribute when extraction returned a non-None result.
politeness_examples[0].claims

### Stage 2: Distill relevant claims

In [None]:
# Stage 2: attach to every example whatever distill_relevant_features returns for it.
for example in politeness_examples:
    example.relevant_claims = distill_relevant_features(example)

In [None]:
# Inspect the result of distill_relevant_features stored on the first example.
politeness_examples[0].relevant_claims

### Stage 3: Calculate alignment scores

In [None]:
# Stage 3: run calculate_expert_alignment_score on every relevant claim and
# store the per-claim results plus their mean on the example.
for example in politeness_examples:
    alignment_scores = []
    alignment_categories = []
    # Treat a missing (None) relevant_claims as "no claims" so one failed example
    # does not abort the loop with a TypeError inside tqdm.
    claims_to_score = example.relevant_claims if example.relevant_claims is not None else []
    for claim in tqdm(claims_to_score):
        # `reasoning` is returned by the scorer but not stored on the example.
        category, alignment_score, reasoning = calculate_expert_alignment_score(claim)
        if category is None:
            # No category came back for this claim; leave it out of both lists
            # so scores and categories stay index-aligned.
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) returns nan but emits "RuntimeWarning: Mean of empty slice";
    # produce the same nan explicitly when nothing was scored.
    example.final_alignment_score = np.mean(alignment_scores) if alignment_scores else np.nan
    

In [None]:
# Per-claim alignment scores for the first example; claims whose category
# came back as None are not included.
politeness_examples[0].alignment_scores

In [None]:
# Categories for the first example, appended in step with alignment_scores
# (entry i of each list refers to the same claim).
politeness_examples[0].alignment_categories

In [None]:
# Mean of the first example's alignment scores (nan when no claim was scored).
politeness_examples[0].final_alignment_score