In [None]:
!pip install convokit
!pip install transformers
!pip install scikit-learn
!pip install pandas
!pip install numpy
!pip install openai


Collecting convokit
  Downloading convokit-3.0.0.tar.gz (183 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/183.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m153.6/183.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.2/183.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting msgpack-numpy>=0.4.3.2 (from convokit)
  Downloading msgpack_numpy-0.4.8-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting dill>=0.2.9 (from convokit)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting clean-text>=0.6.0 (from convokit)
  Downloading clean_text-0.6.0-py3-none-any.whl.metadata (6.6 kB)
Collecting unidecode>=1.1.1 (from convokit)
  Dow

In [None]:
from convokit import Corpus, download
# Fetch the Stack Exchange politeness corpus with ConvoKit's downloader and load it.
se_corpus = Corpus(filename=download("stack-exchange-politeness-corpus"))


Downloading stack-exchange-politeness-corpus to /root/.convokit/downloads/stack-exchange-politeness-corpus
Downloading stack-exchange-politeness-corpus from http://zissou.infosci.cornell.edu/convokit/datasets/stack-exchange-politeness-corpus/stack-exchange-politeness-corpus.zip (2.3MB)... Done
No configuration file found at /root/.convokit/config.yml; writing with contents: 
# Default Backend Parameters
db_host: localhost:27017
data_directory: ~/.convokit/saved-corpora
default_backend: mem


In [None]:
# Fetch the Wikipedia politeness corpus with ConvoKit's downloader and load it.
wiki_corpus = Corpus(filename=download("wikipedia-politeness-corpus"))


Downloading wikipedia-politeness-corpus to /root/.convokit/downloads/wikipedia-politeness-corpus
Downloading wikipedia-politeness-corpus from http://zissou.infosci.cornell.edu/convokit/datasets/wikipedia-politeness-corpus/wikipedia-politeness-corpus.zip (1.7MB)... Done


In [None]:
import pandas as pd

def extract_politeness_data(corpus):
    """Collect every utterance's text and normalized politeness score.

    Args:
        corpus: Object exposing ``iter_utterances()``; each yielded utterance
            must provide ``.text`` and a ``.meta`` mapping that contains a
            ``'Normalized Score'`` entry.

    Returns:
        pandas.DataFrame with one row per utterance, in iteration order, and
        the columns ``text`` and ``score``.

    Raises:
        KeyError: If an utterance's metadata has no ``'Normalized Score'``.
    """
    records = [
        {'text': utterance.text, 'score': utterance.meta['Normalized Score']}
        for utterance in corpus.iter_utterances()
    ]
    return pd.DataFrame(records)


In [None]:
# Flatten each corpus into a DataFrame with 'text' and 'score' columns.
se_df = extract_politeness_data(se_corpus)
wiki_df = extract_politeness_data(wiki_corpus)


In [None]:
def label_data(df):
    """Label each row by the quartile band its score falls into.

    The quartiles are computed from the ``score`` column of ``df`` itself, so
    the thresholds are relative to whatever frame is passed in.

    Args:
        df: DataFrame with a numeric ``score`` column.

    Returns:
        A new DataFrame equal to ``df`` plus a ``label`` column holding
        ``'impolite'`` (score <= 25th percentile), ``'polite'``
        (score >= 75th percentile) or ``'neutral'`` (everything else).
        The input frame is left unmodified.
    """
    q1 = df['score'].quantile(0.25)
    q3 = df['score'].quantile(0.75)

    def label_score(score):
        # The lower band is checked first, so if q1 == q3 a score equal to
        # both is labelled 'impolite'.
        if score <= q1:
            return 'impolite'
        if score >= q3:
            return 'polite'
        return 'neutral'

    # assign() works on a copy, so the caller's frame is not mutated.
    return df.assign(label=df['score'].apply(label_score))

# Attach quartile-based labels; thresholds are computed separately per corpus
# because label_data takes its quantiles from the frame it is given.
se_df = label_data(se_df)
wiki_df = label_data(wiki_df)


In [None]:
def filter_data(df):
    """Drop the rows labelled ``'neutral'`` and renumber the remainder.

    Args:
        df: DataFrame with a ``label`` column.

    Returns:
        A new DataFrame containing only the non-neutral rows, with a fresh
        0..n-1 index.
    """
    is_extreme = df['label'].ne('neutral')
    kept = df.loc[is_extreme]
    return kept.reset_index(drop=True)

# Keep only the 'polite' / 'impolite' rows; the middle 'neutral' band is discarded.
se_df_filtered = filter_data(se_df)
wiki_df_filtered = filter_data(wiki_df)


In [None]:
from sklearn.model_selection import train_test_split

def split_data(df, test_size=0.2, random_state=42):
    """Split a labelled frame into train and test parts, stratified by label.

    Args:
        df: DataFrame with a ``label`` column used for stratification.
        test_size: Share (or count) of rows placed in the test part; forwarded
            to ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable. Defaults to 42.

    Returns:
        Tuple ``(train_df, test_df)``.
    """
    train_df, test_df = train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
        stratify=df['label'],
    )
    return train_df, test_df

# Stratified train/test split of each filtered corpus frame.
se_train_df, se_test_df = split_data(se_df_filtered)
wiki_train_df, wiki_test_df = split_data(wiki_df_filtered)

In [None]:
from convokit import PolitenessStrategies

# Module-level transformer instance; extract_features reads this global.
ps = PolitenessStrategies()

def extract_features(corpus, df):
    """Build a politeness-strategy feature table for the utterances in ``df``.

    The corpus is passed through the module-level ``ps`` transformer, and every
    utterance whose text appears in ``df['text']`` contributes one row made of
    its ``meta['politeness_strategies']`` entries plus its label.

    Args:
        corpus: Corpus accepted by ``ps.transform``; the transformed corpus
            must expose ``iter_utterances()``.
        df: DataFrame with ``text`` and ``label`` columns. If a text occurs
            more than once, the label of its first row is used.

    Returns:
        pandas.DataFrame with one column per strategy key and a final
        ``label`` column; missing values are replaced by 0.
    """
    corpus = ps.transform(corpus)

    # One pass over the frame builds a text -> label lookup (first occurrence
    # wins), replacing a full scan of the frame for every utterance.
    label_by_text = {}
    for text, label in zip(df['text'], df['label']):
        label_by_text.setdefault(text, label)

    features = []
    labels = []
    for utt in corpus.iter_utterances():
        if utt.text in label_by_text:
            features.append(utt.meta['politeness_strategies'])
            labels.append(label_by_text[utt.text])

    feature_df = pd.DataFrame(features)
    feature_df['label'] = labels
    return feature_df.fillna(0)


In [None]:
def update_corpus(corpus, df):
    """Build a new corpus from the utterances whose text occurs in ``df``.

    Args:
        corpus: Source corpus exposing ``iter_utterances()``.
        df: DataFrame whose ``text`` column lists the texts to keep.

    Returns:
        A ``Corpus`` constructed from the matching utterances, in the order
        the source corpus yields them. Matching is by exact text, so every
        utterance sharing a listed text is kept.
    """
    wanted_texts = set(df['text'].values)
    kept = []
    for utterance in corpus.iter_utterances():
        if utterance.text in wanted_texts:
            kept.append(utterance)
    return Corpus(utterances=kept)


In [None]:
# Build one sub-corpus per split, keeping the utterances whose text appears in
# that split's frame.
# NOTE(review): update_corpus matches on text only, so a text that appears in
# both the train and test frames would put its utterances in both corpora —
# confirm the corpora contain no duplicate texts.
se_train_corpus = update_corpus(se_corpus, se_train_df)
se_test_corpus = update_corpus(se_corpus, se_test_df)

wiki_train_corpus = update_corpus(wiki_corpus, wiki_train_df)
wiki_test_corpus = update_corpus(wiki_corpus, wiki_test_df)

In [None]:
# Politeness-strategy feature tables (plus a 'label' column) for every split.
se_train_features = extract_features(se_train_corpus, se_train_df)
se_test_features = extract_features(se_test_corpus, se_test_df)

wiki_train_features = extract_features(wiki_train_corpus, wiki_train_df)
wiki_test_features = extract_features(wiki_test_corpus, wiki_test_df)


In [None]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

def train_svm(train_features):
    """Fit a default ``LinearSVC`` on a feature table.

    Args:
        train_features: DataFrame whose ``label`` column is the target and
            whose remaining columns are the model inputs.

    Returns:
        The fitted ``LinearSVC`` instance.
    """
    targets = train_features['label']
    inputs = train_features.drop(columns=['label'])
    classifier = LinearSVC()
    classifier.fit(inputs, targets)
    return classifier


In [None]:
# One linear SVM per corpus, each fitted on that corpus's training features.
se_clf = train_svm(se_train_features)
wiki_clf = train_svm(wiki_train_features)

In [None]:
def evaluate_model(clf, test_features):
    """Score a fitted classifier on a feature table.

    Args:
        clf: Fitted estimator exposing ``predict``.
        test_features: DataFrame whose ``label`` column holds the true labels
            and whose remaining columns are the model inputs.

    Returns:
        The value returned by ``accuracy_score`` for the true labels and the
        classifier's predictions.
    """
    truth = test_features['label']
    predicted = clf.predict(test_features.drop(columns=['label']))
    return accuracy_score(truth, predicted)

# Score each linguistic-feature SVM on its own corpus's test features and
# report the accuracies as percentages.
se_ling_accuracy = evaluate_model(se_clf, se_test_features)
wiki_ling_accuracy = evaluate_model(wiki_clf, wiki_test_features)

print(f"Stack Exchange Ling Model Accuracy: {se_ling_accuracy*100:.2f}%")
print(f"Wikipedia Ling Model Accuracy: {wiki_ling_accuracy*100:.2f}%")


Stack Exchange Ling Model Accuracy: 57.74%
Wikipedia Ling Model Accuracy: 73.58%


In [None]:
from transformers import BertTokenizer

# Pretrained 'bert-base-uncased' tokenizer. PolitenessDataset reads this
# module-level name, so this cell must run before any dataset is built.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
from torch.utils.data import Dataset
import torch
class PolitenessDataset(Dataset):
    """Torch dataset pairing tokenized texts with binary politeness labels.

    All texts are tokenized once, at construction time, with the module-level
    ``tokenizer`` (truncated and padded, ``max_length=128``). Labels are
    encoded as 1 for ``'polite'`` and 0 for anything else.
    """

    @staticmethod
    def _to_class_id(label):
        """Map a label to its class id: 1 for 'polite', otherwise 0."""
        return 1 if label == 'polite' else 0

    def __init__(self, texts, labels):
        """Tokenize ``texts`` and encode ``labels``.

        Args:
            texts: Series of strings (``.tolist()`` is called on it).
            labels: Series of label strings (``.apply`` is called on it).
        """
        raw_texts = texts.tolist()
        self.encodings = tokenizer(raw_texts, truncation=True, padding=True, max_length=128)
        self.labels = labels.apply(self._to_class_id).tolist()

    def __len__(self):
        """Number of examples, taken from the encoded label list."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors.

        The dict holds one tensor per tokenizer output field plus a
        ``labels`` entry containing the class id as a long tensor.
        """
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])
        item['labels'] = torch.tensor(self.labels[idx]).long()
        return item


In [None]:
# Build the BERT datasets; PolitenessDataset tokenizes every text in its
# constructor, so all tokenization happens in this cell.
se_train_dataset = PolitenessDataset(se_train_df['text'], se_train_df['label'])
se_test_dataset = PolitenessDataset(se_test_df['text'], se_test_df['label'])

wiki_train_dataset = PolitenessDataset(wiki_train_df['text'], wiki_train_df['label'])
wiki_test_dataset = PolitenessDataset(wiki_test_df['text'], wiki_test_df['label'])


In [None]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

def train_bert(train_dataset, eval_dataset, output_dir='./results', num_train_epochs=3):
    """Fine-tune ``bert-base-uncased`` for sequence classification.

    Args:
        train_dataset: Dataset handed to ``Trainer`` for training.
        eval_dataset: Dataset evaluated at the end of every epoch; its
            accuracy decides which checkpoint is reloaded once training
            finishes (``load_best_model_at_end=True``).
        output_dir: Directory for checkpoints. Defaults to ``'./results'``;
            pass a different path per run so that separate runs do not share
            one checkpoint directory.
        num_train_epochs: Number of training epochs. Defaults to 3.

    Returns:
        The ``Trainer`` after ``train()`` has completed.
    """
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    # NOTE(review): newer transformers releases reportedly rename
    # `evaluation_strategy` to `eval_strategy` — confirm against the installed version.
    # NOTE(review): the recorded run prompted for a wandb API key; pass
    # report_to="none" here if experiment tracking is not wanted — confirm.
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        evaluation_strategy="epoch",
        logging_dir='./logs',
        logging_steps=10,
        load_best_model_at_end=True,
        metric_for_best_model='accuracy',
        greater_is_better=True,
        save_total_limit=1,
        # Checkpoints are saved on the same schedule as evaluation so that
        # load_best_model_at_end has a checkpoint for every evaluated epoch.
        save_strategy="epoch",
    )

    def compute_metrics(p):
        # Predicted class = index of the largest logit along the last axis.
        preds = p.predictions.argmax(-1)
        labels = p.label_ids
        acc = (preds == labels).mean()
        return {'accuracy': acc}

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    return trainer

In [None]:
# NOTE(review): torch is already imported by an earlier cell (the one defining
# PolitenessDataset), so on a fresh top-to-bottom run this install comes too
# late to satisfy that import; consider moving it to the first install cell.
!pip install torch



In [None]:
# NOTE(review): the test split is passed as the evaluation set, and train_bert
# reloads the checkpoint with the best evaluation accuracy, so checkpoint
# selection sees the test data. A separate validation split would avoid this.
print("Training BERT on Stack Exchange data:")
se_trainer = train_bert(se_train_dataset, se_test_dataset)


Training BERT on Stack Exchange data:


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.625,0.586341,0.689864
2,0.4232,0.747455,0.629349
3,0.2178,0.873713,0.688351


In [None]:
# NOTE(review): here too the test split doubles as the evaluation set that
# train_bert uses to pick the best checkpoint.
print("Training BERT on Wikipedia data:")
wiki_trainer = train_bert(wiki_train_dataset, wiki_test_dataset)


Training BERT on Wikipedia data:


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3472,0.332483,0.87156
2,0.1011,0.464338,0.876147


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3472,0.332483,0.87156
2,0.1011,0.464338,0.876147


In [None]:
def evaluate_bert(trainer, test_dataset):
    """Evaluate a trainer on a dataset, print the accuracy and return it.

    Args:
        trainer: Object whose ``evaluate(eval_dataset=...)`` returns a mapping
            containing an ``'eval_accuracy'`` entry.
        test_dataset: Dataset forwarded to ``trainer.evaluate``.

    Returns:
        The ``'eval_accuracy'`` value (a fraction, not a percentage).
    """
    accuracy = trainer.evaluate(eval_dataset=test_dataset)['eval_accuracy']
    print(f"Test Accuracy: {accuracy*100:.2f}%")
    return accuracy

# Accuracy of each fine-tuned model on its test dataset (the same datasets
# that were passed as eval_dataset during training).
se_bert_accuracy = evaluate_bert(se_trainer, se_test_dataset)
wiki_bert_accuracy = evaluate_bert(wiki_trainer, wiki_test_dataset)


Test Accuracy: 68.99%


Test Accuracy: 89.22%


In [None]:
import os
import openai


# Never commit the API key to the notebook: read it from the OPENAI_API_KEY
# environment variable, or prompt for it (input is not echoed) when unset.
# The key that was previously hard-coded here should be treated as leaked and
# revoked/rotated.
from getpass import getpass

openai.api_key = os.environ.get('OPENAI_API_KEY') or getpass('API key for the OpenAI-compatible proxy: ')

# Route requests through the LiteLLM proxy instead of the default endpoint.
openai.api_base = 'https://cmu.litellm.ai'

# (Optional) Set the API type if required by the proxy
openai.api_type = 'open_ai'  # Adjust if necessary




In [None]:
def get_chatgpt_prediction(text):
    """Ask the chat model for a binary politeness judgement of ``text``.

    Sends a single user message through ``openai.ChatCompletion.create``
    (model ``gpt-4o``, one output token, temperature 0).

    Args:
        text: The text to classify; it is appended to the instruction prompt.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped. The prompt asks for ``1`` (polite) or ``0`` (impolite), but
        the raw reply is returned without validation.
    """
    prompt = f"Do a binary classification on the politeness for the given text. If it's polite then output 1. Otherwise output 0 for impolite.\n\n{text}"

    request = dict(
        model="gpt-4o",  # Use a model supported by the proxy
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1,
        temperature=0,
    )
    response = openai.ChatCompletion.create(**request)

    first_choice = response['choices'][0]
    return first_choice['message']['content'].strip()


In [None]:
import time

def get_predictions(df, delay=1):
    """Collect one raw model prediction per row of ``df``.

    Args:
        df: DataFrame with a ``text`` column.
        delay: Seconds to sleep after each request, as a crude rate limiter.
            Defaults to 1; pass 0 to disable the pause.

    Returns:
        List with the value returned by ``get_chatgpt_prediction`` for each
        text, in row order.
    """
    predictions = []
    # Only the text column is needed, so iterate it directly rather than
    # materialising a Series per row with iterrows().
    for text in df['text']:
        predictions.append(get_chatgpt_prediction(text))
        if delay:
            time.sleep(delay)  # Adjust based on rate limits
    return predictions


In [None]:
# One API request per test row; get_predictions pauses between requests, so
# this cell's runtime grows with the size of the test splits.
# NOTE(review): the recorded run of this cell failed with APIRemovedInV1
# (openai>=1.0.0 installed); the ChatCompletion call in get_chatgpt_prediction
# needs the pre-1.0 client.
se_chatgpt_predictions_raw = get_predictions(se_test_df)
wiki_chatgpt_predictions_raw = get_predictions(wiki_test_df)


APIRemovedInV1: 

You tried to access openai.Completion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [None]:
def process_predictions(predictions):
    """Translate raw model replies into politeness labels.

    Args:
        predictions: Iterable of reply strings.

    Returns:
        List with one label per reply: ``'polite'`` if the reply contains the
        character ``1``, else ``'impolite'`` if it contains ``0``, else
        ``'unknown'``.
    """
    def to_label(raw):
        # '1' is tested first, so a reply containing both digits is 'polite'.
        if '1' in raw:
            return 'polite'
        if '0' in raw:
            return 'impolite'
        return 'unknown'

    return [to_label(raw) for raw in predictions]

# Map the raw replies onto 'polite' / 'impolite' / 'unknown'.
se_chatgpt_predictions = process_predictions(se_chatgpt_predictions_raw)
wiki_chatgpt_predictions = process_predictions(wiki_chatgpt_predictions_raw)

def evaluate_chatgpt(df, predictions):
    """Compute accuracy of the predictions, ignoring ``'unknown'`` ones.

    Args:
        df: DataFrame with a ``label`` column; it is not modified.
        predictions: One predicted label per row of ``df``.

    Returns:
        Fraction of rows whose prediction equals the label, computed over the
        rows whose prediction is not ``'unknown'``.
    """
    scored = df.assign(prediction=predictions)
    answered = scored[scored['prediction'] != 'unknown']
    is_correct = answered['label'] == answered['prediction']
    return is_correct.mean()

# Accuracy over the rows with a usable prediction; evaluate_chatgpt drops
# 'unknown' predictions before averaging.
se_chatgpt_accuracy = evaluate_chatgpt(se_test_df, se_chatgpt_predictions)
wiki_chatgpt_accuracy = evaluate_chatgpt(wiki_test_df, wiki_chatgpt_predictions)

print(f"Stack Exchange ChatGPT Accuracy: {se_chatgpt_accuracy*100:.2f}%")
print(f"Wikipedia ChatGPT Accuracy: {wiki_chatgpt_accuracy*100:.2f}%")
