In [259]:
%load_ext autoreload
%autoreload 2
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from typing import List, Tuple, Dict, Union, Any, Optional
from tqdm import tqdm
try:
    os.chdir(os.path.join(os.getcwd(), '../../Language-Technology-Project'))
    print(os.getcwd())
except:
    print("ALready in current dir")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
ALready in current dir


# Generative pipeline

In [8]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = GPT2Tokenizer.from_pretrained('gpt2-xl')
model = GPT2LMHeadModel.from_pretrained('gpt2-xl').to(device)

print(f"size_gb(model) = {model.num_parameters() * 4 / 1024**3:.2f} GB")
print("vocab size:", tokenizer.vocab_size)

size_gb(model) = 5.80 GB
vocab size: 50257


In [9]:
text = "Man is a doctor as a women is a"
encoded_input = tokenizer(text, return_tensors='pt').to(device)
output = model(**encoded_input)
print(output.logits.shape)
next_token_logits = output.logits[:, -1, :]
likelihoods = torch.softmax(next_token_logits, dim=-1)
sorted_likelihoods, sorted_indices = torch.sort(likelihoods, descending=True)
print("Top 5 tokens:")
for i in range(5):
    print(f"{i+1}: {tokenizer.decode(sorted_indices[0, i])} ({sorted_likelihoods[0, i]:.2f})")

torch.Size([1, 9, 50257])
Top 5 tokens:
1:  woman (0.11)
2:  doctor (0.08)
3:  nurse (0.05)
4:  mother (0.04)
5:  man (0.03)


In [281]:
import torch

def get_likelihoods_of_words_given_context(prompt: str, words: list, model, tokenizer):
    """
    Returns a list of likelihoods of words given a prompt.
    """
    encoded_input = tokenizer(prompt, return_tensors='pt').to(device)
    output = model(**encoded_input)
    next_token_logits = output.logits[:, -1, :]
    likelihoods = torch.softmax(next_token_logits, dim=-1)
    word_probs = []
    for word in words:
        tokens = tokenizer.encode(word)
        # word_probs.append(likelihoods[0, tokenizer.encode(word)[0]].item())
        probability = 1
        for token in tokens:
            probability *= likelihoods[0, token].item()
        word_probs.append(probability)
    
    output = list(zip(words, word_probs))
    output.sort(key=lambda x: x[1], reverse=True)
    return output
# example
prompt = "2+2="
options = ["4", "5", "6", "7", "8", "9"]
get_likelihoods_of_words_given_context(prompt, options, model, tokenizer)


[('4', 0.20943237841129303),
 ('5', 0.17149624228477478),
 ('6', 0.06978660076856613),
 ('7', 0.05673491582274437),
 ('8', 0.031708039343357086),
 ('9', 0.022482799366116524)]

## Category classifier

In [292]:
from settings import categories

class LikelihoodBasedCategoryClassifier:

    def __init__(self, categories):
        self.categories = categories

    def __shuffle_categories(self):
        np.random.shuffle(self.categories)
    
    def __construct_classification_prompt(self, sentence, max_length=128, categories=None):
        prompt = "Categories of Human Value Detection 2023 by name and description: \n\n"
        expectation2category = {}
        for i, category in enumerate(categories):
            name = category["name"]
            description = category["description"]
            examples = category["examples"]
            example = examples[np.random.randint(len(examples))]
            example = example[:max_length] + "..." if len(example) > max_length else example
            # otpions a-z
            prompt += f"- {name}: {description} (e.g., {example})\n"
            expectation2category[name] = name
        prompt += "\n"
        prompt += "Classify the following sentence into one of the categories above:\n"
        prompt += f"\n'Argument: {sentence}"
        prompt += "\Category: "

        return prompt, expectation2category

    def classify_once(self, sentence: str) -> List[float]:
        self.__shuffle_categories()
        prompt, expectations = self.__construct_classification_prompt(sentence, categories=self.categories)
        likelihoods = get_likelihoods_of_words_given_context(prompt, expectations.keys(), model, tokenizer)
        likelihoods = [(expectations[c], p) for c, p in likelihoods]
        return likelihoods
    
    def __call__(self, sentence: str, trial_count=5) -> List[float]:
        results = {category["name"]: 0.0 for category in self.categories}
        for _ in range(trial_count):
            likelihoods = self.classify_once(sentence)
            for category, likelihood in likelihoods:
                results[category] += likelihood / trial_count
        results = list(results.items())
        results.sort(key=lambda x: x[1], reverse=True)
        return results
    

In [293]:
classifier = LikelihoodBasedCategoryClassifier(categories)
# take a random category 
category = categories[np.random.randint(len(categories))]
sentence = np.random.choice(category["examples"])
print(f"Category: {category['name']}")
print(f"Sentence: {sentence}")

likelihoods = classifier(sentence)
print(likelihoods)
if likelihoods[0][0] == category["name"]:
    print("Correct!")
else:
    print("Incorrect!")

Category: power
Sentence: It is good to be in positions of control over others.
Categories of Human Value Detection 2023 by name and description: 

- hedonism: Pleasure or sensuous gratification for oneself. (e.g., It is important to have a good time.)
- stimulation: It is good to experience excitement, novelty, and change. (e.g., Have an exciting life: arguments towards allowing people to experience foreign places and special activities or having perspecti...)
- self-direction: Independent thought and action; choosing, creating, exploring. (e.g., Independence is important for me to make my own decisions.)
- conformity: Restraint of actions, inclinations, and impulses likely to upset or harm others and violate social expectations or norms. (e.g., It is important to respect traditions.)
- tradition: Respect, commitment, and acceptance of the customs and ideas that traditional culture or religion provide the self. (e.g., It is important to uphold traditions.)
- security: Safety, harmony,

In [277]:
# basic evaluation go throguuh all categories and all examples
def evaluate_classifier(classifier, categories, trial_count=5):
    results = {category["name"]: [] for category in categories}
    
    for category in categories:
        for sentence in category["examples"]:
            likelihoods = classifier(sentence, trial_count=trial_count)
            results[category["name"]].append(likelihoods)
    # compute accuracy
    correct = 0
    total = 0
    for category in categories:
        for likelihoods in results[category["name"]]:
            if likelihoods[0][0] == category["name"]:
                correct += 1
            total += 1
    accuracy = correct / total
    return accuracy, results

accuracy, results = evaluate_classifier(classifier, categories)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.00
