In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

try:
    os.chdir(os.path.join(os.getcwd(), '../../Language-Technology-Project'))
    print(os.getcwd())
except:
    print("ALready in current dir")

c:\Users\mo\Documents\Organisations\RUG\Language-Technology-Project


# Generative pipeline

In [4]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = GPT2Tokenizer.from_pretrained('gpt2-xl')
model = GPT2LMHeadModel.from_pretrained('gpt2-xl').to(device)

print(f"size_gb(model) = {model.num_parameters() * 4 / 1024**3:.2f} GB")
print("vocab size:", tokenizer.vocab_size)

size_gb(model) = 5.80 GB
vocab size: 50257


In [10]:
text = "Man is a doctor as a women is a"
encoded_input = tokenizer(text, return_tensors='pt').to(device)
output = model(**encoded_input)
print(output.logits.shape)
next_token_logits = output.logits[:, -1, :]
likelihoods = torch.softmax(next_token_logits, dim=-1)
sorted_likelihoods, sorted_indices = torch.sort(likelihoods, descending=True)
print("Top 5 tokens:")
for i in range(5):
    print(f"{i+1}: {tokenizer.decode(sorted_indices[0, i])} ({sorted_likelihoods[0, i]:.2f})")

torch.Size([1, 9, 50257])
Top 5 tokens:
1:  woman (0.11)
2:  doctor (0.08)
3:  nurse (0.05)
4:  mother (0.04)
5:  man (0.03)


In [13]:
import torch

def get_likelihoods_of_words_given_context(prompt: str, words: list, model, tokenizer):
    """
    Returns a list of likelihoods of words given a prompt.
    """
    word_probs = []

    for word in words:
        word_token = tokenizer.encode(word, add_special_tokens=False)
        tokens = tokenizer(prompt, return_tensors='pt')['input_ids'].to(device)
        tokens = torch.cat([tokens, torch.tensor(word_token).unsqueeze(0).to(device)], dim=-1)
        outputs = model(tokens)
        logits = outputs[0]
        probabilities = torch.softmax(logits, dim=-1)

        if len(word_token) == 1:
            word_prob = probabilities[0, -1, word_token[0]].item()
            word_probs.append(word_prob)
        else:
            word_prob = 1.0
            for idx in range(len(word_token)):
                word_prob *= probabilities[0, -(len(word_token) - idx), word_token[idx]].item()
            word_probs.append(word_prob)

    return list(zip(words, word_probs))
# example
prompt = "Man is a doctor as a women is a"
get_likelihoods_of_words_given_context(prompt, ["nurse", "doctor", "teacher"], model, tokenizer)


[('nurse', 1.080520402501984e-10),
 ('doctor', 6.820724138378864e-06),
 ('teacher', 1.1379043641373526e-11)]

## Category classifier

In [16]:
categories = [
    {
        "name:": "universalism",
        "description": "Understanding, appreciation, tolerance, and protection for the welfare of all people and for nature.",
        "examples": [
            "Equality is important to me.",
            "It is good to accept and try to understand those who are different from oneself."
        ],
        "children": []
    },
    {
        "name:": "self-direction",
        "description": "Independent thought and action; choosing, creating, exploring.",
        "examples": [
            "It is good to search for the truth and think in a rational and unbiased way.",
            "Independence is important for me to make my own decisions.",
        ],
        "children": []
    },
    {
        "name": "stimulation",
        "description": "It is good to experience excitement, novelty, and change.",
        "examples": [
            "Have an exciting life: arguments towards allowing people to experience foreign places and special activities or having perspective-changing experiences",
            "Have a varied life: arguments towards allowing people to engage in many activities and change parts of their life or towards promoting local clubs (sports, ...)",
            "Be daring: arguments towards more risk-taking"
        ],
        "children": []
    },
    {
        "name": "hedonism",
        "description": "Pleasure or sensuous gratification for oneself.",
        "examples": [
            "It is important to do things that make you feel good.",
            "It is important to have a good time.",
        ],
        "children": []
    },
    {
        "name": "achievement",
        "description": "It is good to be successful in accordance with social norms.",
        "examples": [
            "It is important to show your abilities.",
            "Be ambitious: arguments towards allowing for ambitions and climbing up the social ladder",
            "Have success: arguments towards allowing for success and recognizing achievements",
            "Be capable: arguments towards acquiring competence in certain tasks, being more effective, and showing competence in solving tasks",
            "Be intellectual: arguments towards acquiring high cognitive skills, being more reflective, and showing intelligence",
            "Be courageous: arguments towards being more courageous and having people stand up for their beliefs"
        ],
        "children": []
    },
    {
        "name": "power",
        "description": "Social status and prestige, control or dominance over people and resources.",
        "examples": [
            "It is important to be rich.",
            "It is good to be in positions of control over others.",
        ],
        "children": []
    },
    {
        "name": "security",
        "description": "Safety, harmony, and stability of society, of relationships, and of self.",
        "examples": [
            "It is important to live in secure surroundings.",
            "It is important to plan ahead so as to avoid surprises.",
        ],
        "children": []
    },
    {
        "name": "tradition",
        "description": "Respect, commitment, and acceptance of the customs and ideas that traditional culture or religion provide the self.",
        "examples": [
            "It is important to uphold traditions.",
            "It is important to follow the customs of my religion.",
        ],
    },
    {
        "name": "conformity",
        "description": "Restraint of actions, inclinations, and impulses likely to upset or harm others and violate social expectations or norms.",
        "examples": [
            "It is important to behave properly.",
            "It is important to respect traditions.",
        ],
        "children": []
    },
    {
        "name": "benevolence",
        "description": "Preservation and enhancement of the welfare of people with whom one is in frequent personal contact.",
        "examples": [
            "It is important to help the people around you.",
            "It is important to be loyal to your friends.",
        ],
        "children": []
    }
]