In [1]:
import torch
from transformers import AutoModel, AutoTokenizer
from transformers import TextClassificationPipeline
import pandas as pd
import numpy as np
import tqdm
from tqdm import tqdm
import sys, os
from transformers import Pipeline
from torch import Tensor 
import torch.nn as nn
from torch.utils.data import DataLoader
from datasets import load_dataset
import torch.nn as nn
import sentence_transformers
from projection_helper import project_points_onto_axes

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Load Data and Model

In [9]:
# Hugging Face identifiers: DATASET_REPO is the dataset name handed to
# load_dataset() in load_data(); MODEL_REPO is the checkpoint name handed to
# AutoModel.from_pretrained() in load_model().
DATASET_REPO = "go_emotions"
MODEL_REPO = "shreyahavaldar/roberta-base-go_emotions"

def load_data():
    """Load the "raw" configuration of ``DATASET_REPO`` with torch output format.

    Returns:
        The object returned by ``load_dataset(DATASET_REPO, "raw")`` after
        ``set_format(type='torch')`` has been applied to it.
    """
    raw_splits = load_dataset(DATASET_REPO, "raw")
    # set_format is called for its side effect on the loaded object, which is
    # then returned to the caller.
    raw_splits.set_format(type='torch')
    return raw_splits

def load_model(num_labels: int = 28):
    """Build a classifier: the pretrained ``MODEL_REPO`` backbone plus a linear head.

    Args:
        num_labels: Output width of the linear head. Defaults to 28, the value
            that was previously hard-coded here.

    Returns:
        An ``nn.Sequential`` of (backbone, ``nn.Linear``) placed on ``device``.
        The linear head is constructed here, so its weights are freshly
        initialised rather than loaded from the checkpoint.
    """
    backbone = AutoModel.from_pretrained(MODEL_REPO)
    # convert to pytorch model
    # NOTE(review): nn.Sequential feeds the backbone's return value straight
    # into nn.Linear. Hugging Face base models usually return a ModelOutput
    # object rather than a bare tensor -- confirm this forward path works.
    torch_model = nn.Sequential(backbone, nn.Linear(backbone.config.hidden_size, num_labels))
    # Move the composed module, not just the backbone: otherwise the linear
    # head stays on the CPU and a CUDA forward pass fails with a device mismatch.
    torch_model.to(device)
    return torch_model

### Define Alignment Metric

In [91]:
class Metric(nn.Module):
    """Score how close groups of words lie to a unit circle in a 2-D emotion plane.

    Four anchor vectors are built by averaging the sentence embeddings of the
    words filed under the labels "NV", "PV", "HA" and "LA" in an emotions CSV
    (presumably negative/positive valence and high/low arousal -- confirm
    against the CSV). Word embeddings are projected with
    ``project_points_onto_axes`` and a group's score is the mean of
    ``|sqrt(x**2 + y**2) - 1|`` over its words, so 0 means every projected
    word sits exactly on the unit circle.
    """

    def __init__(self, model_name:str="distiluse-base-multilingual-cased", emotions_path:str="russell_emotions.csv"):
        """Load the sentence encoder and compute the four axis anchors.

        Args:
            model_name: Name passed to ``sentence_transformers.SentenceTransformer``.
            emotions_path: CSV with "label" and "emotion" columns; defaults to
                the path that was previously hard-coded.
        """
        super().__init__()
        self.model = sentence_transformers.SentenceTransformer(model_name)
        nv_anchor, pv_anchor, ha_anchor, la_anchor = self.define_circumplex(emotions_path)
        # The x axis runs NV -> PV and the y axis runs LA -> HA. The HA/LA
        # anchors are deliberately taken in the opposite order to axis_labels.
        self.x1 = nv_anchor
        self.x2 = pv_anchor
        self.y1 = la_anchor
        self.y2 = ha_anchor

    def define_circumplex(self, emotions_path:str="russell_emotions.csv"):
        """Return the mean embedding of the words under each axis label.

        Args:
            emotions_path: CSV file with a "label" column and an "emotion" column.

        Returns:
            A list of four vectors, ordered ["NV", "PV", "HA", "LA"].

        Raises:
            ValueError: if the CSV has no rows for one of the four labels
                (previously this silently produced a NaN anchor).
        """
        emotions = pd.read_csv(emotions_path)
        axis_labels = ["NV", "PV", "HA", "LA"]
        axis_points = []
        for label in axis_labels:
            emotion_words = emotions.loc[emotions["label"] == label, "emotion"].tolist()
            if not emotion_words:
                raise ValueError(f"no emotion words with label {label!r} found in {emotions_path!r}")
            emotion_embeddings = self.model.encode(emotion_words)
            axis_points.append(np.mean(emotion_embeddings, axis=0))
        return axis_points

    @staticmethod
    def _mean_unit_circle_deviation(x_projections, y_projections):
        """Mean of ``|sqrt(x**2 + y**2) - 1|`` over paired x/y coordinates."""
        x_coords = np.asarray(x_projections)
        y_coords = np.asarray(y_projections)
        radii = np.sqrt(x_coords**2 + y_coords**2)
        return np.mean(np.abs(radii - 1))

    def distance_from_circumplex(self, embeddings):
        """Project embeddings onto the two axes and return their mean deviation from the unit circle.

        Args:
            embeddings: Embeddings as produced by ``self.model.encode``.

        Returns:
            The mean over all points of ``|sqrt(x**2 + y**2) - 1|``.
        """
        # Index 0 is used as the x projections and index 1 as the y projections.
        projection = project_points_onto_axes(embeddings, self.x1, self.x2, self.y1, self.y2)
        return self._mean_unit_circle_deviation(projection[0], projection[1])

    def calculate_group_alignment(self, groups:list, language:str="english"):
        """Score each group of words separately.

        Args:
            groups: A list of groups, each group being a list of words/phrases.
            language: Currently unused; kept so existing callers keep working.

        Returns:
            A list with one score per group, in the same order as ``groups``.
        """
        return [self.distance_from_circumplex(self.model.encode(group)) for group in groups]

    def forward(self, zp, x=None, y=None, z=None, reduce=True, **kwargs):
        """Placeholder: not implemented yet, ignores its arguments and returns None."""
        pass

### Example Group Alignment Calculation

In [94]:
# Build the metric with its default sentence-embedding model.
metric = Metric()
# Each inner list is one group; every group gets a single score.
sample_groups = [["however", "therefore", "unless"], 
                ["sad", "happy", "thrilled"], 
                ["computer", "neural network", "compiler"], 
                ["tired", "sleepy", "calm"]]
alignments = metric.calculate_group_alignment(sample_groups)
# The score is a mean distance from the unit circle, so lower means the
# group's projected words lie closer to it.
for group, alignment in zip(sample_groups, alignments):
    print(f"Group: {group}, Alignment: {alignment}")

Group: ['however', 'therefore', 'unless'], Alignment: 0.8077621152905459
Group: ['sad', 'happy', 'thrilled'], Alignment: 0.32860005172434187
Group: ['computer', 'neural network', 'compiler'], Alignment: 0.6724962101561861
Group: ['tired', 'sleepy', 'calm'], Alignment: 0.34622350256426504
