In [None]:
import ast
import os

import pandas as pd

# Base name shared by the input CSV (../data/<filename>.csv) and the output
# directory (../data/<filename>/) that the tensor files are saved into.
filename = 'ratings'
save_path = f"../data/{filename}"
# torch.save does not create missing parent directories, so make sure the
# output directory exists before any cell tries to write into it.
os.makedirs(save_path, exist_ok=True)

# Rating columns that are exported as the label tensor.
dimensions = ["Tactfulness", "Helpfulness", "Clearness", "Astuteness"]

ratings = pd.read_csv(f'../data/{filename}.csv')
# Keep only rows that have a Tactfulness rating. The explicit .copy() makes the
# filtered frame independent of the raw one, so the column assignment below
# does not raise SettingWithCopyWarning.
# NOTE(review): only Tactfulness is checked; presumably a rated row carries all
# four dimensions -- confirm, otherwise NaNs can reach the labels.
ratings = ratings[ratings['Tactfulness'].notna()].copy()
# The 'chat' column is stored as Python-literal text; parse it back into
# Python objects.
ratings['chat'] = ratings['chat'].apply(ast.literal_eval)
ratings.head()

In [None]:
import torch
from torch import Tensor
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
chats = ratings['chat'].tolist()

# Tokenize every chat on its own. Each result is indexed as a 2-D tensor whose
# dim 0 is used as the node count and dim 1 as the sentence length
# (presumably one row per utterance -- TODO confirm against model.tokenize).
tokenized = [model.tokenize(chat)['input_ids'] for chat in chats]

# Largest extents over all chats; 0 when there are no chats at all.
max_nodes = max((ids.size(0) for ids in tokenized), default=0)
max_sen_len = max((ids.size(1) for ids in tokenized), default=0)

# Allocate the padded batch directly as int64 so the token ids never pass
# through float32 (exact only below 2**24) and no final .long() cast is needed.
node_tensor = torch.zeros(len(tokenized), max_nodes, max_sen_len, dtype=torch.long)
for i, ids in enumerate(tokenized):
    # Copy into the top-left corner; the remainder stays zero-padded.
    node_tensor[i, :ids.size(0), :ids.size(1)] = ids

torch.save(node_tensor, f"{save_path}/nodes.pt")

In [None]:
# One row per remaining rating, one float32 column per entry in `dimensions`.
# torch.tensor with an explicit dtype replaces the legacy Tensor(ndarray)
# constructor, and the tensor is saved as-is instead of being wrapped again.
labels = torch.tensor(ratings[dimensions].to_numpy(), dtype=torch.float32)
torch.save(labels, f"{save_path}/labels.pt")

In [None]:
from torch import Tensor

def create_chat_graph(chat, max_edges):
    """Build the typed, fully connected edge list of one chat, padded to ``max_edges``.

    Every ordered pair of utterance positions ``(ui, uj)``, self-pairs
    included, yields one edge. Edges are listed row-major: all ``uj`` for
    ``ui = 0``, then all ``uj`` for ``ui = 1``, and so on.

    Edge type codes:
        * ``8`` for a self-loop (``ui == uj``);
        * otherwise ``4 * (ui > uj) + 2 * (ui is even) + (uj is even)``.

    Even positions are the ones the code treats as human turns (assumes the
    chat alternates and starts with the human -- TODO confirm).

    Args:
        chat: Sequence of utterances; only its length is used.
        max_edges: Target length of both returned lists. If the chat has more
            than ``max_edges`` edges, nothing is truncated and the lists are
            returned at their natural length.

    Returns:
        A tuple ``(edge_types, edge_idxs)``: a list of int type codes and a
        parallel list of ``(ui, uj)`` tuples, padded with ``0`` and ``(0, 0)``
        respectively.

    NOTE(review): type code 0 is also produced by a real edge (``ui < uj`` with
    both positions odd), and the pad index ``(0, 0)`` equals the first
    self-loop, so padding cannot be told apart from real edges by value alone.
    """
    n_utterances = len(chat)

    chat_edges = []
    chat_edges_idxs = []
    for ui in range(n_utterances):
        # Parity check instead of a list-membership scan: same result, O(1).
        ui_is_human = ui % 2 == 0
        for uj in range(n_utterances):
            if ui == uj:
                edge_type = 8
            else:
                uj_is_human = uj % 2 == 0
                edge_type = (int(ui > uj) << 2) | (int(ui_is_human) << 1) | int(uj_is_human)

            chat_edges_idxs.append((ui, uj))
            chat_edges.append(edge_type)

    # A negative pad count multiplies to an empty list, i.e. no padding.
    n_pad = max_edges - len(chat_edges)
    chat_edges_pad = chat_edges + [0] * n_pad
    chat_edges_idxs_pad = chat_edges_idxs + [(0, 0)] * n_pad

    return chat_edges_pad, chat_edges_idxs_pad


# A chat with n utterances yields n*n edges (see create_chat_graph), so the
# largest chat determines the padded edge count.
max_edges = max_nodes**2

# edges[i, e]       -> type code of edge e in chat i
# edge_idxs[i, :, e] -> the (ui, uj) index pair of edge e in chat i
edges = torch.zeros(len(chats), max_edges, dtype=torch.int32)
edge_idxs = torch.zeros(len(chats), 2, max_edges, dtype=torch.int64)

for i, c in enumerate(chats):
    c_edges, c_edge_idxs = create_chat_graph(c, max_edges)

    # Build integer tensors directly rather than going through the float32
    # legacy Tensor constructor and an implicit cast on assignment.
    edges[i] = torch.tensor(c_edges, dtype=torch.int32)
    # List of (ui, uj) pairs is (max_edges, 2); transpose to (2, max_edges).
    edge_idxs[i] = torch.tensor(c_edge_idxs, dtype=torch.int64).T

torch.save(edges, f"{save_path}/edges.pt")
torch.save(edge_idxs, f"{save_path}/edge_idxs.pt")