In [77]:
import torch
import pandas as pd

def load_arguments(filename: str):
    with open(filename, "r", encoding="utf8") as f:
        lines = f.readlines()
        headers = lines[0].strip().split("\t")
        records = [l.strip().split("\t") for l in lines[1:]]
    return headers, records


def load_arguments_as_df(filename: str):
    return pd.read_csv(filename, encoding="utf8", sep="\t").set_index("Argument ID")


def load_value_categories(filename: str):
    return pd.read_csv(filename, encoding="utf8", sep="\t").set_index("Argument ID")
# Encode labels to vector space

opposite_pairs = {
    "Self-direction: thought": (0, 1),
    "Self-direction: action": (1, 1),
    "Stimulation": (2, 1),
    "Hedonism": (3, 1),
    "Achievement": (4, 1),
    "Power: dominance": (5, 1),
    "Power: resources": (6, 1),
    "Face": (7, 1),
    "Security: personal": (8, 1),
    "Security: societal": (9, 1),
    "Tradition": (0, -1),
    "Conformity: rules": (1, -1),
    "Conformity: interpersonal": (2, -1),
    "Humility": (3, -1),
    "Benevolence: caring": (4, -1),
    "Benevolence: dependability": (5, -1),
    "Universalism: concern": (6, -1),
    "Universalism: nature": (7, -1),
    "Universalism: tolerance": (8, -1),
    "Universalism: objectivity": (9, -1),
}

pair_dict = {0: ['Self-direction: thought', 'Tradition'], 1: ['Self-direction: action', 'Conformity: rules'], 2: ['Stimulation', 'Conformity: interpersonal'], 3: ['Hedonism', 'Humility'], 4: ['Achievement', 'Benevolence: caring'],
             5: ['Power: dominance', 'Benevolence: dependability'], 6: ['Power: resources', 'Universalism: concern'], 7: ['Face', 'Universalism: nature'], 8: ['Security: personal', 'Universalism: tolerance'], 9: ['Security: societal', 'Universalism: objectivity']}


def encode_label(inputRow):
    output_vector = [0] * len(pair_dict)
    for k, v in pair_dict.items():
        output_vector[k] = inputRow[v[0]] + inputRow[v[1]] * -1
    return output_vector

def decode_label(embeddedVal, threshold=0.3):
    out = [0] * 20
    for i, v in pair_dict.items():
        if(embeddedVal[i] >= threshold):
            out[i] = 1
        elif(embeddedVal[i] <= (-threshold)):
            out[i + 10] = 1
    return out



In [79]:

# from helper import load_arguments, \
#     load_arguments_as_df,\
#     load_value_categories, encode_label

argument_file_path = "data/arguments-training.tsv"
headers, argument_list = load_arguments(argument_file_path)
argument_df = load_arguments_as_df(argument_file_path)

labels_file_path = "data/labels-training.tsv"
labels_df = load_value_categories(labels_file_path)

# validation
val_argument_file_path = "data/arguments-validation.tsv"
val_labels_file_path = "data/labels-validation.tsv"
val_headers, val_argument_list  = load_arguments(val_argument_file_path)
val_argument_df = load_arguments_as_df(val_argument_file_path)
val_labels_df = load_value_categories(val_labels_file_path)

In [80]:
val_argument_df.shape

(1896, 3)

In [2]:
argument_df["Stance"].unique()

array(['in favor of', 'against', 'in favour of'], dtype=object)

In [3]:
labels_df.head(5)

Unnamed: 0_level_0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
Argument ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A01002,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01005,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
A01006,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01007,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0
A01008,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0


In [4]:
# Clean arguments
argument_df["Stance_encoded"] = argument_df.apply(lambda row: 0 if row["Stance"] == "against" else 1, axis=1)

In [5]:
for s in labels_df.columns:
    print(f'"{s}"')

"Self-direction: thought"
"Self-direction: action"
"Stimulation"
"Hedonism"
"Achievement"
"Power: dominance"
"Power: resources"
"Face"
"Security: personal"
"Security: societal"
"Tradition"
"Conformity: rules"
"Conformity: interpersonal"
"Humility"
"Benevolence: caring"
"Benevolence: dependability"
"Universalism: concern"
"Universalism: nature"
"Universalism: tolerance"
"Universalism: objectivity"


In [6]:
torch.cat(tuple(labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        ...,
        [ 0.,  0.,  0.,  ..., -1.,  0.,  0.],
        [ 0., -1.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  ...,  0.,  0.,  1.]])

In [7]:
print("Max length conclusion: {}, premise: {}".format(argument_df["Conclusion"].map(lambda x: len(x.split(" "))).max(), argument_df["Premise"].map(lambda x: len(x.split(" "))).max()))
# Conclusion length 64, Premise length 256

Max length conclusion: 35, premise: 133


In [8]:
from transformers import BertTokenizer, BertModel
import numpy as np

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

sentence_model = BertModel.from_pretrained("prajjwal1/bert-small")

def encode_text(text:str, max_length:int):
    encoded = tokenizer.encode_plus(
        text=text,
        add_special_tokens=True,
        truncation=True,
        max_length = max_length,
        padding="max_length",
        return_attention_mask = True,
        return_tensors = "pt"
    )
    output = sentence_model(encoded["input_ids"])
    _, pooled_output = output[:2]
#     return torch.cat((encoded["input_ids"], encoded["attention_mask"]), dim=1)
    return pooled_output[0]

Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
encode_text(argument_df.iloc[0][0], 64).size()

torch.Size([512])

In [10]:
# Build features
conclusions = list()
with torch.no_grad():
    for i, v in argument_df.iterrows():
        conclusions.append(encode_text(v["Conclusion"], 64).reshape(1,-1))
conclusions = torch.cat(conclusions, dim=0)

arguments = list()
with torch.no_grad():
    for i, v in argument_df.iterrows():
        arguments.append(encode_text(v["Premise"], 256).reshape(1,-1))
arguments = torch.cat(arguments, dim=0)

stance = torch.Tensor(tuple(argument_df["Stance_encoded"])).reshape(-1,1)

In [11]:
stance = stance.reshape(-1,1)

In [12]:
conclusions.size()
arguments.size()
stance.size()

torch.Size([5393, 1])

In [29]:
x_train = torch.cat((conclusions, arguments, stance), dim=1)
y_train = torch.cat(tuple(labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

In [30]:
x_train.size()

torch.Size([5393, 1025])

In [33]:
torch.min(x_train, dim=1).values.size()

torch.Size([5393])

In [40]:
torch.max(x_train.abs(), dim=1, keepdim=True)[0].size()

torch.Size([5393, 1])

In [39]:
(x_train/torch.max(x_train.abs(), dim=1, keepdim=True)[0]).size()

torch.Size([5393, 1025])

In [None]:
x_train.size()

In [None]:
y_train.size()

https://stackoverflow.com/questions/68011633/train-multi-output-regression-model-in-pytorch

In [41]:
import torch

class HumanValueRegressor(torch.nn.Module):
    def __init__(self):
        super(HumanValueRegressor, self).__init__()
        self.hidden1 = torch.nn.Linear(1025, 500)
        self.hidden2 = torch.nn.Linear(500, 500)
        self.output = torch.nn.Linear(500, 10)
        
        torch.nn.init.xavier_uniform_(self.hidden1.weight)
        torch.nn.init.zeros_(self.hidden1.bias)
        torch.nn.init.xavier_uniform_(self.hidden2.weight)
        torch.nn.init.zeros_(self.hidden2.bias)
        torch.nn.init.xavier_uniform_(self.output.weight)
        torch.nn.init.zeros_(self.output.bias)
        
    def forward(self, x):
        x = torch.relu(self.hidden1(x))
        x = torch.relu(self.hidden2(x))
#         x = torch.tanh(self.output(x))
        x = self.output(x)
        return x/(torch.max(x.abs(), dim=1, keepdim=True)[0])
    
model = HumanValueRegressor()

In [42]:
from tqdm.notebook import tqdm
import random

def training_loop(
    in_features, 
    label_features,
    batch_size,
    epochs,
    model
):
    print("Training...")
    criterion = torch.nn.MSELoss()

    # Create batches
    batches = []
    for i in range(0, len(in_features), batch_size):
        batches.append(
        (
            in_features[i:i+batch_size],
            label_features[i:i+batch_size]
        ))
        
    random.shuffle(batches)

    for i in range(epochs):
        losses = []
        for features, labels in tqdm(batches):
            optimizer.zero_grad()
            out_pred = model(features)
            loss = criterion(out_pred, labels)
#             print(loss)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print(f"epoch {i}, loss: {sum(losses)/len(losses)}")
    return model

In [68]:
# Training
LR = 0.000001
optimizer = torch.optim.Adam(model.parameters(), LR)
epochs = 100
batch_size = 8

trained_model = training_loop(
    x_train, 
    y_train,
    batch_size,
    epochs,
    model
)

Training...


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 0, loss: 0.2238602845536338


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 1, loss: 0.22002437670473698


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 2, loss: 0.21951903935935763


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 3, loss: 0.21922825894422002


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 4, loss: 0.21900530984004338


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 5, loss: 0.21883219335918072


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 6, loss: 0.21867329715026748


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 7, loss: 0.21853735069985744


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 8, loss: 0.2184087244504028


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 9, loss: 0.21830076111687555


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 10, loss: 0.21818894978474687


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 11, loss: 0.21808905212967483


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 12, loss: 0.21799258360156307


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 13, loss: 0.21789815703475918


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 14, loss: 0.2178102670168435


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 15, loss: 0.2177208024042624


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 16, loss: 0.21762772995012777


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 17, loss: 0.21754653883201105


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 18, loss: 0.21745565901199976


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 19, loss: 0.21738174828114332


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 20, loss: 0.21730319027547484


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 21, loss: 0.21722534930816403


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 22, loss: 0.21715944638958684


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 23, loss: 0.2170821760594845


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 24, loss: 0.21701386472693196


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 25, loss: 0.21694744588600265


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 26, loss: 0.21688488100413922


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 27, loss: 0.21682079864320933


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 28, loss: 0.21675507986435183


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 29, loss: 0.21669384697521174


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 30, loss: 0.21662584835732424


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 31, loss: 0.21656111744818865


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 32, loss: 0.21650068322817484


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 33, loss: 0.21644158844042707


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 34, loss: 0.21637790846603888


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 35, loss: 0.21632704495831773


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 36, loss: 0.21626637658587208


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 37, loss: 0.2161949188841714


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 38, loss: 0.2161360380936552


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 39, loss: 0.2160846402358126


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 40, loss: 0.21602827630661153


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 41, loss: 0.2159598569461593


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 42, loss: 0.21589817586872312


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 43, loss: 0.21583781399108745


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 44, loss: 0.21578024175983887


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 45, loss: 0.21572353381801534


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 46, loss: 0.2156715166237619


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 47, loss: 0.21561179099259553


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 48, loss: 0.21556399907778812


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 49, loss: 0.21550942360251038


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 50, loss: 0.21546011749241087


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 51, loss: 0.21540870922583122


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 52, loss: 0.2153511086878953


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 53, loss: 0.21529662338671862


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 54, loss: 0.21524232787114603


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 55, loss: 0.21519659013659867


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 56, loss: 0.21514290230141744


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 57, loss: 0.21509706593773983


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 58, loss: 0.2150481578928453


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 59, loss: 0.21500129634031542


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 60, loss: 0.21495693811663874


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 61, loss: 0.21490794035019697


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 62, loss: 0.21485396237285048


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 63, loss: 0.21480771487509762


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 64, loss: 0.2147619031811202


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 65, loss: 0.21471601663364304


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 66, loss: 0.21466947057180935


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 67, loss: 0.21461516076215992


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 68, loss: 0.2145651808877786


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 69, loss: 0.2145128688988862


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 70, loss: 0.21446480306210342


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 71, loss: 0.2144152082613221


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 72, loss: 0.21437046940679905


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 73, loss: 0.2143256662565249


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 74, loss: 0.21427952087587782


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 75, loss: 0.21423409134149551


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 76, loss: 0.21419550116415376


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 77, loss: 0.2141461873827157


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 78, loss: 0.2141005062394672


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 79, loss: 0.2140571864777141


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 80, loss: 0.2140138609762545


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 81, loss: 0.21396643616535047


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 82, loss: 0.21392429856238543


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 83, loss: 0.21387380005032927


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 84, loss: 0.21383053187970763


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 85, loss: 0.21378868416503624


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 86, loss: 0.2137425503300296


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 87, loss: 0.2136979376607471


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 88, loss: 0.21366016249965739


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 89, loss: 0.21361851387553746


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 90, loss: 0.2135767641939499


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 91, loss: 0.21353334190116988


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 92, loss: 0.21349095921825478


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 93, loss: 0.2134425345449536


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 94, loss: 0.21339901471579517


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 95, loss: 0.21335494232398491


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 96, loss: 0.21331316772986342


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 97, loss: 0.2132689537163134


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 98, loss: 0.21322726680172815


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 99, loss: 0.2131876422409658


In [69]:
with torch.no_grad():
    x = trained_model(x_train)
x

tensor([[-0.4533, -0.4105, -0.0107,  ..., -0.3729,  0.4952,  0.0685],
        [-0.0034,  0.0898, -0.1318,  ...,  0.0308,  1.0000, -0.0477],
        [ 0.0925, -0.2040,  0.0037,  ..., -0.0483, -0.0454,  1.0000],
        ...,
        [-0.1326, -0.0204, -0.0130,  ..., -0.3931,  1.0000, -0.2053],
        [-0.0869, -0.0179, -0.0078,  ..., -0.3684,  1.0000, -0.2054],
        [ 0.0738,  0.6603,  0.0161,  ...,  0.1870,  0.0628,  1.0000]])

In [73]:
x[0]

tensor([-0.4533, -0.4105, -0.0107, -0.4843, -0.1984, -0.2157, -1.0000, -0.3729,
         0.4952,  0.0685])

In [49]:
y_train

tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        ...,
        [ 0.,  0.,  0.,  ..., -1.,  0.,  0.],
        [ 0., -1.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  ...,  0.,  0.,  1.]])

In [67]:
(x - y_train).square().mean()

tensor(0.2345)

In [72]:
decode_label(x[0])

[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0]