In [1]:
import torch
import pandas as pd

def load_arguments(filename: str):
    with open(filename, "r", encoding="utf8") as f:
        lines = f.readlines()
        headers = lines[0].strip().split("\t")
        records = [l.strip().split("\t") for l in lines[1:]]
    return headers, records


def load_arguments_as_df(filename: str):
    return pd.read_csv(filename, encoding="utf8", sep="\t").set_index("Argument ID")


def load_value_categories(filename: str):
    return pd.read_csv(filename, encoding="utf8", sep="\t").set_index("Argument ID")
# Encode labels to vector space

opposite_pairs = {
    "Self-direction: thought": (0, 1),
    "Self-direction: action": (1, 1),
    "Stimulation": (2, 1),
    "Hedonism": (3, 1),
    "Achievement": (4, 1),
    "Power: dominance": (5, 1),
    "Power: resources": (6, 1),
    "Face": (7, 1),
    "Security: personal": (8, 1),
    "Security: societal": (9, 1),
    "Tradition": (0, -1),
    "Conformity: rules": (1, -1),
    "Conformity: interpersonal": (2, -1),
    "Humility": (3, -1),
    "Benevolence: caring": (4, -1),
    "Benevolence: dependability": (5, -1),
    "Universalism: concern": (6, -1),
    "Universalism: nature": (7, -1),
    "Universalism: tolerance": (8, -1),
    "Universalism: objectivity": (9, -1),
}

pair_dict = {0: ['Self-direction: thought', 'Tradition'], 1: ['Self-direction: action', 'Conformity: rules'], 2: ['Stimulation', 'Conformity: interpersonal'], 3: ['Hedonism', 'Humility'], 4: ['Achievement', 'Benevolence: caring'],
             5: ['Power: dominance', 'Benevolence: dependability'], 6: ['Power: resources', 'Universalism: concern'], 7: ['Face', 'Universalism: nature'], 8: ['Security: personal', 'Universalism: tolerance'], 9: ['Security: societal', 'Universalism: objectivity']}


def encode_label(inputRow):
    output_vector = [0] * len(pair_dict)
    for k, v in pair_dict.items():
        output_vector[k] = inputRow[v[0]] + inputRow[v[1]] * -1
    return output_vector

def decode_label(embeddedVal, threshold=0.3):
    out = [0] * 20
    for i, v in pair_dict.items():
        if(embeddedVal[i] >= threshold):
            out[i] = 1
        elif(embeddedVal[i] <= (-threshold)):
            out[i + 10] = 1
    return out



In [2]:

# from helper import load_arguments, \
#     load_arguments_as_df,\
#     load_value_categories, encode_label

argument_file_path = "data/arguments-training.tsv"
headers, argument_list = load_arguments(argument_file_path)
argument_df = load_arguments_as_df(argument_file_path)

labels_file_path = "data/labels-training.tsv"
labels_df = load_value_categories(labels_file_path)

# validation
val_argument_file_path = "data/arguments-validation.tsv"
val_labels_file_path = "data/labels-validation.tsv"
val_headers, val_argument_list  = load_arguments(val_argument_file_path)
val_argument_df = load_arguments_as_df(val_argument_file_path)
val_labels_df = load_value_categories(val_labels_file_path)

In [51]:
labels_df

Unnamed: 0_level_0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
Argument ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A01002,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01005,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
A01006,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01007,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0
A01008,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
E08016,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0
E08017,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1
E08018,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
E08019,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,0,1


In [3]:
val_argument_df.shape

(1896, 3)

In [4]:
argument_df["Stance"].unique()

array(['in favor of', 'against', 'in favour of'], dtype=object)

In [5]:
labels_df.head(5)

Unnamed: 0_level_0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
Argument ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A01002,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01005,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
A01006,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
A01007,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0
A01008,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0


In [6]:
# Clean arguments
argument_df["Stance_encoded"] = argument_df.apply(lambda row: 0 if row["Stance"] == "against" else 1, axis=1)

In [7]:
for s in labels_df.columns:
    print(f'"{s}"')

"Self-direction: thought"
"Self-direction: action"
"Stimulation"
"Hedonism"
"Achievement"
"Power: dominance"
"Power: resources"
"Face"
"Security: personal"
"Security: societal"
"Tradition"
"Conformity: rules"
"Conformity: interpersonal"
"Humility"
"Benevolence: caring"
"Benevolence: dependability"
"Universalism: concern"
"Universalism: nature"
"Universalism: tolerance"
"Universalism: objectivity"


In [8]:
torch.cat(tuple(labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        ...,
        [ 0.,  0.,  0.,  ..., -1.,  0.,  0.],
        [ 0., -1.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  ...,  0.,  0.,  1.]])

In [9]:
print("Max length conclusion: {}, premise: {}".format(argument_df["Conclusion"].map(lambda x: len(x.split(" "))).max(), argument_df["Premise"].map(lambda x: len(x.split(" "))).max()))
# Conclusion length 64, Premise length 256

Max length conclusion: 35, premise: 133


In [10]:
from transformers import BertTokenizer, BertModel
import numpy as np

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

sentence_model = BertModel.from_pretrained("prajjwal1/bert-small")

def encode_text(text:str, max_length:int):
    encoded = tokenizer.encode_plus(
        text=text,
        add_special_tokens=True,
        truncation=True,
        max_length = max_length,
        padding="max_length",
        return_attention_mask = True,
        return_tensors = "pt"
    )
    output = sentence_model(encoded["input_ids"])
    _, pooled_output = output[:2]
#     return torch.cat((encoded["input_ids"], encoded["attention_mask"]), dim=1)
    return pooled_output[0]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
encode_text(argument_df.iloc[0][0], 64).size()

torch.Size([512])

In [12]:
# Build features
conclusions = list()
with torch.no_grad():
    for i, v in argument_df.iterrows():
        conclusions.append(encode_text(v["Conclusion"], 64).reshape(1,-1))
conclusions = torch.cat(conclusions, dim=0)

arguments = list()
with torch.no_grad():
    for i, v in argument_df.iterrows():
        arguments.append(encode_text(v["Premise"], 256).reshape(1,-1))
arguments = torch.cat(arguments, dim=0)

stance = torch.Tensor(tuple(argument_df["Stance_encoded"])).reshape(-1,1)

In [13]:
stance = stance.reshape(-1,1)

In [12]:
stance.size(),conclusions.size(),arguments.size()

torch.Size([5393, 1])

In [14]:
x_train = torch.cat((conclusions, arguments, stance), dim=1)
y_train = torch.cat(tuple(labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

In [15]:
x_train.size()

torch.Size([5393, 1025])

In [16]:
torch.min(x_train, dim=1).values.size()

torch.Size([5393])

In [17]:
torch.max(x_train.abs(), dim=1, keepdim=True)[0].size()

torch.Size([5393, 1])

In [18]:
(x_train/torch.max(x_train.abs(), dim=1, keepdim=True)[0]).size()

torch.Size([5393, 1025])

In [19]:
x_train.size()

torch.Size([5393, 1025])

In [20]:
y_train.size()

torch.Size([5393, 10])

https://stackoverflow.com/questions/68011633/train-multi-output-regression-model-in-pytorch

In [124]:
import torch

class HumanValueRegressor(torch.nn.Module):
    def __init__(self):
        super(HumanValueRegressor, self).__init__()
        self.hidden1 = torch.nn.Linear(1025, 2000)
        self.hidden2 = torch.nn.Linear(2000, 1000)
        self.hidden3 = torch.nn.Linear(1000, 800)
        self.hidden4 = torch.nn.Linear(800, 500)
        self.output = torch.nn.Linear(500, 10)
        
        torch.nn.init.xavier_uniform_(self.hidden1.weight)
        torch.nn.init.zeros_(self.hidden1.bias)
        torch.nn.init.xavier_uniform_(self.hidden2.weight)
        torch.nn.init.zeros_(self.hidden2.bias)
        torch.nn.init.xavier_uniform_(self.hidden3.weight)
        torch.nn.init.zeros_(self.hidden3.bias)
        torch.nn.init.xavier_uniform_(self.hidden4.weight)
        torch.nn.init.zeros_(self.hidden4.bias)
        torch.nn.init.xavier_uniform_(self.output.weight)
        torch.nn.init.zeros_(self.output.bias)
        
    def forward(self, x):
        x = torch.relu(self.hidden1(x))
        x = torch.relu(self.hidden2(x))
        x = torch.relu(self.hidden3(x))
        x = torch.relu(self.hidden4(x))
        x = self.output(x)
        return x/(torch.max(x.abs(), dim=1, keepdim=True)[0])
    
model = HumanValueRegressor()

In [125]:
from tqdm.notebook import tqdm
import random

def training_loop(
    in_features, 
    label_features,
    batch_size,
    epochs,
    model
):
    print("Training...")
    criterion = torch.nn.MSELoss()

    # Create batches
    batches = []
    for i in range(0, len(in_features), batch_size):
        batches.append(
        (
            in_features[i:i+batch_size],
            label_features[i:i+batch_size]
        ))
        
    random.shuffle(batches)

    for i in range(epochs):
        losses = []
        for features, labels in tqdm(batches):
            optimizer.zero_grad()
            out_pred = model(features)
            loss = criterion(out_pred, labels)
#             print(loss)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print(f"epoch {i}, loss: {sum(losses)/len(losses)}")
    return model

In [126]:
# Training
LR = 0.00001
optimizer = torch.optim.Adam(model.parameters(), LR)
epochs = 200
batch_size = 8

trained_model = training_loop(
    x_train, 
    y_train,
    batch_size,
    epochs,
    model
)

Training...


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 0, loss: 0.35598775344866296


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 1, loss: 0.3535223433706495


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 2, loss: 0.35190523946726765


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 3, loss: 0.3500270719881411


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 4, loss: 0.34778259950655477


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 5, loss: 0.3454695974455939


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 6, loss: 0.3432052125312664


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 7, loss: 0.3409061199868167


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 8, loss: 0.33846842796714216


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 9, loss: 0.3363373366550163


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 10, loss: 0.3081531906569446


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 11, loss: 0.298890166481336


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 12, loss: 0.29703614685270524


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 13, loss: 0.29536280947702903


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 14, loss: 0.2937580243967198


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 15, loss: 0.2922146218573606


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 16, loss: 0.2906496740049786


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 17, loss: 0.2890223417458711


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 18, loss: 0.28751300635161225


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 19, loss: 0.2863202096577044


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 20, loss: 0.2849211703185682


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 21, loss: 0.2837908391157786


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 22, loss: 0.28748819755183325


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 23, loss: 0.28168666752400223


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 24, loss: 0.2757687052201342


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 25, loss: 0.2690364204512702


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 26, loss: 0.26414460373145565


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 27, loss: 0.2609796630122043


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 28, loss: 0.25814749616163746


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 29, loss: 0.2548639016902005


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 30, loss: 0.25269839701829133


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 31, loss: 0.24951798295533215


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 32, loss: 0.2476504400372505


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 33, loss: 0.24627933611472447


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 34, loss: 0.24625992550894066


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 35, loss: 0.24245969969917228


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 36, loss: 0.24071553754585762


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 37, loss: 0.2378558607896169


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 38, loss: 0.23684796876377529


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 39, loss: 0.23361639553750002


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 40, loss: 0.23038321039190998


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 41, loss: 0.23329141088106015


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 42, loss: 0.22647696903458348


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 43, loss: 0.22701771556227296


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 44, loss: 0.2253243535094791


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 45, loss: 0.22282687585662914


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 46, loss: 0.22218480040629704


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 47, loss: 0.2215297899974717


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 48, loss: 0.21987767909412032


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 49, loss: 0.2181022748571855


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 50, loss: 0.21621021544491803


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 51, loss: 0.21518238971630732


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 52, loss: 0.2141523132280067


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 53, loss: 0.2158470951517423


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 54, loss: 0.2119589837944066


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 55, loss: 0.21085898008611467


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 56, loss: 0.20882053160005146


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 57, loss: 0.20945603100238022


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 58, loss: 0.20672802621015796


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 59, loss: 0.2069717221458753


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 60, loss: 0.20542480011781056


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 61, loss: 0.20463022354024427


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 62, loss: 0.20370655153222658


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 63, loss: 0.20444259391890632


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 64, loss: 0.20098178050584264


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 65, loss: 0.19938766855884482


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 66, loss: 0.1996368433866236


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 67, loss: 0.19901957871185408


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 68, loss: 0.19791075702894617


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 69, loss: 0.19780332599939018


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 70, loss: 0.19674855011204878


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 71, loss: 0.1944823027346973


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 72, loss: 0.19662816797969518


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 73, loss: 0.19454258204610259


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 74, loss: 0.1912125021053685


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 75, loss: 0.19410717614822917


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 76, loss: 0.19058340925309394


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 77, loss: 0.19123809889510826


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 78, loss: 0.18993173251549403


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 79, loss: 0.1863285948805235


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 80, loss: 0.1873514280606199


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 81, loss: 0.18538340159036495


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 82, loss: 0.18618668909977984


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 83, loss: 0.18340220509027994


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 84, loss: 0.18263020866860946


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 85, loss: 0.18317254108411293


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 86, loss: 0.18222788397222758


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 87, loss: 0.1804222243141245


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 88, loss: 0.1808465858383311


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 89, loss: 0.18003541193626546


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 90, loss: 0.17992697290800236


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 91, loss: 0.18044517424923404


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 92, loss: 0.17623041987143181


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 93, loss: 0.17642281627213513


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 94, loss: 0.17601212396941804


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 95, loss: 0.17443067882899885


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 96, loss: 0.1732928352140718


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 97, loss: 0.17173368096075675


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 98, loss: 0.17155364856951766


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 99, loss: 0.1711503667894889


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 100, loss: 0.17184099220843227


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 101, loss: 0.1702836859640148


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 102, loss: 0.16807144798338414


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 103, loss: 0.16932107136757285


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 104, loss: 0.16637274849952924


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 105, loss: 0.16557610478666093


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 106, loss: 0.16470527751371264


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 107, loss: 0.16576993741922907


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 108, loss: 0.1648386134490095


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 109, loss: 0.1628960454988259


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 110, loss: 0.16272429412989706


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 111, loss: 0.16149182054179687


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 112, loss: 0.16007536942139267


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 113, loss: 0.1585815804513792


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 114, loss: 0.15897154484909995


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 115, loss: 0.15916628672568886


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 116, loss: 0.1569901554369264


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 117, loss: 0.1556891175331893


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 118, loss: 0.15619588214421162


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 119, loss: 0.15335257060549878


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 120, loss: 0.15476439293611932


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 121, loss: 0.153559591996449


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 122, loss: 0.15148101943372577


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 123, loss: 0.15042352442733115


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 124, loss: 0.15146425214216666


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 125, loss: 0.1486779528400964


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 126, loss: 0.14780988361134573


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 127, loss: 0.14681368669426


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 128, loss: 0.1453719646245655


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 129, loss: 0.14760068948760077


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 130, loss: 0.1452984463589059


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 131, loss: 0.14492754832048108


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 132, loss: 0.14309833705425262


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 133, loss: 0.1437392081954965


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 134, loss: 0.14223465102138344


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 135, loss: 0.14230525654085258


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 136, loss: 0.1399617095346804


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 137, loss: 0.13971886803016617


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 138, loss: 0.13890622984479975


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 139, loss: 0.1397318618585942


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 140, loss: 0.1369722989215343


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 141, loss: 0.13550900265160534


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 142, loss: 0.13617954192062218


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 143, loss: 0.13479655836467389


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 144, loss: 0.13306285369830828


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 145, loss: 0.13299422468645153


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 146, loss: 0.13239149417107304


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 147, loss: 0.13223116270507929


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 148, loss: 0.13153019943171076


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 149, loss: 0.13227609113962563


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 150, loss: 0.13230001281257028


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 151, loss: 0.1323810655913419


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 152, loss: 0.13017998295387737


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 153, loss: 0.1281729161304732


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 154, loss: 0.12965926238370162


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 155, loss: 0.12858187730182652


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 156, loss: 0.1270926427965363


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 157, loss: 0.1261371315450028


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 158, loss: 0.12644849162686755


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 159, loss: 0.12642993066873814


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 160, loss: 0.12310969422674842


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 161, loss: 0.12310679914636745


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 162, loss: 0.1222286772162274


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 163, loss: 0.12269564741187626


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 164, loss: 0.1228144505482029


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 165, loss: 0.1211721644964483


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 166, loss: 0.12153883092381336


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 167, loss: 0.1200877003626967


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 168, loss: 0.12092269027950588


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 169, loss: 0.11815122553733765


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 170, loss: 0.11862745312628922


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 171, loss: 0.11811132055465821


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 172, loss: 0.11739332925252341


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 173, loss: 0.1160139514423079


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 174, loss: 0.11548589426196283


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 175, loss: 0.11381929494854477


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 176, loss: 0.11413311986459626


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 177, loss: 0.11528162010979873


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 178, loss: 0.11336883786099929


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 179, loss: 0.11143101423978806


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 180, loss: 0.11234059543935238


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 181, loss: 0.11052551080783209


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 182, loss: 0.10991983741797783


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 183, loss: 0.11180201104462699


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 184, loss: 0.10869654829303424


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 185, loss: 0.10918211272745221


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 186, loss: 0.1079304284354051


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 187, loss: 0.10710078989741979


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 188, loss: 0.10695296689178105


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 189, loss: 0.107867915397165


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 190, loss: 0.1065141017459057


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 191, loss: 0.10746292737209134


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 192, loss: 0.10630576523917692


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 193, loss: 0.10493594243846557


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 194, loss: 0.10492801538220159


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 195, loss: 0.10361438818137955


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 196, loss: 0.10353842804277384


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 197, loss: 0.10197897073847276


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 198, loss: 0.1031316973634616


  0%|          | 0/675 [00:00<?, ?it/s]

epoch 199, loss: 0.10226832565058161


In [147]:
with torch.no_grad():
    x = trained_model(x_train)
x

tensor([[-0.6477,  1.0000,  0.1541,  ..., -0.1709,  0.3198, -0.6226],
        [-0.0176,  0.0421,  0.0164,  ..., -0.3842,  1.0000,  0.1207],
        [ 0.0216, -0.4108,  0.0042,  ...,  0.1730,  0.0289,  1.0000],
        ...,
        [-0.0385, -0.7287, -0.4030,  ..., -0.2124,  0.5450, -0.0332],
        [-0.0181, -0.8372, -0.2881,  ..., -0.2870,  0.7244, -0.1623],
        [ 0.1250,  1.0000, -0.2470,  ..., -0.6285,  0.3921,  0.3053]])

In [148]:
x[0]

tensor([-0.6477,  1.0000,  0.1541, -0.8998,  0.4770, -0.5109, -0.5580, -0.1709,
         0.3198, -0.6226])

In [129]:
y_train

tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        ...,
        [ 0.,  0.,  0.,  ..., -1.,  0.,  0.],
        [ 0., -1.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  ...,  0.,  0.,  1.]])

In [149]:
(x - y_train).square().mean()

tensor(0.2216)

In [131]:
len(x),len(y_train)

(5393, 5393)

In [150]:
pred_label = []
true_label = []
for i in range(len(x)):
    pred_label.append(decode_label(x[i]))
    true_label.append(decode_label(y_train[i]))

In [58]:
#After 20 epochs 
print("Classification report:\n", (classification_report(true_label, pred_label)))

Classification report:
               precision    recall  f1-score   support

           0       0.63      0.15      0.24       824
           1       0.47      0.02      0.03      1204
           2       0.00      0.00      0.00       242
           3       0.00      0.00      0.00       159
           4       0.50      0.00      0.01      1092
           5       0.00      0.00      0.00       503
           6       0.00      0.00      0.00       427
           7       0.00      0.00      0.00       367
           8       0.53      0.68      0.60      1726
           9       0.72      0.29      0.42      1438
          10       0.67      0.00      0.01       404
          11       0.40      0.07      0.11       986
          12       0.00      0.00      0.00       202
          13       0.00      0.00      0.00       382
          14       0.32      0.10      0.15       912
          15       0.00      0.00      0.00       699
          16       0.35      1.00      0.52      1883
   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [68]:
#After 100 epochs 
print("Classification report:\n", (classification_report(true_label, pred_label)))

Classification report:
               precision    recall  f1-score   support

           0       0.50      0.59      0.54       824
           1       0.63      0.38      0.48      1204
           2       0.47      0.07      0.12       242
           3       0.36      0.06      0.10       159
           4       0.58      0.30      0.40      1092
           5       0.41      0.16      0.23       503
           6       0.43      0.40      0.41       427
           7       0.26      0.17      0.20       367
           8       0.45      0.86      0.59      1726
           9       0.55      0.68      0.61      1438
          10       0.57      0.23      0.33       404
          11       0.55      0.28      0.37       986
          12       0.52      0.21      0.30       202
          13       0.61      0.27      0.38       382
          14       0.42      0.19      0.26       912
          15       0.50      0.06      0.11       699
          16       0.48      0.78      0.59      1883
   

  _warn_prf(average, modifier, msg_start, len(result))


In [151]:
#After 200 epochs 
print("Classification report:\n", (classification_report(true_label, pred_label)))

Classification report:
               precision    recall  f1-score   support

           0       0.53      0.67      0.59       824
           1       0.47      0.75      0.58      1204
           2       0.24      0.26      0.25       242
           3       0.21      0.40      0.27       159
           4       0.49      0.60      0.54      1092
           5       0.28      0.53      0.36       503
           6       0.29      0.57      0.39       427
           7       0.27      0.46      0.34       367
           8       0.58      0.78      0.66      1726
           9       0.55      0.78      0.64      1438
          10       0.26      0.71      0.38       404
          11       0.51      0.59      0.55       986
          12       0.16      0.73      0.27       202
          13       0.29      0.72      0.41       382
          14       0.40      0.57      0.47       912
          15       0.40      0.50      0.45       699
          16       0.62      0.78      0.69      1883
   

  _warn_prf(average, modifier, msg_start, len(result))


### Results 

In [78]:
val_argument_df["Stance_encoded"] = val_argument_df.apply(lambda row: 0 if row["Stance"] == "against" else 1, axis=1)

In [79]:
torch.cat(tuple(val_labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  1.],
        [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  1.,  0.],
        ...,
        [ 0.,  0.,  0.,  ...,  0.,  0., -1.],
        [ 0.,  1.,  0.,  ...,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  ...,  0.,  0.,  0.]])

In [80]:
encode_text(val_argument_df.iloc[0][0], 64).size()

torch.Size([512])

In [81]:
# Build features
val_conclusions = list()
with torch.no_grad():
    for i, v in val_argument_df.iterrows():
        val_conclusions.append(encode_text(v["Conclusion"], 64).reshape(1,-1))
val_conclusions = torch.cat(val_conclusions, dim=0)

val_arguments = list()
with torch.no_grad():
    for i, v in val_argument_df.iterrows():
        val_arguments.append(encode_text(v["Premise"], 256).reshape(1,-1))
val_arguments = torch.cat(val_arguments, dim=0)

val_stance = torch.Tensor(tuple(val_argument_df["Stance_encoded"])).reshape(-1,1)

In [82]:
val_stance = val_stance.reshape(-1,1)

In [83]:
val_x_train = torch.cat((val_arguments, val_arguments, val_stance), dim=1)
val_y_train = torch.cat(tuple(val_labels_df.apply(lambda row: torch.Tensor(encode_label(row)).reshape(1,-1), axis=1)), dim=0)

In [135]:
with torch.no_grad():
    val_x = trained_model(val_x_train)
val_x

tensor([[ 0.2320, -0.0157, -0.1802,  ..., -1.0000, -0.1863, -0.3216],
        [-0.2206,  0.4501, -0.2606,  ..., -0.5966,  0.0501,  0.0491],
        [-0.1667, -0.3600, -0.1189,  ..., -0.4886,  0.7311, -1.0000],
        ...,
        [ 0.5218,  0.2987,  0.4093,  ..., -0.1852,  1.0000, -0.1895],
        [ 0.3685,  0.3891,  0.0097,  ..., -0.1891,  1.0000, -0.7762],
        [-0.4081,  0.4775,  0.3604,  ...,  0.3109,  0.6973, -0.9407]])

In [136]:
(val_x - val_y_train).square().mean()

tensor(0.4865)

In [144]:
val_pred_label = []
val_true_label = []
for i in range(len(val_x)):
    val_pred_label.append(decode_label(val_x[i],0.1))
    val_true_label.append(decode_label(val_y_train[i],0.1))

In [145]:
#After 200 epochs 
print("Classification report:\n", (classification_report(val_true_label, val_pred_label)))

Classification report:
               precision    recall  f1-score   support

           0       0.10      0.18      0.13       229
           1       0.16      0.14      0.15       426
           2       0.07      0.31      0.11       134
           3       0.04      0.13      0.06        92
           4       0.25      0.27      0.26       407
           5       0.09      0.04      0.05       129
           6       0.06      0.11      0.08        82
           7       0.04      0.04      0.04       126
           8       0.37      0.92      0.53       668
           9       0.35      0.17      0.23       404
          10       0.08      0.58      0.14       150
          11       0.18      0.64      0.28       385
          12       0.02      0.21      0.04        56
          13       0.06      0.60      0.11       116
          14       0.27      0.51      0.35       465
          15       0.12      0.92      0.22       233
          16       0.36      0.88      0.51       637
   

  _warn_prf(average, modifier, msg_start, len(result))
