In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from transformers import AutoTokenizer
import torch
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df_val = pd.read_csv("../data/dev_original.tsv", sep="\t")

In [4]:
df_train = pd.read_csv("../data/train_original.tsv", sep="\t")

In [23]:
df_train

Unnamed: 0,Cannabis should be legal.,It's not a bad thing to make marijuana more available.,support,(cannabis; synonym of; marijuana)(legal; causes; more available)(marijuana; capable of; good thing)(good thing; desires; legal)
0,Women should not be in combat.,Women and men have the same rights.,counter,(women and men; is a; citizens)(citizens; caus...
1,People will use marijuana independent of its l...,People use marijuana everywhere now.,support,(marijuana; receives action; popular)(popular;...
2,women should not partake in war,the armed forces are more open to recruiting w...,counter,(armed forces; desires; nurses and helpers)(nu...
3,Marijuana should not be legalized.,Marijuana is dangerous for society.,support,(marijuana; is a; recreational drug)(recreatio...
4,countries should ban the use of marijuana,everyone has the right to choose what to smoke.,counter,(everyone; receives action; has the right)(has...
...,...,...,...,...
2362,telemarketing should be dictated by the market.,Telemarketing offers no value to society.,counter,(telemarketing; not capable of; value to socie...
2363,Acting takes children away from their educatio...,Some children enjoy acting.,counter,(some children; capable of; acting)(acting; ha...
2364,Some people's skills are better suited to area...,The military has a need for a great many skills.,counter,(military; has property; main purpose)(main pu...
2365,School prayer allows student to do what they a...,School prayer should be discouraged as it teac...,counter,(parents; has property; encourage children's b...


In [15]:
df = pd.concat([df_train, df_val], axis=0)

In [19]:
df = df_train.append(df_val, ignore_index=True)

  df = df_train.append(df_val, ignore_index=True)


In [6]:
train_val, test = train_test_split(df, random_state=1, test_size=0.1)

In [7]:
train, val = train_test_split(train_val, random_state=1, test_size=0.1)

In [None]:
train.to_csv("../data/train.tsv", sep="\t")

In [8]:
train

Unnamed: 0,Cannabis should be legal.,It's not a bad thing to make marijuana more available.,support,(cannabis; synonym of; marijuana)(legal; causes; more available)(marijuana; capable of; good thing)(good thing; desires; legal),marriage is pase.,Not everyone believes in marriage anymore.,(marriage; capable of; deceiving)(deceiving; created by; pase)(pase; used for; everyone)(everyone; capable of; believes)
142,,,counter,,urbanization creates high crime.,People migrate to cities in order to make money.,(cities; capable of; job)(job; used for; make ...
1066,executives are not overpaid for the work they do.,Executives work quite hard and deserve their pay.,support,(executives; capable of; work quite hard)(work...,,,
2334,Telemarketers has nothing to offer only to rip...,Not all telemarketers are scammers most have e...,counter,(telemarketers; capable of; enough to offer)(e...,,,
94,,,support,,Embryotic stem cells can save lives,Embryotic stem cells are something that is in ...,(embryotic stem cells; capable of; interest of...
1508,The olympics have lost their luster because wi...,Athletes are tested for drugs and can't compet...,counter,(athletes; capable of; tested for drugs)(teste...,,,
...,...,...,...,...,...,...,...
518,zero tolerance could have deep consequences fo...,Zero tolerance implies harsher penalties.,support,(zero tolerance; causes; harsher punishment)(h...,,,
1236,The government is obliged to ban naturopathy.,Naturopathy is experimental and the government...,support,(naturopathy; is a; experimental)(experimental...,,,
1965,Safe spaces are a redundant and unnecessary pr...,Some people have no support or guidance and ne...,counter,(safe spaces; capable of; support or guidance)...,,,
681,Someone with a history of criminal behavior sh...,Repeat offenders have not learned their lesson...,support,(repeat offenders; has property; criminal beha...,,,


In [None]:
val.to_csv("../data/val.tsv", sep="\t")

In [None]:
test.to_csv("../data/test.tsv", sep="\t")

In [None]:
class ExplaGraphs(Dataset):
    def __init__(self, model_name, split="train", use_graphs=False):
        print(f"Use graph explanations = {use_graphs}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        df = pd.read_csv(f"../data/{split}.tsv", sep="\t")
        premises, arguments, self.labels, explanations = df.to_numpy().T
        self.label_converter = {"counter": 0, "support": 1}
        self.label_inverter = {0: "counter", 1: "support"}
        explanations = [self.clean_string(x) for x in explanations]
        if use_graphs == True:
            self.features = [prem + " [SEP] " + arg + " [SEP] " + exp for prem,arg,exp in zip(premises, arguments, explanations)]
        else:
            self.features = [prem + " [SEP] " + arg for prem,arg in zip(premises, arguments)]
            
        encodings = self.tokenizer(self.features, truncation=True, padding=True)
        self.input_ids, self.attention_masks = encodings["input_ids"], encodings["attention_mask"]
        
    def clean_string(self, x):
        x = x.replace(")(", ", ")
        return x.replace("(", "").replace(")","").replace(";", "")
    
    def __len__(self):
        return len(self.input_ids)
    
    def __getitem__(self, idx):
        return torch.LongTensor(self.input_ids[idx]), torch.BoolTensor(self.attention_masks[idx]), self.label_converter[self.labels[idx]]

In [None]:
train = ExplaGraphs("bert-base-uncased", split="dev")

In [None]:
x = train.features[1]

In [None]:
x