In [1]:
import os
import random

import pandas as pd
from datasets import Dataset, load_dataset, load_from_disk
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

## Load data from local files

Paths to the data files

In [2]:
# Root folder holding every TSV split (joining a single component returns it unchanged).
data_dir = os.path.join('dataset')

# Argument files: one TSV per split, named `arguments-<split>.tsv`.
(
    arguments_training_filepath,
    arguments_validation_filepath,
    arguments_validation_filepath_zhihu,
    arguments_test_filepath,
) = (
    os.path.join(data_dir, f'arguments-{split}.tsv')
    for split in ('training', 'validation', 'validation-zhihu', 'test')
)

# Label files: same splits, named `labels-<split>.tsv`.
(
    labels_training_filepath,
    labels_validation_filepath,
    labels_validation_filepath_zhihu,
    labels_test_filepath,
) = (
    os.path.join(data_dir, f'labels-{split}.tsv')
    for split in ('training', 'validation', 'validation-zhihu', 'test')
)

Load the data

In [3]:
# Every file is a UTF-8, tab-separated table whose first line is the header,
# so all eight reads share the same parser options. Files are read in the
# order they are listed.
(
    arguments_training,
    arguments_validation,
    arguments_validation_zhihu,
    arguments_test,
) = (
    pd.read_csv(filepath, encoding='utf-8', sep='\t', header=0)
    for filepath in (
        arguments_training_filepath,
        arguments_validation_filepath,
        arguments_validation_filepath_zhihu,
        arguments_test_filepath,
    )
)

(
    labels_training,
    labels_validation,
    labels_validation_zhihu,
    labels_test,
) = (
    pd.read_csv(filepath, encoding='utf-8', sep='\t', header=0)
    for filepath in (
        labels_training_filepath,
        labels_validation_filepath,
        labels_validation_filepath_zhihu,
        labels_test_filepath,
    )
)

# Preview the first rows of the training arguments.
arguments_training.head()


Unnamed: 0,Argument ID,Conclusion,Stance,Premise
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...
1,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...
2,A01003,We should abandon marriage,against,marriage is the ultimate commitment to someone...
3,A01004,We should ban naturopathy,against,it provides a useful income for some people
4,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...


## Some helper functions

In [4]:
# Value categories offered to the model; joined with ', ' into every prompt.
LABELS = [
    'Self-direction thought',
    'Self-direction action',
    'Stimulation',
    'Hedonism',
    'Achievement',
    'Power dominance',
    'Power resources',
    'Face',
    'Security personal',
    'Security societal',
    'Tradition',
    'Conformity rules',
    'Conformity interpersonal',
    'Humility',
    'Benevolence caring',
    'Benevolence dependability',
    'Universalism concern',
    'Universalism nature',
    'Universalism tolerance',
    'Universalism objectivity',
]

# Prompt templates. Positional placeholders, in order:
# premise, stance, conclusion, comma-separated label list.
PROMPT_FORMATS = [
    (
        "The premise: '{}' is '{}'. The conclusion is '{}'. "
        "Value category: {}\n"
        " Question: Which value category does the argument belong to?\n"
    ),
    (
        "Premise: {}\nStance: {}\nConclusion: {}. "
        "Value category: {}\n"
        " Question: Which value category does the argument belong to?\n"
    ),
    (
        "Argument: {}. {}. {}. "
        "Value category: {}\n"
        " Question: Which value category does the argument belong to?\n"
    ),
]

def convert_binary_labels_to_string(df):
    """Add a 'String Labels' column listing the names of the columns set to 1.

    The first column is skipped (it is treated as the identifier); every
    remaining column is treated as a label flag. For each row, the names of
    the columns whose value equals 1 are collected in column order.

    The frame is modified in place and the same object is returned.
    """
    flag_columns = list(df.columns[1:])

    df['String Labels'] = [
        [name for name, flag in zip(flag_columns, record[1:]) if flag == 1]
        for record in df.itertuples(index=False, name=None)
    ]
    return df

def ensemble_prompt(df):
    """Add an 'ensemble_prompt' column: one rendered prompt per template.

    For every row, each template in PROMPT_FORMATS is filled with the row's
    'Premise', 'Stance' and 'Conclusion' plus the comma-separated LABELS, so
    each cell holds a list with len(PROMPT_FORMATS) strings.

    The frame is modified in place and the same object is returned.
    """
    label_list = ', '.join(LABELS)
    rendered_rows = []

    for _, row in df.iterrows():
        fields = (row['Premise'], row['Stance'], row['Conclusion'], label_list)
        rendered_rows.append([template.format(*fields) for template in PROMPT_FORMATS])

    df['ensemble_prompt'] = rendered_rows
    return df

def few_shot_prompt(df, num_shots=1, prompt_format=0, random_seed=46):
    """Add a 'few_shot_prompt' column with demonstrations prepended to each prompt.

    `num_shots` rows are sampled from `df` itself and rendered as
    demonstrations, each followed by an "Answer: <label>" line. The same
    demonstration block is prepended to every row's own prompt, which ends
    with an empty "Answer: " line for the model to complete.

    Args:
        df: frame with 'Premise', 'Stance' and 'Conclusion' columns.
        num_shots: number of demonstration rows to sample.
        prompt_format: index into PROMPT_FORMATS.
        random_seed: seeds both the row sampling and the choice of the
            demonstration answers, so repeated calls produce the same prompts.

    Returns:
        The same frame, modified in place.

    Raises:
        IndexError: if `prompt_format` is not a valid index of PROMPT_FORMATS.
        ValueError: from `DataFrame.sample` when `num_shots` exceeds the
            number of rows.
    """
    template = PROMPT_FORMATS[prompt_format]
    label_list = ', '.join(LABELS)

    # Dedicated generator: makes the demonstration answers reproducible from
    # `random_seed` and leaves the global `random` state untouched.
    rng = random.Random(random_seed)

    def render(row):
        return template.format(row['Premise'], row['Stance'], row['Conclusion'], label_list)

    selected_arguments = df.sample(n=num_shots, random_state=random_seed)

    # NOTE(review): the demonstration answers are drawn at random from LABELS,
    # not taken from gold labels (this frame carries none), and a row can be
    # sampled as a demonstration for its own prompt.
    demonstrations = ''.join(
        render(row) + f"Answer: {rng.choice(LABELS)}\n"
        for _, row in selected_arguments.iterrows()
    )

    # A plain list keeps the assignment well-defined even for an empty frame,
    # where `df.apply(..., axis=1)` would return a DataFrame instead of a Series.
    df['few_shot_prompt'] = [
        demonstrations + render(row) + "Answer: \n"
        for _, row in df.iterrows()
    ]
    return df

def combine_columns(df_arguments, df_labels):
    """Join arguments with their string labels on 'Argument ID'.

    Only 'Argument ID' and 'String Labels' are taken from `df_labels`; the
    latter is exposed as 'Labels' in the result. `df_labels` itself is left
    untouched.
    """
    label_view = df_labels[['Argument ID', 'String Labels']].rename(
        columns={'String Labels': 'Labels'}
    )
    return pd.merge(df_arguments, label_view, on='Argument ID')

def labels_to_multi_choice():
    """Return LABELS as lettered options: 'A: <label>', 'B: <label>', ...

    Letters are consecutive code points starting at 'A', one per label, in
    LABELS order.
    """
    return [
        f"{chr(ord('A') + position)}: {label}"
        for position, label in enumerate(LABELS)
    ]


## Convert the binary labels

In [5]:
# Label names taken from the training label columns: drop the identifier and
# the derived 'String Labels' column, then strip ':' from each name.
labels = [
    column.replace(':', '')
    for column in labels_training.columns.unique()
    if column not in ('Argument ID', 'String Labels')
]

In [6]:
# ensemble_prompt writes the new column into arguments_training itself and
# returns that same frame, so both names refer to one object.
ensemble_prompt_df = ensemble_prompt(df=arguments_training)

# Rendered templates for the row labelled 0.
ensemble_prompt_df.loc[0, 'ensemble_prompt']

["The premise: 'if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal?' is 'in favor of'. The conclusion is 'Entrapment should be legalized'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Tradition, Conformity rules, Conformity interpersonal, Humility, Benevolence caring, Benevolence dependability, Universalism concern, Universalism nature, Universalism tolerance, Universalism objectivity\n Question: Which value category does the argument belong to?\n",
 "Premise: if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal?\nStance: in favor of\nConclusion: Entrapment should be legalized. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Tradition, Conform

In [7]:
# Two demonstrations, first template. few_shot_prompt writes the new column
# into arguments_training itself and returns that same frame.
few_shot_promp_df = few_shot_prompt(df=arguments_training, num_shots=2, prompt_format=0)

# Few-shot prompt for the row labelled 0.
few_shot_promp_df.loc[0, 'few_shot_prompt']

"The premise: 'people should keep their religion to themselves' is 'in favor of'. The conclusion is 'We should ban missionary work'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Tradition, Conformity rules, Conformity interpersonal, Humility, Benevolence caring, Benevolence dependability, Universalism concern, Universalism nature, Universalism tolerance, Universalism objectivity\n Question: Which value category does the argument belong to?\nAnswer: Tradition\nThe premise: 'guantanamo bay exists to remove dangerous terrorists from the outside world, eliminated their ability to recruit and plan attacks.' is 'against'. The conclusion is 'We should close Guantanamo Bay detention camp'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, 

In [8]:
# Derive the per-row list of label names (adds 'String Labels' to labels_training in place).
converted_labels_train = convert_binary_labels_to_string(df=labels_training)

# Attach the labels to the arguments. The prompt columns appear in `train`
# only because the prompt helpers already added them to arguments_training
# in the cells above.
train = combine_columns(df_arguments=arguments_training, df_labels=converted_labels_train)

train.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,ensemble_prompt,few_shot_prompt,Labels
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,[The premise: 'if entrapment can serve to more...,The premise: 'people should keep their religio...,[Security: societal]
1,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,[The premise: 'we should ban human cloning as ...,The premise: 'people should keep their religio...,[Security: societal]
2,A01003,We should abandon marriage,against,marriage is the ultimate commitment to someone...,[The premise: 'marriage is the ultimate commit...,The premise: 'people should keep their religio...,[Self-direction: action]
3,A01004,We should ban naturopathy,against,it provides a useful income for some people,[The premise: 'it provides a useful income for...,The premise: 'people should keep their religio...,[Security: personal]
4,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,[The premise: 'fast food should be banned beca...,The premise: 'people should keep their religio...,[Security: personal]


## Load the data from disk

In [14]:
# `load_from_disk` is imported in the notebook's top import cell.
# NOTE(review): this cell's execution count (14) is out of sequence with its
# neighbours, and the saved output below shows a single record although the
# cell displays nothing — presumably a display expression was removed. The
# code that wrote this directory is not visible in this notebook; confirm
# the cell still works after Restart Kernel -> Run All.
dataset_path = 'dataset/processed/processed_dataset'
dataset = load_from_disk(dataset_path)

{'Argument ID': 'A01001',
 'Conclusion': 'Entrapment should be legalized',
 'Stance': 'in favor of',
 'Premise': "if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal?",
 'ensemble_prompt': ["The premise: 'if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal?' is 'in favor of'. The conclusion is 'Entrapment should be legalized'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Tradition, Conformity rules, Conformity interpersonal, Humility, Benevolence caring, Benevolence dependability, Universalism concern, Universalism nature, Universalism tolerance, Universalism objectivity\n Question: Which value category does the argument belong to?\n",
  "Premise: if entrapment can serve to more easily capture wanted criminals, then why shouldn't it be legal?\nStance: in favor of\nConcl

## Setup the model

In [9]:
# `AutoTokenizer` / `AutoModelForSeq2SeqLM` are imported in the notebook's top import cell.
# One checkpoint id shared by both loaders so tokenizer and model cannot drift apart.
MODEL_NAME = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

2023-05-13 00:46:08.517440: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Select 10 random samples 

In [10]:
# Fixed seed so the same 10 rows are evaluated on every run (same value as
# the default `random_seed` of few_shot_prompt).
train_10 = train.sample(n=10, random_state=46)
train_10

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,ensemble_prompt,few_shot_prompt,Labels
805,A13002,We should legalize organ trade,against,legalizing the organ trade would lead to peopl...,[The premise: 'legalizing the organ trade woul...,The premise: 'people should keep their religio...,"[Security: personal, Security: societal]"
3375,A23476,We should close Guantanamo Bay detention camp,against,Detaining terrorists at Guantanamo Bay ensures...,[The premise: 'Detaining terrorists at Guantan...,The premise: 'people should keep their religio...,"[Security: societal, Universalism: concern]"
2953,A22480,We should subsidize embryonic stem cell research,against,stem cell research is seen as unethical and w...,[The premise: 'stem cell research is seen as u...,The premise: 'people should keep their religio...,"[Face, Tradition, Humility, Benevolence: depen..."
1736,A20025,Blockade of the Gaza Strip should be ended,against,blockage of the gaza strip should not be ended...,[The premise: 'blockage of the gaza strip shou...,The premise: 'people should keep their religio...,"[Security: societal, Universalism: concern]"
4655,A18434,We should subsidize student loans,in favor of,student loans is a type of aid that is given t...,[The premise: 'student loans is a type of aid ...,The premise: 'people should keep their religio...,"[Achievement, Power: resources, Security: pers..."
1915,A20240,We should abolish the three-strikes laws,against,the three-strikes law is effective at deterrin...,[The premise: 'the three-strikes law is effect...,The premise: 'people should keep their religio...,"[Security: societal, Conformity: rules, Benevo..."
4153,A25399,We should fight urbanization,in favor of,we should fight urbanization to protect the oz...,[The premise: 'we should fight urbanization to...,The premise: 'people should keep their religio...,[Universalism: nature]
160,A05096,We should prohibit school prayer,against,a non-denominational school prayer is about as...,[The premise: 'a non-denominational school pra...,The premise: 'people should keep their religio...,"[Self-direction: thought, Tradition, Universal..."
3658,A24306,We should adopt gender-neutral language,against,gender characteristics are part of a person's ...,[The premise: 'gender characteristics are part...,The premise: 'people should keep their religio...,"[Face, Tradition, Conformity: rules, Humility,..."
441,A12067,We should abolish zoos,in favor of,zoos should be abolished so that animals can t...,[The premise: 'zoos should be abolished so tha...,The premise: 'people should keep their religio...,"[Benevolence: caring, Benevolence: dependabili..."


## Zero-shot Prompting

In [11]:
def query_from_list(query, max_new_tokens=20):
    """Generate a completion for one prompt or a list of prompts.

    Uses the module-level `tokenizer` and `model`.

    Args:
        query: a prompt string, or a list of prompt strings.
        max_new_tokens: upper bound on the number of generated tokens per prompt.

    Returns:
        The decoded generations (special tokens removed), one per prompt.
    """
    # padding=True lets prompts of different lengths be stacked into one
    # tensor batch; it changes nothing for a single prompt.
    inputs = tokenizer(query, return_tensors="pt", padding=True)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Query every template of every sampled row and print prediction vs. gold labels.
# The template counter has its own name so it no longer shadows the row index.
for _, row in train_10.iterrows():
    for template_index, prompt in enumerate(row['ensemble_prompt']):
        result = query_from_list(prompt)
        print(f"Template: {template_index} \n Prediction: {result[0]}\n True Label: {row['Labels']}\n")

Template: 0 
 Prediction: Self-direction action
 True Label: ['Security: personal', 'Security: societal']

Template: 1 
 Prediction: Value category
 True Label: ['Security: personal', 'Security: societal']

Template: 2 
 Prediction: Value category
 True Label: ['Security: personal', 'Security: societal']

Template: 0 
 Prediction: Self-direction action
 True Label: ['Security: societal', 'Universalism: concern']

Template: 1 
 Prediction: Self-direction action
 True Label: ['Security: societal', 'Universalism: concern']

Template: 2 
 Prediction: Value category
 True Label: ['Security: societal', 'Universalism: concern']

Template: 0 
 Prediction: Universalism
 True Label: ['Face', 'Tradition', 'Humility', 'Benevolence: dependability', 'Universalism: nature']

Template: 1 
 Prediction: Value category
 True Label: ['Face', 'Tradition', 'Humility', 'Benevolence: dependability', 'Universalism: nature']

Template: 2 
 Prediction: Value category
 True Label: ['Face', 'Tradition', 'Humility'

## Few-shot classification prompt

In [12]:
def query_from_list(query, max_new_tokens=30):
    """Generate a completion for one prompt or a list of prompts.

    Uses the module-level `tokenizer` and `model`.

    NOTE(review): this redefines the `query_from_list` declared in the
    zero-shot section; the generation budget is a parameter here so a single
    definition could serve both sections.

    Args:
        query: a prompt string, or a list of prompt strings.
        max_new_tokens: upper bound on the number of generated tokens per prompt.

    Returns:
        The decoded generations (special tokens removed), one per prompt.
    """
    # padding=True lets prompts of different lengths be stacked into one
    # tensor batch; it changes nothing for a single prompt.
    inputs = tokenizer(query, return_tensors="pt", padding=True)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Run the few-shot prompt of every sampled row and print it next to the
# model's answer and the gold labels.
for index, row in train_10.iterrows():
    few_shot_text = row['few_shot_prompt']
    result = query_from_list(few_shot_text)
    print(f"{few_shot_text:<24} {result[0]}\n True Label: {row['Labels']}\n")

The premise: 'people should keep their religion to themselves' is 'in favor of'. The conclusion is 'We should ban missionary work'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Tradition, Conformity rules, Conformity interpersonal, Humility, Benevolence caring, Benevolence dependability, Universalism concern, Universalism nature, Universalism tolerance, Universalism objectivity
 Question: Which value category does the argument belong to?
Answer: Tradition
The premise: 'guantanamo bay exists to remove dangerous terrorists from the outside world, eliminated their ability to recruit and plan attacks.' is 'against'. The conclusion is 'We should close Guantanamo Bay detention camp'. Value category: Self-direction thought, Self-direction action, Stimulation, Hedonism, Achievement, Power dominance, Power resources, Face, Security personal, Security societal, Trad