### Fine-tuning LLM to detect Fallacies in Arguments

Using data from:

Ruiz-Dolz, R., & Lawrence, J. (2023, December). Detecting Argumentative Fallacies in the Wild: Problems and Limitations of Large Language Models. In M. Alshomary, C.-C. Chen, S. Muresan, J. Park, & J. Romberg (Eds.), Proceedings of the 10th Workshop on Argument Mining (pp. 1–10). Retrieved from https://aclanthology.org/2023.argmining-1.1

https://github.com/raruidol/ArgumentMining23-Fallacy/blob/main/README.md




In [4]:
from datasets import load_dataset, DatasetDict, Dataset

from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_fscore_support
import evaluate
import torch
import numpy as np
import json


In [3]:
try:
      with open("fallacy_corpus.json") as filehandle:
          json_data = json.load(filehandle)
except:
      print('The file is not available.')
      exit()

In [5]:
data = {'train': {}, 'dev': {}, 'test': {}}

data['train']['label'] = []
data['train']['text'] = []
data['dev']['label'] = []
data['dev']['text'] = []
data['test']['label'] = []
data['test']['text'] = []

fallacyStringConversions = {
'None': 0,
'AdHominem': 1,
'AppealtoEmotion': 2, 
'AppealtoAuthority': 3, 
'Slipperyslope': 4,
'AppealtoMajority': 5
}

fallacyNumberConversions = {
0: 'None',
1: 'AdHominem',
2: 'AppealtoEmotion', 
3: 'AppealtoAuthority', 
4: 'Slipperyslope',
5: 'AppealtoMajority'
}

for sample in json_data['train']:
    data['train']['text'].append(sample[0])
    data['train']['label'].append(fallacyStringConversions[sample[1]])

for sample in json_data['dev']:
        data['dev']['text'].append(sample[0])
        data['dev']['label'].append(fallacyStringConversions[sample[1]])

for sample in json_data['test']:
        data['test']['text'].append(sample[0])
        data['test']['label'].append(fallacyStringConversions[sample[1]])


final_data = DatasetDict()
for k, v in data.items():
    final_data[k] = Dataset.from_dict(v)
    

Create Model

In [6]:
model_checkpoint = 'distilbert-base-uncased'
# model_checkpoint = 'roberta-base' # you can alternatively use roberta-base but this model is bigger thus training will take longer


# generate classification model from model_checkpoint
model = AutoModelForSequenceClassification.from_pretrained( model_checkpoint, num_labels=6, ignore_mismatched_sizes=True)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Pre-process data 

In [7]:
# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

# add pad token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

In [8]:
# create tokenize function
def tokenize_function(examples):
    # extract text
    text = examples["text"]

    #tokenize and truncate text
    tokenizer.truncation_side = "left"
    tokenized_inputs = tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=512
    )

    return tokenized_inputs

In [9]:
# tokenize training and validation datasets
tokenized_dataset = final_data.map(tokenize_function, batched=True)
tokenized_dataset

Map: 100%|██████████| 2152/2152 [00:00<00:00, 11224.65 examples/s]
Map: 100%|██████████| 266/266 [00:00<00:00, 9983.40 examples/s]
Map: 100%|██████████| 270/270 [00:00<00:00, 11446.99 examples/s]


DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 2152
    })
    dev: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 266
    })
    test: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 270
    })
})

In [10]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Evaluate Model

In [11]:
# import accuracy evaluation metric
accuracy = evaluate.load("accuracy")

Downloading builder script: 100%|██████████| 4.20k/4.20k [00:00<00:00, 6.76MB/s]


In [12]:
# define an evaluation function to pass into trainer later
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    return {"accuracy": accuracy.compute(predictions=predictions, references=labels)}

Train Model

In [13]:
peft_config = LoraConfig(task_type="SEQ_CLS",
                        r=4,
                        lora_alpha=32,
                        lora_dropout=0.01,
                        target_modules = ['q_lin'])
peft_config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=4, target_modules={'q_lin'}, lora_alpha=32, lora_dropout=0.01, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)

In [14]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 632,070 || all params: 67,590,156 || trainable%: 0.9352


In [15]:
# hyperparameters
lr = 1e-3
batch_size = 4
num_epochs = 10

In [16]:
# define training arguments
training_args = TrainingArguments(
    output_dir= model_checkpoint + "-lora-text-classification",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)



In [18]:
# creater trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["dev"],
    tokenizer=tokenizer,
    data_collator=data_collator, # this will dynamically pad examples in each batch to be equal length
    compute_metrics=compute_metrics,
)

In [19]:
# train model
trainer.train()

  9%|▉         | 501/5380 [03:06<54:03,  1.50it/s]  

{'loss': 0.9636, 'grad_norm': 7.326727390289307, 'learning_rate': 0.0009070631970260224, 'epoch': 0.93}


 10%|█         | 538/5380 [03:19<21:35,  3.74it/s]  Trainer is attempting to log a value of "{'accuracy': 0.6992481203007519}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                  
 10%|█         | 538/5380 [03:35<21:35,  3.74it/s]

{'eval_loss': 0.8581797480583191, 'eval_accuracy': {'accuracy': 0.6992481203007519}, 'eval_runtime': 16.2397, 'eval_samples_per_second': 16.38, 'eval_steps_per_second': 4.126, 'epoch': 1.0}


 19%|█▊        | 1001/5380 [05:21<10:01,  7.28it/s] 

{'loss': 0.7447, 'grad_norm': 2.898487091064453, 'learning_rate': 0.0008141263940520446, 'epoch': 1.86}


 20%|██        | 1076/5380 [05:36<13:22,  5.37it/s]Trainer is attempting to log a value of "{'accuracy': 0.7030075187969925}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 20%|██        | 1076/5380 [05:42<13:22,  5.37it/s]

{'eval_loss': 1.001013159751892, 'eval_accuracy': {'accuracy': 0.7030075187969925}, 'eval_runtime': 5.5637, 'eval_samples_per_second': 47.81, 'eval_steps_per_second': 12.042, 'epoch': 2.0}


 28%|██▊       | 1501/5380 [07:24<13:13,  4.89it/s]  

{'loss': 0.5876, 'grad_norm': 4.656132698059082, 'learning_rate': 0.0007211895910780669, 'epoch': 2.79}


 30%|███       | 1614/5380 [07:57<21:17,  2.95it/s]  Trainer is attempting to log a value of "{'accuracy': 0.7142857142857143}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 30%|███       | 1614/5380 [08:04<21:17,  2.95it/s]

{'eval_loss': 0.9129266142845154, 'eval_accuracy': {'accuracy': 0.7142857142857143}, 'eval_runtime': 6.5184, 'eval_samples_per_second': 40.808, 'eval_steps_per_second': 10.279, 'epoch': 3.0}


 37%|███▋      | 2001/5380 [09:31<10:46,  5.23it/s]  

{'loss': 0.4728, 'grad_norm': 5.205290794372559, 'learning_rate': 0.0006282527881040893, 'epoch': 3.72}


 40%|████      | 2152/5380 [10:04<09:44,  5.53it/s]Trainer is attempting to log a value of "{'accuracy': 0.7255639097744361}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 40%|████      | 2152/5380 [10:10<09:44,  5.53it/s]

{'eval_loss': 1.164086103439331, 'eval_accuracy': {'accuracy': 0.7255639097744361}, 'eval_runtime': 6.413, 'eval_samples_per_second': 41.478, 'eval_steps_per_second': 10.447, 'epoch': 4.0}


 46%|████▋     | 2501/5380 [11:58<09:03,  5.30it/s]  

{'loss': 0.4145, 'grad_norm': 69.51883697509766, 'learning_rate': 0.0005353159851301115, 'epoch': 4.65}


 50%|█████     | 2690/5380 [12:41<08:45,  5.12it/s]Trainer is attempting to log a value of "{'accuracy': 0.7330827067669173}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 50%|█████     | 2690/5380 [12:47<08:45,  5.12it/s]

{'eval_loss': 1.3646290302276611, 'eval_accuracy': {'accuracy': 0.7330827067669173}, 'eval_runtime': 5.4273, 'eval_samples_per_second': 49.011, 'eval_steps_per_second': 12.345, 'epoch': 5.0}


 56%|█████▌    | 3000/5380 [13:52<10:08,  3.91it/s]  

{'loss': 0.2917, 'grad_norm': 0.1289292275905609, 'learning_rate': 0.00044237918215613383, 'epoch': 5.58}


 60%|█████▉    | 3227/5380 [36:38<06:16,  5.71it/s]     Trainer is attempting to log a value of "{'accuracy': 0.7556390977443609}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 60%|██████    | 3228/5380 [36:43<06:16,  5.71it/s]

{'eval_loss': 1.4447352886199951, 'eval_accuracy': {'accuracy': 0.7556390977443609}, 'eval_runtime': 5.5195, 'eval_samples_per_second': 48.193, 'eval_steps_per_second': 12.139, 'epoch': 6.0}


 65%|██████▌   | 3501/5380 [37:59<05:35,  5.61it/s]  

{'loss': 0.2485, 'grad_norm': 59.20607376098633, 'learning_rate': 0.00034944237918215615, 'epoch': 6.51}


 70%|███████   | 3766/5380 [39:04<05:34,  4.82it/s]Trainer is attempting to log a value of "{'accuracy': 0.7330827067669173}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 70%|███████   | 3766/5380 [39:10<05:34,  4.82it/s]

{'eval_loss': 1.7574362754821777, 'eval_accuracy': {'accuracy': 0.7330827067669173}, 'eval_runtime': 5.4292, 'eval_samples_per_second': 48.995, 'eval_steps_per_second': 12.341, 'epoch': 7.0}


 74%|███████▍  | 4001/5380 [40:08<06:26,  3.57it/s]

{'loss': 0.1596, 'grad_norm': 21.033737182617188, 'learning_rate': 0.0002565055762081784, 'epoch': 7.43}


 80%|████████  | 4304/5380 [42:15<04:32,  3.96it/s]Trainer is attempting to log a value of "{'accuracy': 0.7330827067669173}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 80%|████████  | 4304/5380 [42:35<04:32,  3.96it/s]

{'eval_loss': 1.9366859197616577, 'eval_accuracy': {'accuracy': 0.7330827067669173}, 'eval_runtime': 19.2713, 'eval_samples_per_second': 13.803, 'eval_steps_per_second': 3.477, 'epoch': 8.0}


 84%|████████▎ | 4500/5380 [44:05<05:06,  2.87it/s]  

{'loss': 0.1468, 'grad_norm': 0.0021800671238452196, 'learning_rate': 0.00016356877323420074, 'epoch': 8.36}


 90%|█████████ | 4842/5380 [45:53<03:54,  2.29it/s]Trainer is attempting to log a value of "{'accuracy': 0.7368421052631579}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
 90%|█████████ | 4842/5380 [46:02<03:54,  2.29it/s]

{'eval_loss': 2.0091094970703125, 'eval_accuracy': {'accuracy': 0.7368421052631579}, 'eval_runtime': 8.05, 'eval_samples_per_second': 33.044, 'eval_steps_per_second': 8.323, 'epoch': 9.0}


 93%|█████████▎| 5000/5380 [46:46<01:17,  4.89it/s]

{'loss': 0.1128, 'grad_norm': 0.0014380423817783594, 'learning_rate': 7.063197026022306e-05, 'epoch': 9.29}


100%|██████████| 5380/5380 [48:22<00:00,  4.68it/s]Trainer is attempting to log a value of "{'accuracy': 0.7330827067669173}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                                   
100%|██████████| 5380/5380 [48:36<00:00,  4.68it/s]

{'eval_loss': 2.0656931400299072, 'eval_accuracy': {'accuracy': 0.7330827067669173}, 'eval_runtime': 13.3405, 'eval_samples_per_second': 19.939, 'eval_steps_per_second': 5.022, 'epoch': 10.0}


100%|██████████| 5380/5380 [48:37<00:00,  1.84it/s]

{'train_runtime': 2917.0913, 'train_samples_per_second': 7.377, 'train_steps_per_second': 1.844, 'train_loss': 0.38968762146052816, 'epoch': 10.0}





TrainOutput(global_step=5380, training_loss=0.38968762146052816, metrics={'train_runtime': 2917.0913, 'train_samples_per_second': 7.377, 'train_steps_per_second': 1.844, 'total_flos': 599914265851008.0, 'train_loss': 0.38968762146052816, 'epoch': 10.0})

Make Predictions With Model

In [21]:

dev_predictions = trainer.predict(tokenized_dataset['dev'])
dev_predict = np.argmax(dev_predictions.predictions, axis=-1)
test_predictions = trainer.predict(tokenized_dataset['test'])
test_predict = np.argmax(test_predictions.predictions, axis=-1)



# mf1_dev = precision_recall_fscore_support(tokenized_dataset['dev']['label'], dev_predict, average='macro')
# mf1_test = precision_recall_fscore_support(tokenized_dataset['test']['label'], test_predict, average='macro')

# print('Score in, DEV:', mf1_dev, 'TEST:', mf1_test)
# print('Confusion matrix:')
# print(confusion_matrix(tokenized_dataset['test']['label'], test_predict))

100%|██████████| 67/67 [00:07<00:00,  9.57it/s]
100%|██████████| 68/68 [00:06<00:00, 11.07it/s]

[4 0 0 4 0 0 0 0 0 0 0 0 0 4 4 0 4 4 0 0 0 0 0 0 4 0 0 1 3 3 0 3 0 0 3 3 0
 0 0 0 3 3 0 0 3 3 0 0 0 3 3 0 3 3 1 3 3 0 0 0 0 0 3 0 4 3 3 0 3 1 5 0 3 0
 1 0 3 0 0 0 1 3 0 0 0 0 0 0 5 0 0 0 0 4 0 5 0 0 5 0 5 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 5 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 5 0 0 0 0 0 0
 0 0 5 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 5 0 0
 0 0 0 0 0 0 0]
[0 0 0 0 0 0 3 0 4 0 0 0 4 4 0 4 0 3 0 4 0 0 4 0 4 0 4 3 3 3 3 0 3 4 0 0 0
 3 0 0 0 0 3 0 3 3 0 1 0 0 3 3 0 0 0 3 0 3 3 3 0 0 3 5 3 0 0 0 0 0 3 0 0 1
 1 3 0 1 3 0 0 3 3 0 3 0 0 0 0 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 4
 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 5 0 5 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 3 0 0 0 0 0 0 0 0 0 0 0
 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 4 0 0 0 0 0 0
 0 0 0 0 




In [31]:
print("Actual\t| Predicted")
count = 0
for i, prediction in enumerate(dev_predict):
    if prediction == data['dev']['label'][i]:
        count += 1
    print(fallacyNumberConversions[data['dev']['label'][i]] + " | "+ fallacyNumberConversions[prediction])
print("Accuracy: " + str(count/len(dev_predict) * 100))

Actual	| Predicted
Slipperyslope | Slipperyslope
Slipperyslope | None
Slipperyslope | None
Slipperyslope | Slipperyslope
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | Slipperyslope
Slipperyslope | Slipperyslope
Slipperyslope | None
Slipperyslope | Slipperyslope
Slipperyslope | Slipperyslope
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | None
Slipperyslope | Slipperyslope
Slipperyslope | None
Slipperyslope | None
AppealtoAuthority | AdHominem
AppealtoAuthority | AppealtoAuthority
AppealtoAuthority | AppealtoAuthority
AppealtoAuthority | None
AppealtoAuthority | AppealtoAuthority
AppealtoAuthority | None
AppealtoAuthority | None
AppealtoAuthority | AppealtoAuthority
AppealtoAuthority | AppealtoAuthority
AppealtoAuthority | None
AppealtoAuthority | None
Appeal

Save Model

In [None]:
from huggingface_hub import notebook_login
notebook_login() # ensure token gives write access

In [45]:
hf_name = 'lennon2020' 
model_id = hf_name + "/" + "lora-fallacy-classification" # you can name the model whatever you want

In [46]:
model.push_to_hub(model_id) # save model
trainer.push_to_hub(model_id) # save trainer

adapter_model.safetensors: 100%|██████████| 2.53M/2.53M [00:03<00:00, 821kB/s] 

[A

adapter_model.safetensors:   0%|          | 0.00/2.53M [00:00<?, ?B/s]

training_args.bin: 100%|██████████| 5.18k/5.18k [00:00<00:00, 20.5kB/s]30MB/s]
events.out.tfevents.1716251357.Lennons-MacBook-Air.local.49143.0: 100%|██████████| 10.2k/10.2k [00:00<00:00, 50.1kB/s]
adapter_model.safetensors: 100%|██████████| 2.53M/2.53M [00:03<00:00, 759kB/s] 

Upload 3 LFS files: 100%|██████████| 3/3 [00:03<00:00,  1.26s/it]


CommitInfo(commit_url='https://huggingface.co/lennon2020/distilbert-base-uncased-lora-text-classification/commit/98f9ade3b7f9e4024d2ebd9cf70a5eea63d5bbc7', commit_message='lennon2020/lora-fallacy-classification', commit_description='', oid='98f9ade3b7f9e4024d2ebd9cf70a5eea63d5bbc7', pr_url=None, pr_revision=None, pr_num=None)