# q3fer/distilbert-base-fallacy-classification

Source: https://huggingface.co/q3fer/distilbert-base-fallacy-classification

### Import necessary packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, TextClassificationPipeline 
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix
import mlflow
from mlflow.transformers import log_model
import logging 
from logging import getLogger
from transformers import AutoConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
import torch


from sklearn.model_selection import train_test_split
#  import pickle
import warnings # why? 
from mlflow.sklearn import save_model 
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # This tells Hugging Face: “Don’t use parallel tokenization — avoid possible deadlocks.”

import seaborn as sns
import matplotlib.pyplot as plt

import config 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from basic_functions import (
    get_eval_metrics,
    createTrainer, 
    get_encode_tokenize_data
    )

In [3]:
MODEL_NAME = "q3fer/distilbert-base-fallacy-classification" # pulls the fallacy trained model
TRACKING_URI = config.TRACKING_URI
EXPERIMENT_NAME = config.EXPERIMENT_NAME
configuration = AutoConfig.from_pretrained(MODEL_NAME)

logging.basicConfig(format="%(asctime)s: %(message)s") # Configure logging format to show timestamp before every message

logger = logging.getLogger()
logger.setLevel(logging.INFO) # Only show logs that are INFO or more important (e.g., WARNING, ERROR) — but ignore DEBUG.

In [4]:
# Load model config to inspect label mappings

print("Model Label ID to Label Mapping:")
print(configuration.id2label)

print("\nReverse Mapping (Label to ID):")
print(configuration.label2id)

Model Label ID to Label Mapping:
{0: 'ad hominem', 1: 'ad populum', 2: 'appeal to emotion', 3: 'circular reasoning', 4: 'equivocation', 5: 'fallacy of credibility', 6: 'fallacy of extension', 7: 'fallacy of logic', 8: 'fallacy of relevance', 9: 'false causality', 10: 'false dilemma', 11: 'faulty generalization', 12: 'intentional', 13: 'miscellaneous'}

Reverse Mapping (Label to ID):
{'ad hominem': 0, 'ad populum': 1, 'appeal to emotion': 2, 'circular reasoning': 3, 'equivocation': 4, 'fallacy of credibility': 5, 'fallacy of extension': 6, 'fallacy of logic': 7, 'fallacy of relevance': 8, 'false causality': 9, 'false dilemma': 10, 'faulty generalization': 11, 'intentional': 12, 'miscellaneous': 13}


In [5]:
DATA_PATH = "../data/data_dropped_duplicates_small.csv"
MODEL_PATH = "q3fer/distilbert-base-fallacy-classification"
MODEL_TRAINING_PATH ="q3fer/distilbert-base-fallacy-classification"
OUTPUT_DIR = "../models/q3fer/distilbert-base-fallacy-classification/trainer_output"
SAVE_PATH = "../models/q3fer/distilbert-base-fallacy-classification/pytorch_model"

### Get data

In [6]:
train_dataset, test_dataset, y_train, le = get_encode_tokenize_data(DATA_PATH, MODEL_PATH)

INFO:basic_functions:Loading data...
INFO:basic_functions:Train test split, test-size 0.3
INFO:root:encode the label column
INFO:root:tokenize
INFO:basic_functions:create tokenizer & load model
INFO:basic_functions:create tokenizer & load model
INFO:root:create TextDatasets (train & test)


In [7]:
# Define new labels
id2label = {
    0: "ad_hominem",
    1: "appeal_to_authority",
    2: "appeal_to_emotion",
    3: "false_dilemma",
    4: "faulty_generalization",
    5: "none"
}

label2id = {v: k for k, v in id2label.items()}

### Model Initialization

In [8]:
num_classes = len(np.unique(y_train))
num_classes

6

In [9]:
# Initialize model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, 
    num_labels=num_classes,
    ignore_mismatched_sizes= True
    )
model.config.id2label = id2label
model.config.label2id = label2id

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at q3fer/distilbert-base-fallacy-classification and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([14, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([14]) in the checkpoint and torch.Size([6]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
final_layer = model.classifier
# Check the size of the weights
print(final_layer.weight.shape)  # Will be [num_labels, hidden_size] (num_labels x hidden_size)

torch.Size([6, 768])


In [11]:
# To see a summary of the model
print(model.config)

DistilBertConfig {
  "_attn_implementation_autoset": true,
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "ad_hominem",
    "1": "appeal_to_authority",
    "2": "appeal_to_emotion",
    "3": "false_dilemma",
    "4": "faulty_generalization",
    "5": "none"
  },
  "initializer_range": 0.02,
  "label2id": {
    "ad_hominem": 0,
    "appeal_to_authority": 1,
    "appeal_to_emotion": 2,
    "false_dilemma": 3,
    "faulty_generalization": 4,
    "none": 5
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.50.3",
  "vocab_size": 30522
}



In [12]:
params = {
      "learning_rate": 5e-5,
      "weight_decay": 0.01,
      "num_train_epochs": 3,
      "evaluation_strategy": "epoch",
      "class_weight":True,
  }


# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)


mlflow.start_run()
run = mlflow.active_run()
print("Active run_id: {}".format(run.info.run_id))

mlflow.set_tag("model_name", MODEL_NAME)
mlflow.log_params(params)

Active run_id: 6ee8680208c14afd806411fff81af478


In [13]:
trainer = createTrainer(
    model= model, 
    train_dataset = train_dataset,
    test_dataset = test_dataset,
    output_dir= OUTPUT_DIR, 
    y_train=y_train, 
    class_weight=True, 
    epochs=3, 
    learning_rate=5e-5, 
    weight_decay = 0.01, 
    train_batch_size=4, 
    eval_batch_size=8 )

INFO:root:defining training arguments
INFO:root:get weighted loss trainer


### Execute Training

In [14]:
torch.mps.empty_cache()  # Clears unused GPU memory

In [15]:
# disable upper limit for memory
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

# Allows up to 100% of available memory
torch.mps.set_per_process_memory_fraction(1.0)  

In [16]:
logger.info('training is running')
trainer.train()

INFO:root:training is running
2025/04/07 13:45:54 ERROR mlflow.utils.async_logging.async_logging_queue: Run Id 6ee8680208c14afd806411fff81af478: Failed to log run data: Exception: Changing param values is not allowed. Param with key='evaluation_strategy' was already logged with value='epoch' for run ID='6ee8680208c14afd806411fff81af478'. Attempted logging new value 'None'.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2198,1.021656,0.657333
2,0.7696,1.24868,0.719333
3,0.2609,1.553619,0.732667


TrainOutput(global_step=2625, training_loss=0.7991413915724982, metrics={'train_runtime': 3425.5757, 'train_samples_per_second': 3.065, 'train_steps_per_second': 0.766, 'total_flos': 1391006905344000.0, 'train_loss': 0.7991413915724982, 'epoch': 3.0})

### Evaluation

In [17]:
def log_metrics(cr, brier, split):
    mlflow.log_metric(f"{split}_brier", brier)

    for key, value in cr.items():
        if (key == "accuracy"):
                # print(f"{split}_{key}", round(value,2))
                mlflow.log_metric(f"{split}_{key}", value)
        else:
            for metric in value:
                mlflow.log_metric(f"{split}_{key}_{metric}", value.get(metric))
                # print(f"{split}_{key}_{metric}", round(value.get(metric),2))

    

In [18]:
logger.info('predict on train_dataset')
train_output = trainer.predict(train_dataset)

classification_report, brier= get_eval_metrics(train_output, le)
log_metrics(classification_report, brier, "train")


INFO:root:predict on train_dataset


INFO:basic_functions:get evaluation metrics
INFO:basic_functions:classification_report
INFO:basic_functions:confusion_matrix
INFO:basic_functions:brier score


                       precision    recall  f1-score   support

           ad_hominem       0.94      0.70      0.80       325
  appeal_to_authority       0.57      0.86      0.69       229
    appeal_to_emotion       0.72      0.82      0.77       487
        false_dilemma       0.73      0.68      0.70       299
faulty_generalization       0.67      0.78      0.72       445
                 none       0.86      0.78      0.82      1715

             accuracy                           0.78      3500
            macro avg       0.75      0.77      0.75      3500
         weighted avg       0.79      0.78      0.78      3500

[[ 227   17   33    4    5   39]
 [   0  196   10    3    5   15]
 [   0   14  401    8   15   49]
 [   2    3   13  202   11   68]
 [   3   12   23    7  348   52]
 [  10  101   74   51  136 1343]]
Multiclass Brier score: 0.3871642602247193


In [19]:
logger.info('predict on test_dataset')
test_output = trainer.predict(test_dataset)

classification_report, brier = get_eval_metrics(test_output, le)
log_metrics(classification_report, brier, "test")


INFO:root:predict on test_dataset


INFO:basic_functions:get evaluation metrics
INFO:basic_functions:classification_report
INFO:basic_functions:confusion_matrix
INFO:basic_functions:brier score


                       precision    recall  f1-score   support

           ad_hominem       0.76      0.52      0.62       140
  appeal_to_authority       0.37      0.67      0.48        98
    appeal_to_emotion       0.63      0.73      0.67       208
        false_dilemma       0.69      0.66      0.67       128
faulty_generalization       0.48      0.50      0.49       191
                 none       0.78      0.70      0.74       735

             accuracy                           0.66      1500
            macro avg       0.62      0.63      0.61      1500
         weighted avg       0.68      0.66      0.66      1500

[[ 73  18  23   4   4  18]
 [  0  66  12   0   2  18]
 [  3  12 151   3  13  26]
 [  0   3   8  84   2  31]
 [  5  12  18   5  96  55]
 [ 15  66  29  25  84 516]]
Multiclass Brier score: 0.5168321446831328


In [20]:
mlflow.end_run()

In [None]:
# # Add prediction and reporting here
# output = trainer.predict(dataset["test"])
# predictions = np.argmax(output.predictions, axis=1)
# y_true = output.label_ids

# # Classification report
# label_names = le.classes_
# print(classification_report(y_true, predictions, target_names=label_names))

# # Confusion matrix
# cm = confusion_matrix(y_true, predictions)
# print("Confusion Matrix:")
# print(cm)

# # plotting
# plt.figure(figsize=(10, 8))
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
# plt.xlabel("Predicted")
# plt.ylabel("True")
# plt.title("Confusion Matrix")
# plt.show()

### Save model

In [None]:
#save with pytorch
mlflow.pytorch.save_model(model, path=SAVE_PATH)

### Checking the q3fer

In [None]:
# # Run inference with q3fer model on sample texts
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSequenceClassification.from_pretrained(model_name)

# pipeline = TextClassificationPipeline(model=model, tokenizer=tokenizer)

# examples = [
#     "You have no idea what you're talking about; you've only lived here for six months.", # ad_hominem
#     "I read a book by a nutritionist who says all carbs are bad.", # appeal_to_authority
#     "Can I have the last piece of cake? You know how much I love it, and it's been a tough day for me.", # appeal_to_emotion
#     "If we don't order pizza for dinner, we'll have to eat the week-old spaghetti in the fridge.", # false_dilemma
#     "I was in Greece for two week. Greeks are amazing people!", # faulty_generalization
#     "We should look into the science that supports this idea." # none
# ]

# predictions = pipeline(examples)

# for i, pred in enumerate(predictions):
#     print(f"Text {i+1}: {examples[i]}")
#     print(f"Prediction: {pred['label']} (Score: {pred['score']:.2f})")
#     print("-" * 50)

In [None]:
# label_mapping = {
#     'ad hominem': 'ad_hominem',
#     'appeal to emotion': 'appeal_to_emotion',
#     'false dilemma': 'false_dilemma',
#     'faulty generalization': 'faulty_generalization',
#     'circular reasoning': 'other',
#     'appeal to authority': 'appeal_to_authority',  
#     'miscellaneous': 'other',  
#     'fallacy of logic': 'other',
#     'intentional': 'other',
#     'ad populum': 'other',
#     'equivocation': 'other',
#     'false causality': 'other',
#     'fallacy of relevance': 'other',
#     'fallacy of credibility': 'other',
#     'fallacy of extension': 'other'
# }

# #  the model's label doesn't include 'none'.Maybe we should/could test it out on our dataset with only the fallacies?

In [None]:
# # testing with the examples
# mapped_predictions = []

# for i, pred in enumerate(predictions):
#     original_label = pred['label']
#     mapped_label = label_mapping.get(original_label, 'unmapped')

#     print(f"Text {i+1}: {examples[i]}")
#     print(f"Original Prediction: {original_label} (Score: {pred['score']:.2f})")
#     print(f"Mapped to: {mapped_label}")
#     print("-" * 50)

#     mapped_predictions.append(mapped_label)
