In [2]:
# Reading or making path/Dir available and accessible for conda to read
# Import functions/modules from other python Scripts created
import sys
from pathlib import Path

PROJECT_ROOT = Path("..").resolve()
sys.path.append(str(PROJECT_ROOT))

from src.data.clean_text import basic_clean
from src.models.baseline import BaselineConfig, run_baseline_multiclass
from src.models.slm_finetune import SLMConfigMC, train_slm_multiclass
from src.explainer_Qwen import QwenExplainer
from src.labels_climate import ID2LABEL, LABEL2ID

print("Modeules and Scripts Imported Successfully")
print("Imports OK, labels:", LABEL2ID)

Modeules and Scripts Imported Successfully
Imports OK, labels: {'CHERRY_PICKING': 0, 'EVADING_THE_BURDEN_OF_PROOF': 1, 'FALSE_ANALOGY': 2, 'FALSE_AUTHORITY': 3, 'FALSE_CAUSE': 4, 'HASTY_GENERALISATION': 5, 'NO_FALLACY': 6, 'POST_HOC': 7, 'RED_HERRINGS': 8, 'STRAWMAN': 9, 'VAGUENESS': 10}


In [4]:
# Importing and calling all required modules
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import classification_report

print("Modules imported successfully")

Modules imported successfully


In [6]:
 # Raw CLIMATE data => just as in Tariq's work.
RAW_CLIMATE_DIR = PROJECT_ROOT / "data" / "raw" / "climate"
TRAIN_DIR = RAW_CLIMATE_DIR / "train"
DEV_DIR   = RAW_CLIMATE_DIR / "dev"
TEST_DIR  = RAW_CLIMATE_DIR / "test"

# This is where we save cleaned CSVs
PROCESSED_DIR = PROJECT_ROOT / "data" / "processed"
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

# Column name in the .tsv that holds the actual text
TEXT_COL = "fact_checked_segment"

print("Train dir:", TRAIN_DIR)
print("Dev dir:", DEV_DIR)
print("Test dir:", TEST_DIR)


Train dir: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\raw\climate\train
Dev dir: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\raw\climate\dev
Test dir: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\raw\climate\test


In [7]:
def load_split(split_dir: Path) -> pd.DataFrame:
    """
    Here we Read all .tsv files in a split directory, adds label from filename,
    and concatenates into one DataFrame -> thus 1csv file.
    """
    rows = []
    for path in split_dir.glob("*.tsv"):
        label = path.stem  # filename without extension, e.g. "Cherry Picking"
        df = pd.read_csv(path, sep="\t")
        df["label_str_raw"] = label
        rows.append(df)
    return pd.concat(rows, ignore_index=True)


train_df_raw = load_split(TRAIN_DIR)
dev_df_raw   = load_split(DEV_DIR)
test_df_raw  = load_split(TEST_DIR)

train_df_raw.head()


Unnamed: 0,fact_checked_segment,comment_by_fact-checker,article,label_str_raw
0,“climate economists see a positive externality...,This is cherry-picking at its worst. You can a...,article36.txt,Cherry Picking
1,The latest U.N. science compendium asserts tha...,The recent US National Climate Assessment1 fin...,article130.txt,Cherry Picking
2,"“‘If we are right, our study challenges decade...",It only potentially challenges ONE method used...,,Cherry Picking
3,“a killer analysis conducted by Craig Idso of ...,"This publication is not peer-reviewed, cherry-...",article45.txt,Cherry Picking
4,"“Next year or the year after that, I think it ...",I would also add that predictions of an ice-fr...,article33.txt,Cherry Picking


In [8]:
# Putting all csv's  into a dir in data called combined_csv for reference
# Just for backup purposes 
# creating the combined_csv folder
COMBINED_DIR = PROJECT_ROOT / "data" / "combined_csv"
COMBINED_DIR.mkdir(parents=True, exist_ok=True)

print("Saving combined CSVs to:", COMBINED_DIR)

# Here we go ahead and create the various csv files and save them to combined_csv
train_comb_path = COMBINED_DIR / "climate_train_combined.csv"
dev_comb_path   = COMBINED_DIR / "climate_dev_combined.csv"
test_comb_path  = COMBINED_DIR / "climate_test_combined.csv"

train_df_raw.to_csv(train_comb_path, index=False)
dev_df_raw.to_csv(dev_comb_path, index=False)
test_df_raw.to_csv(test_comb_path, index=False)

print("Saved:")
print(" -", train_comb_path)
print(" -", dev_comb_path)
print(" -", test_comb_path)




Saving combined CSVs to: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv
Saved:
 - C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv\climate_train_combined.csv
 - C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv\climate_dev_combined.csv
 - C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv\climate_test_combined.csv


In [9]:
# Normalising Label Names
# we create a function which makes changes to the label names in our file(csv) from lower case to UPPERCASE
def normalize_label(label: str) -> str:
    """
    e.g. "Cherry Picking" -> "CHERRY_PICKING"
         "No fallacy"    -> "NO_FALLACY"
    """
    return (
        str(label)
        .strip()
        .replace(" ", "_")
        .replace("-", "_")
        .upper()
    )


train_df_raw["label_str"] = train_df_raw["label_str_raw"].apply(normalize_label)
dev_df_raw["label_str"]   = dev_df_raw["label_str_raw"].apply(normalize_label)
test_df_raw["label_str"]  = test_df_raw["label_str_raw"].apply(normalize_label)

sorted(train_df_raw["label_str"].unique())

['CHERRY_PICKING',
 'EVADING_THE_BURDEN_OF_PROOF',
 'FALSE_ANALOGY',
 'FALSE_AUTHORITY',
 'FALSE_CAUSE',
 'HASTY_GENERALIZATION',
 'NO_FALLACY',
 'POST_HOC',
 'RED_HERRING',
 'STRAWMAN',
 'VAGUENESS']

In [10]:
# Fix US/UK + singular/plural mismatches
# Fix label variants to match labels_climate.py since some missing labels exists in data.
label_corrections = {
    "HASTY_GENERALIZATION": "HASTY_GENERALISATION",
    "RED_HERRING": "RED_HERRINGS",
}

for df in [train_df_raw, dev_df_raw, test_df_raw]:
    df["label_str"] = df["label_str"].replace(label_corrections)

sorted(train_df_raw["label_str"].unique())

# Then we Map all labels to numeric IDs using LABEL2ID after adjusting label variants to match labels_climate.py
train_df_raw["label_id"] = train_df_raw["label_str"].map(LABEL2ID)
dev_df_raw["label_id"]   = dev_df_raw["label_str"].map(LABEL2ID)
test_df_raw["label_id"]  = test_df_raw["label_str"].map(LABEL2ID)

print("Missing label_id in train:", train_df_raw["label_id"].isna().sum())
print("Missing label_id in dev:  ", dev_df_raw["label_id"].isna().sum())
print("Missing label_id in test: ", test_df_raw["label_id"].isna().sum())



Missing label_id in train: 0
Missing label_id in dev:   0
Missing label_id in test:  0


In [11]:
# Here we clean our text by calling or importing our function "basic_clean" from clean_text.py

train_df_raw["text_clean"] = train_df_raw[TEXT_COL].astype(str).apply(basic_clean)
dev_df_raw["text_clean"]   = dev_df_raw[TEXT_COL].astype(str).apply(basic_clean)
test_df_raw["text_clean"]  = test_df_raw[TEXT_COL].astype(str).apply(basic_clean)

train_df_raw[["fact_checked_segment", "text_clean", "label_str"]].head(2)

Unnamed: 0,fact_checked_segment,text_clean,label_str
0,“climate economists see a positive externality...,"climate economists see a positive externality,...",CHERRY_PICKING
1,The latest U.N. science compendium asserts tha...,The latest U.N. science compendium asserts tha...,CHERRY_PICKING


In [12]:
#  Drop rows with missing text/labels, save processed CSVs ===

for name, df in [("train", train_df_raw), ("dev", dev_df_raw), ("test", test_df_raw)]:
    before = df.shape[0]
    df.dropna(subset=["text_clean", "label_id"], inplace=True)
    df["text_clean"] = df["text_clean"].fillna("").astype(str)
    after = df.shape[0]
    print(f"{name}: dropped {before - after} rows, kept {after}")

train: dropped 0 rows, kept 437
dev: dropped 0 rows, kept 115
test: dropped 0 rows, kept 133


In [13]:
# Removing and dropping rows with missing text or labels => NaN

train_df_raw["text_clean"].isna().sum(), train_df_raw["label_id"].isna().sum()

# Drop rows where text_clean or label_id is missing
for name, df in [("train", train_df_raw), ("dev", dev_df_raw), ("test", test_df_raw)]:
    before = df.shape[0]
    df.dropna(subset=["text_clean", "label_id"], inplace=True)
    df["text_clean"] = df["text_clean"].astype(str)
    after = df.shape[0]
    print(f"{name}: dropped {before - after} rows with NaN text/label, kept {after}")

# confirmation that there are no Nan in processed data 
print("NaNs in train:", train_df_raw["text_clean"].isna().sum(), train_df_raw["label_id"].isna().sum())
print("NaNs in dev:", dev_df_raw["text_clean"].isna().sum(), dev_df_raw["label_id"].isna().sum())
print("NaNs in test:", test_df_raw["text_clean"].isna().sum(), test_df_raw["label_id"].isna().sum())


train: dropped 0 rows with NaN text/label, kept 437
dev: dropped 0 rows with NaN text/label, kept 115
test: dropped 0 rows with NaN text/label, kept 133
NaNs in train: 0 0
NaNs in dev: 0 0
NaNs in test: 0 0


In [14]:
# Now lets save all CSV since we have successfully completed Processing data
train_path = PROCESSED_DIR / "climate_train.csv"
dev_path   = PROCESSED_DIR / "climate_dev.csv"
test_path  = PROCESSED_DIR / "climate_test.csv"

train_df_raw.to_csv(train_path, index=False)
dev_df_raw.to_csv(dev_path, index=False)
test_df_raw.to_csv(test_path, index=False)

print("Saved processed datasets to:")
print("  ", train_path)
print("  ", dev_path)
print("  ", test_path)


Saved processed datasets to:
   C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\processed\climate_train.csv
   C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\processed\climate_dev.csv
   C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\processed\climate_test.csv


In [15]:
# === Load Preproceesed splits (for safety /re-runs) and build combined csvs ===
# Assigning train, test or dev to saved csv's
train_df = pd.read_csv(train_path)
dev_df   = pd.read_csv(dev_path)
test_df  = pd.read_csv(test_path)

# Safety: enforce types
for name, df in [("train", train_df), ("dev", dev_df), ("test", test_df)]:
    df["text_clean"] = df["text_clean"].fillna("").astype(str)
    df["label_id"]   = df["label_id"].astype(int)
    print(f"{name} shape:", df.shape)

# Save individual combined CSVs
train_df.to_csv(COMBINED_DIR / "climate_train_combined.csv", index=False)
dev_df.to_csv(COMBINED_DIR / "climate_dev_combined.csv", index=False)
test_df.to_csv(COMBINED_DIR / "climate_test_combined.csv", index=False)

# Save full combined dataset (train+dev+test)
full_df = pd.concat([train_df, dev_df, test_df], ignore_index=True)
full_df.to_csv(COMBINED_DIR / "climate_full_combined.csv", index=False)

print("Saved combined CSVs to:", COMBINED_DIR)

train_df.head(3)


train shape: (437, 7)
dev shape: (115, 7)
test shape: (133, 7)
Saved combined CSVs to: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv


Unnamed: 0,fact_checked_segment,comment_by_fact-checker,article,label_str_raw,label_str,label_id,text_clean
0,“climate economists see a positive externality...,This is cherry-picking at its worst. You can a...,article36.txt,Cherry Picking,CHERRY_PICKING,0,"climate economists see a positive externality,..."
1,The latest U.N. science compendium asserts tha...,The recent US National Climate Assessment1 fin...,article130.txt,Cherry Picking,CHERRY_PICKING,0,The latest U.N. science compendium asserts tha...
2,"“‘If we are right, our study challenges decade...",It only potentially challenges ONE method used...,,Cherry Picking,CHERRY_PICKING,0,"If we are right, our study challenges decades ..."


In [16]:
# ===  Build full_train (train + dev), then UPSAMPLE ===

full_train = pd.concat([train_df, dev_df], ignore_index=True)

print("Original label distribution (full_train):")
print(full_train["label_id"].value_counts().sort_index())
print({i: ID2LABEL[i] for i in sorted(ID2LABEL)})


def balance_dataset(df: pd.DataFrame, label_col: str = "label_id") -> pd.DataFrame:
    counts = df[label_col].value_counts()
    max_count = counts.max()
    print("Original counts:\n", counts.sort_index())
    print("Max count:", max_count)

    balanced_parts = []
    for label, count in counts.items():
        subset = df[df[label_col] == label]
        upsampled = subset.sample(
            n=max_count,
            replace=True,
            random_state=42,
        )
        balanced_parts.append(upsampled)

    balanced_df = (
        pd.concat(balanced_parts)
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )
    print("Balanced counts:\n", balanced_df[label_col].value_counts().sort_index())
    return balanced_df


full_train_balanced = balance_dataset(full_train, label_col="label_id")

# Save balanced training CSV
balanced_path = COMBINED_DIR / "climate_train_balanced.csv"
full_train_balanced.to_csv(balanced_path, index=False)
print("Saved balanced training data to:", balanced_path)


Original label distribution (full_train):
label_id
0      84
1      39
2      22
3      40
4      37
5       6
6     167
7      12
8      56
9      29
10     60
Name: count, dtype: int64
{0: 'CHERRY_PICKING', 1: 'EVADING_THE_BURDEN_OF_PROOF', 2: 'FALSE_ANALOGY', 3: 'FALSE_AUTHORITY', 4: 'FALSE_CAUSE', 5: 'HASTY_GENERALISATION', 6: 'NO_FALLACY', 7: 'POST_HOC', 8: 'RED_HERRINGS', 9: 'STRAWMAN', 10: 'VAGUENESS'}
Original counts:
 label_id
0      84
1      39
2      22
3      40
4      37
5       6
6     167
7      12
8      56
9      29
10     60
Name: count, dtype: int64
Max count: 167
Balanced counts:
 label_id
0     167
1     167
2     167
3     167
4     167
5     167
6     167
7     167
8     167
9     167
10    167
Name: count, dtype: int64
Saved balanced training data to: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\combined_csv\climate_train_balanced.csv


In [17]:
# Baseline Model (TF-IDF + Linear SVM)

cfg = BaselineConfig(
    test_size=0.2,
    random_state=42,
    max_features=20000,
    ngram_range=(1, 2)
)

print("\n=== Baseline on UNBALANCED full_train ===")
baseline_model_unbal, baseline_vec_unbal = run_baseline_multiclass(
    df=full_train,            # unbalanced
    text_col="text_clean",
    label_col="label_id",
    cfg=cfg,
)

print("\n=== Baseline on BALANCED full_train_balanced ===")
baseline_model_bal, baseline_vec_bal = run_baseline_multiclass(
    df=full_train_balanced,   # balanced
    text_col="text_clean",
    label_col="label_id",
    cfg=cfg,
)


=== Baseline on UNBALANCED full_train ===
=== Baseline (TF-IDF + Linear SVM) ===
              precision    recall  f1-score   support

           0       0.24      0.24      0.24        17
           1       0.33      0.12      0.18         8
           2       0.00      0.00      0.00         4
           3       0.50      0.50      0.50         8
           4       1.00      0.12      0.22         8
           5       0.00      0.00      0.00         1
           6       0.35      0.68      0.46        34
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00        11
           9       0.00      0.00      0.00         6
          10       0.12      0.08      0.10        12

    accuracy                           0.31       111
   macro avg       0.23      0.16      0.15       111
weighted avg       0.29      0.31      0.25       111


=== Baseline on BALANCED full_train_balanced ===


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


=== Baseline (TF-IDF + Linear SVM) ===
              precision    recall  f1-score   support

           0       0.79      0.70      0.74        33
           1       0.91      0.97      0.94        33
           2       0.87      1.00      0.93        33
           3       0.89      0.91      0.90        34
           4       0.97      0.94      0.96        34
           5       0.94      1.00      0.97        33
           6       0.81      0.62      0.70        34
           7       0.94      1.00      0.97        34
           8       0.86      0.88      0.87        34
           9       0.97      0.94      0.95        33
          10       0.88      0.91      0.90        33

    accuracy                           0.90       368
   macro avg       0.89      0.90      0.89       368
weighted avg       0.89      0.90      0.89       368



In [18]:
slm_cfg = SLMConfigMC(
    model_name="distilroberta-base",
    max_length=256,
    learning_rate=2e-5,
    batch_size=16,
    num_epochs=10,
    weight_decay=0.01,
    output_dir=str(PROJECT_ROOT / "outputs" / "slm_climate_multiclass")
)

slm_trainer = train_slm_multiclass(
    df=full_train_balanced,
    text_col="text_clean",
    label_col="label_id",
    cfg=slm_cfg
)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Error while downloading from https://huggingface.co/distilroberta-base/resolve/main/vocab.json: HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out.
Trying to resume download...


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/1469 [00:00<?, ? examples/s]

Map:   0%|          | 0/368 [00:00<?, ? examples/s]

Class weights (by label_id): {np.int64(0): np.float64(0.9966078697421981), np.int64(1): np.float64(0.9966078697421981), np.int64(2): np.float64(0.9966078697421981), np.int64(3): np.float64(1.0041011619958988), np.int64(4): np.float64(1.0041011619958988), np.int64(5): np.float64(0.9966078697421981), np.int64(6): np.float64(1.0041011619958988), np.int64(7): np.float64(1.0041011619958988), np.int64(8): np.float64(1.0041011619958988), np.int64(9): np.float64(0.9966078697421981), np.int64(10): np.float64(0.9966078697421981)}


model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  super().__init__(*args, **kwargs)


Step,Training Loss
50,2.4002
100,2.0912
150,1.5353
200,1.1791
250,0.8803
300,0.7017
350,0.5227
400,0.4941
450,0.4119
500,0.3512




In [19]:
# Saving model to "slm_climate_multiclass folder in output"
# creating its dir
model_dir = PROJECT_ROOT / "outputs" / "slm_climate_multiclass"
model_dir.mkdir(parents=True, exist_ok=True)

# Save model + tokenizer into  folder
slm_trainer.save_model(model_dir)
slm_trainer.tokenizer.save_pretrained(model_dir)

print("Saved model + tokenizer to:", model_dir)

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Saved model + tokenizer to: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\outputs\slm_climate_multiclass


In [20]:
# Loading  from  fine-tuned model directory
model_dir = PROJECT_ROOT / "outputs" / "slm_climate_multiclass"
print("Loading model from:", model_dir)

# Load tokenizer + model
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

# Tokenization function for evaluation
def tokenize_fn_eval(batch):
    return tokenizer(
        batch["text_clean"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )

# Prepare test dataset for HF Trainer
test_ds = Dataset.from_pandas(
    test_df[["text_clean", "label_id"]].reset_index(drop=True)
)
test_ds = test_ds.map(tokenize_fn_eval, batched=True)
test_ds = test_ds.rename_column("label_id", "labels")

# Safely remove unused columns
cols_to_remove = [
    c for c in ["text_clean", "__index_level_0__"] if c in test_ds.column_names
]
test_ds = test_ds.remove_columns(cols_to_remove)

# Ensure PyTorch format
test_ds.set_format("torch")

# New Trainer instance for evaluation
trainer_eval = Trainer(model=model, tokenizer=tokenizer)

# Get predictions
predictions = trainer_eval.predict(test_ds)

y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=-1)

# Print classification report with fallacy labels
print(
    classification_report(
        y_true,
        y_pred,
        target_names=[ID2LABEL[i] for i in sorted(ID2LABEL)],
    )
)

Loading model from: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\outputs\slm_climate_multiclass


Map:   0%|          | 0/133 [00:00<?, ? examples/s]

  trainer_eval = Trainer(model=model, tokenizer=tokenizer)


                             precision    recall  f1-score   support

             CHERRY_PICKING       0.15      0.19      0.17        21
EVADING_THE_BURDEN_OF_PROOF       0.40      0.22      0.29         9
              FALSE_ANALOGY       0.38      0.60      0.46         5
            FALSE_AUTHORITY       0.50      0.30      0.38        10
                FALSE_CAUSE       0.25      0.22      0.24         9
       HASTY_GENERALISATION       0.00      0.00      0.00         2
                 NO_FALLACY       0.34      0.24      0.29        41
                   POST_HOC       0.00      0.00      0.00         2
               RED_HERRINGS       0.27      0.31      0.29        13
                   STRAWMAN       0.11      0.14      0.12         7
                  VAGUENESS       0.25      0.43      0.32        14

                   accuracy                           0.26       133
                  macro avg       0.24      0.24      0.23       133
               weighted avg     

In [23]:
sample_text = ( "In the sugarcane region of El Salvador, as much as one-fifth of the population has chronic kidney disease, including over a quarter of the men, the presumed result of dehydration from working the fields they were able to comfortably harvest as recently as two decades ago."
)

# Token Input
inputs = tokenizer(
    sample_text,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=256,
)

# Forward Pass
outputs = model(**inputs)
logits = outputs.logits

# Predicted Class ID
pred_id = int(logits.argmax().item())
pred_label = ID2LABEL[pred_id]

# Softmax Probabilitied
probs = F.softmax(logits, dim=-1)
pred_confidence = float(probs[0][pred_id].item())

print("Sample text:", sample_text)
print("Predicted fallacy:", ID2LABEL[pred_id])
print(f"Prediction confidence: {pred_confidence:.4f}")


Sample text: In the sugarcane region of El Salvador, as much as one-fifth of the population has chronic kidney disease, including over a quarter of the men, the presumed result of dehydration from working the fields they were able to comfortably harvest as recently as two decades ago.
Predicted fallacy: CHERRY_PICKING
Prediction confidence: 0.8579


In [24]:
explainer = QwenExplainer()


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [25]:
# Using Qwen for explanations
# Mix of correct and incorrect predictions

# Indices of correctly and incorrectly classified examples

correct_idx = np.where(y_true == y_pred)[0][:3]
wrong_idx   = np.where(y_true != y_pred)[0][:3]

indices = list(correct_idx) + list(wrong_idx)

for idx in indices:
    row = test_df.iloc[idx]
    text = row["text_clean"]
    true_label_id = int(row["label_id"])
    pred_label_id = int(y_pred[idx])

    true_label_name = ID2LABEL[true_label_id]
    pred_label_name = ID2LABEL[pred_label_id]

    print("=" * 80)
    print(f"Example index: {idx}")
    print("- Text:")
    print(text)
    print(f"- True label:      {true_label_name}")
    print(f"- Predicted label: {pred_label_name}")

    # Qwen Explanation
    explanation = explainer.explain(text, pred_label_name)
    print("\nQwen explanation:")
    print(explanation)
    print()


Example index: 13
- Text:
The particular signature of warming in 2016 was also revealing in another way, Overpeck said, noting that the stratosphere saw record cold temperatures last year
- True label:      CHERRY_PICKING
- Predicted label: CHERRY_PICKING

Qwen explanation:
Cherry picking occurs when someone selectively chooses only certain data points or evidence to support their claim while ignoring contradictory information. In this case, the author seems to have cherry picked the temperature data from the stratosphere to support their claim about global warming without considering other factors such as oceanic temperatures or land surface temperatures.
This text could potentially be an example of cherry picking because it focuses on one specific piece of data (record cold temperatures in the stratosphere) to argue for global warming, but overlooks other important variables like oceanic temperatures and land surface temperatures which may provide a more comprehensive view of overall

In [26]:
records = []
num_samples = 20  # or len(test_df) if you’re brave lol!!!

for idx in range(num_samples):
    row = test_df.iloc[idx]
    text = row["text_clean"]
    true_label_id = int(row["label_id"])
    pred_label_id = int(y_pred[idx])

    true_label_name = ID2LABEL[true_label_id]
    pred_label_name = ID2LABEL[pred_label_id]

    explanation = explainer.explain(text, pred_label_name)

    records.append({
        "index": idx,
        "text": text,
        "true_label": true_label_name,
        "pred_label": pred_label_name,
        "qwen_explanation": explanation,
    })

exp_df = pd.DataFrame(records)
exp_path = PROJECT_ROOT / "data" / "Qwen_exp" / "climate_test_with_phi35_explanations.csv"
exp_df.to_csv(exp_path, index=False)
print("Saved explanations to:", exp_path)


Saved explanations to: C:\Users\HP\UG - DATA SCIENCE\NLP\Steadx01_NLP\data\Qwen_exp\climate_test_with_phi35_explanations.csv
