In [1]:
!pip uninstall -y datasets huggingface_hub
!pip install datasets huggingface_hub

[0mCollecting datasets
  Using cached datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub
  Using cached huggingface_hub-0.33.4-py3-none-any.whl.metadata (14 kB)
Using cached datasets-4.0.0-py3-none-any.whl (494 kB)
Using cached huggingface_hub-0.33.4-py3-none-any.whl (515 kB)
Installing collected packages: huggingface_hub, datasets
Successfully installed datasets-4.0.0 huggingface_hub-0.33.4


In [2]:
!pip install -q transformers datasets scikit-learn


In [3]:
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
import torch
import os

In [4]:
# Load the dataset
dataset = load_dataset("fever")
data = dataset["validation"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

fever.py: 0.00B [00:00, ?B/s]

Using the latest cached version of the dataset since fever couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'v2.0' at /root/.cache/huggingface/datasets/fever/v2.0/2.0.0/7f8936e0558704771b08c7ce9cc202071b29a0050603374507ba61d23c00a58e (last modified on Mon Jul 21 23:30:59 2025).


In [5]:
df = pd.DataFrame(data)
print(df.head())
print(df["label"].value_counts())


       id            label                                              claim  \
0  500000  NOT ENOUGH INFO  There is a convicted statutory rapist called C...   
1  500001         SUPPORTS  There exists a producer and an actor called Si...   
2  500002          REFUTES  Exotic Birds rejected to be an opening band fo...   
3  500002          REFUTES  Exotic Birds rejected to be an opening band fo...   
4  500002          REFUTES  Exotic Birds rejected to be an opening band fo...   

   evidence_annotation_id  evidence_id                   evidence_wiki_url  \
0                  269158           -1                                       
1                  141141       156349                          Simon_Pegg   
2                   25977        31918                        Exotic_Birds   
3                   25977        31918  Information_Society_-LRB-band-RRB-   
4                  300603       291751                        Exotic_Birds   

   evidence_sentence_id  
0                 

In [9]:
# Converting Dataset to DataFrame
data = data.to_pandas()

# label normalization
data['label'] = data['label'].apply(lambda x: 'NOT ENOUGH INFO' if str(x).strip().lower() == 'not enough info' else x)

# Viewing label counts
print(data['label'].value_counts())

label
SUPPORTS           970
REFUTES            963
NOT ENOUGH INFO    451
Name: count, dtype: int64


In [10]:
# Mapping labels to integers
label2id = {"SUPPORTS": 0, "REFUTES": 1, "NOT ENOUGH INFO": 2}
id2label = {v: k for k, v in label2id.items()}
df = df[df["label"].isin(label2id)]  # Remove any unexpected labels

df["label_id"] = df["label"].map(label2id)

# Splitting into train and test (80-20 split)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label_id"], random_state=42)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label_id"] = df["label"].map(label2id)


In [11]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

train_encodings = tokenizer(train_df["claim"].tolist(), truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_df["claim"].tolist(), truncation=True, padding=True, max_length=128)

train_labels = train_df["label_id"].tolist()
val_labels = val_df["label_id"].tolist()


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [12]:
class FeverDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item
    def __len__(self):
        return len(self.labels)

train_dataset = FeverDataset(train_encodings, train_labels)
val_dataset = FeverDataset(val_encodings, val_labels)


In [13]:
os.environ["WANDB_DISABLED"] = "true"


In [14]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)
trainer.train()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss
1,No log,0.691117
2,No log,0.53121
3,No log,0.503569


TrainOutput(global_step=354, training_loss=0.6074500218623102, metrics={'train_runtime': 3196.3415, 'train_samples_per_second': 1.765, 'train_steps_per_second': 0.111, 'total_flos': 150714654592320.0, 'train_loss': 0.6074500218623102, 'epoch': 3.0})

In [15]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='macro')
    acc = accuracy_score(labels, predictions)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


  trainer = Trainer(


In [17]:
trainer.evaluate()


{'eval_loss': 0.5035689473152161,
 'eval_model_preparation_time': 0.0039,
 'eval_accuracy': 0.8131634819532909,
 'eval_f1': 0.7842815043171161,
 'eval_precision': 0.78875762735916,
 'eval_recall': 0.783403621940559,
 'eval_runtime': 66.5177,
 'eval_samples_per_second': 7.081,
 'eval_steps_per_second': 0.12}

In [18]:
text = "The capital of France is Paris."
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
outputs = model(**inputs)
predicted = torch.argmax(outputs.logits, dim=1).item()
print(f"Prediction: {id2label[predicted]}")

Prediction: REFUTES
