# 02 - Fact Checking

This notebook performs the fact-checking task on the claims that were extracted and normalized in the previous notebook. It loads the previously generated datasets, creates batches of fact-checking jobs, and processes those jobs so that the LLM can classify each claim as true or false.

### Imports

In [None]:
# Native
import logging

# Third-party
import torch
import sklearn
import evaluate
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

### Setup

In [None]:
# Root logger: show INFO and above, each record prefixed with timestamp and severity
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

True

### Constants

In [None]:
# Dataset Constants
DATASET_NAME = "faketweetbr"
# Directory segment naming the task whose data is read
# (i.e., original, claim_extraction, claim_normalization).
DATASET_TASK = "original"
# Optional sub-directory under the task directory; an empty string adds no segment.
DATASET_PROCESS_ID = ""

# Paths Constants
# Single quotes are used inside the replacement field on purpose: reusing the
# enclosing double quote within an f-string is a SyntaxError before Python 3.12.
DATA_PATH = f"../data/{DATASET_NAME}/{DATASET_TASK}/{DATASET_PROCESS_ID + '/' if DATASET_PROCESS_ID else ''}"
OUTPUT_PATH = f"../data/{DATASET_NAME}/results/"

# Model Constants
MODEL_NAME = "neuralmind/bert-large-portuguese-cased"

### Verify GPU Availability

In [None]:
# Report the CUDA status of the installed torch build.
logging.info(f"Torch CUDA available: {torch.cuda.is_available()}")
logging.info(f"Torch CUDA version: {torch.version.cuda}")
# Only query the device name when CUDA is usable: get_device_name(0) raises
# otherwise, which would abort a Restart & Run All on a CPU-only machine.
if torch.cuda.is_available():
    logging.info(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    logging.warning("GPU: no CUDA device detected")

Torch CUDA available: True
Torch CUDA version: 12.1
GPU: NVIDIA GeForce RTX 4060 Ti


### Load Dataset

### Load Tokenizer and Model

In [None]:
# Load the tokenizer and the sequence-classification model from the checkpoint
# named by MODEL_NAME (constants cell), so changing the constant is enough to
# switch both to another checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)