# 🇳🇬 RoBERTa Fine-Tuning for Nigerian Fake News Detection
This notebook fine-tunes the `roberta-base` model using a dataset of Nigerian real and fake news texts.


In [1]:
!pip install --upgrade transformers datasets scikit-learn --quiet

In [2]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import Dataset, DatasetDict

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the labelled news articles.
df = pd.read_csv("nigeria_fake_news_dataset.csv")

# Encode the string labels as integer class ids: REAL -> 0, FAKE -> 1.
label_to_id = {"REAL": 0, "FAKE": 1}
encoded_labels = df["label"].map(label_to_id)

# `Series.map` yields NaN for every value missing from the mapping, and the
# integer cast below would then fail with an opaque conversion error.
# Fail early and name the offending values instead.
unmapped_rows = encoded_labels.isna()
if unmapped_rows.any():
    unexpected = sorted(df.loc[unmapped_rows, "label"].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'label' column "
        f"(expected one of {sorted(label_to_id)}): {unexpected}"
    )

df["label"] = encoded_labels.astype(int)

df.head()


Unnamed: 0,title,text,label,id
0,Buhari cloned in the UK (Entry 1),A viral message claims President Buhari died a...,1,1
1,CBN launches digital currency eNaira (Entry 2),The Central Bank of Nigeria has announced the ...,0,2
2,5G towers cause COVID-19 in Lagos (Entry 3),Rumors spread on WhatsApp that new 5G towers i...,1,3
3,Lagos-Ibadan Expressway reconstruction 75% com...,"According to the Minister of Works, the Lagos-...",0,4
4,Coca-Cola Nigeria shutting down due to low sal...,A blog post claims Coca-Cola Nigeria will shut...,1,5


In [4]:
# Location passed to `from_pretrained` for both the tokenizer and the classifier.
model_path = "roberta_fake_news_model"

# Load the tokenizer first, then the sequence-classification model, from the
# same location so the two stay paired.
tokenizer, model = (
    RobertaTokenizer.from_pretrained(model_path),
    RobertaForSequenceClassification.from_pretrained(model_path),
)

In [5]:
def predict(text):
    """Classify news text as ``"Fake"`` or ``"Real"``.

    Uses the notebook-level ``tokenizer`` and ``model``. Predicted class id 1
    is reported as ``"Fake"``; any other class id is reported as ``"Real"``.

    Args:
        text: A single string, or a sequence of strings to classify together
            as one batch.

    Returns:
        A label string when ``text`` is a string; otherwise a list with one
        label per input, in input order (an empty list for an empty sequence).
    """
    is_single = isinstance(text, str)
    texts = [text] if is_single else list(text)
    if not texts:
        # Nothing to tokenize; avoid handing an empty batch to the tokenizer.
        return []

    # The tokenizer builds CPU tensors; move them to wherever the model lives
    # so inference also works when the model has been placed on a GPU.
    inputs = tokenizer(
        texts, return_tensors="pt", truncation=True, padding=True
    ).to(model.device)

    # Dropout must be disabled for deterministic predictions. The model may
    # have been left in training mode (e.g. after fine-tuning), so switch to
    # eval mode for the forward pass and restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        model.train(was_training)

    # `.tolist()` handles any batch size, unlike `.item()`, which only
    # accepts a single-element tensor.
    class_ids = torch.argmax(logits, dim=1).tolist()
    labels = ["Fake" if class_id == 1 else "Real" for class_id in class_ids]
    return labels[0] if is_single else labels