## The project focuses on fake news detection using transformer architectures. It utilizes two pre-trained models, BERT and BART, and a custom-built transformer model trained from scratch. By analyzing both headlines and article text, the project aims to achieve high accuracy through careful data preprocessing, model training, and evaluation.

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import nbformat

notebook_path = "/content/drive/MyDrive/Colab Notebooks/DLP/DLP_Project(21k3924,21k3834).ipynb"

with open(notebook_path, 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)

if 'widgets' in nb['metadata']:
    del nb['metadata']['widgets']

with open(notebook_path, 'w', encoding='utf-8') as f:
    nbformat.write(nb, f)

print("Metadata cleaned successfully!")

Metadata cleaned successfully!


### BERT (pretrained)

In [None]:
!pip install transformers datasets scikit-learn

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.w

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [None]:
real = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DLP/News_Data/True.csv')
fake = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DLP/News_Data/Fake.csv')

fake = fake[fake['text'].notna() & (fake['text'].str.strip() != '')]

real['label'] = 1
fake['label'] = 0

In [None]:
data = pd.concat([real, fake]).sample(frac=1).reset_index(drop=True)

data['content'] = data['title'] + ' ' + data['text']

data = data[['content', 'label']]

In [None]:
train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['content'].tolist(),
    data['label'].tolist(),
    test_size=0.2,
    random_state=42
)

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=256)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=256)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
class NewsDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = NewsDataset(train_encodings, train_labels)
val_dataset = NewsDataset(val_encodings, val_labels)

In [None]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
optimizer = AdamW(model.parameters(), lr=5e-5)

In [None]:
num_epochs = 3
num_training_steps = num_epochs * len(train_dataset)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)

In [None]:
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    print(f"Epoch {epoch+1} - Loss: {total_loss/len(train_loader):.4f}")

Epoch 1 - Loss: 0.0205
Epoch 2 - Loss: 0.0312
Epoch 3 - Loss: 0.5665


In [None]:
model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for batch in val_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(batch['labels'].cpu().numpy())

In [None]:
accuracy = accuracy_score(true_labels, predictions)
print(f"\nValidation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9626


In [None]:
print("\nClassification Report:")
print(classification_report(true_labels, predictions, target_names=["Fake", "Real"]))


Classification Report:
              precision    recall  f1-score   support

        Fake       0.93      1.00      0.96      4556
        Real       1.00      0.92      0.96      4298

    accuracy                           0.96      8854
   macro avg       0.97      0.96      0.96      8854
weighted avg       0.97      0.96      0.96      8854



### BART (pretrained)

In [None]:
!pip install transformers datasets scikit-learn

import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from torch.optim import AdamW
from transformers import BartTokenizer, BartForSequenceClassification, get_scheduler

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

real = pd.read_csv('True.csv')
fake = pd.read_csv('Fake.csv')
fake = fake[fake['text'].notna() & (fake['text'].str.strip() != '')]

real['label'] = 1
fake['label'] = 0

data = pd.concat([real, fake]).sample(frac=1, random_state=42).reset_index(drop=True)
data['content'] = data['title'] + ' ' + data['text']
data = data[['content', 'label']]

train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['content'].tolist(),
    data['label'].tolist(),
    test_size=0.2,
    random_state=42
)

tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
train_encodings = tokenizer(train_texts, truncation=True, padding='max_length', max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding='max_length', max_length=128)

class NewsDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = NewsDataset(train_encodings, train_labels)
val_dataset = NewsDataset(val_encodings, val_labels)

model = BartForSequenceClassification.from_pretrained('facebook/bart-base', num_labels=2)
model.to(device)

optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
train_batch_size = 8
gradient_accumulation_steps = 1

train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=8, num_workers=4, pin_memory=True)

num_training_steps = num_epochs * len(train_loader) // gradient_accumulation_steps
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

scaler = torch.cuda.amp.GradScaler()

model.train()
for epoch in range(num_epochs):
    print(f"\n[INFO] Epoch {epoch + 1} started...")
    total_loss = 0
    for step, batch in enumerate(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}

        with torch.cuda.amp.autocast():
            outputs = model(**batch)
            loss = outputs.loss / gradient_accumulation_steps

        scaler.scale(loss).backward()

        if (step + 1) % gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            lr_scheduler.step()
            optimizer.zero_grad()

        total_loss += loss.item() * gradient_accumulation_steps

        if step % 10 == 0:
            print(f"[INFO] Step {step}/{len(train_loader)} - Loss: {loss.item() * gradient_accumulation_steps:.4f}")

    avg_loss = total_loss / len(train_loader)
    print(f"[INFO] Epoch {epoch + 1} completed - Average Loss: {avg_loss:.4f}")

model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in val_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.cuda.amp.autocast():
            outputs = model(**batch)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(batch['labels'].cpu().numpy())

accuracy = accuracy_score(true_labels, predictions)
print(f"\n[RESULT] Validation Accuracy: {accuracy:.4f}")
print("\n[RESULT] Classification Report:")
print(classification_report(true_labels, predictions, target_names=["Fake", "Real"]))


Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



[INFO] Epoch 1 started...


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


[INFO] Step 0/200 - Loss: 0.7239
[INFO] Step 10/200 - Loss: 0.2428
[INFO] Step 20/200 - Loss: 0.0029
[INFO] Step 30/200 - Loss: 0.9321
[INFO] Step 40/200 - Loss: 0.0378
[INFO] Step 50/200 - Loss: 0.0159
[INFO] Step 60/200 - Loss: 0.8124
[INFO] Step 70/200 - Loss: 0.0030
[INFO] Step 80/200 - Loss: 0.0027
[INFO] Step 90/200 - Loss: 0.0027
[INFO] Step 100/200 - Loss: 0.0014
[INFO] Step 110/200 - Loss: 0.0009
[INFO] Step 120/200 - Loss: 0.0007
[INFO] Step 130/200 - Loss: 0.0007
[INFO] Step 140/200 - Loss: 0.0006
[INFO] Step 150/200 - Loss: 0.0005
[INFO] Step 160/200 - Loss: 0.0004
[INFO] Step 170/200 - Loss: 0.0004
[INFO] Step 180/200 - Loss: 0.0004
[INFO] Step 190/200 - Loss: 0.0003
[INFO] Epoch 1 completed - Average Loss: 0.0521

[INFO] Epoch 2 started...
[INFO] Step 0/200 - Loss: 0.0003
[INFO] Step 10/200 - Loss: 0.0003
[INFO] Step 20/200 - Loss: 0.0003
[INFO] Step 30/200 - Loss: 0.0002
[INFO] Step 40/200 - Loss: 0.0002
[INFO] Step 50/200 - Loss: 0.0003
[INFO] Step 60/200 - Loss: 0.0041

  with torch.cuda.amp.autocast():



[RESULT] Validation Accuracy: 1.0000

[RESULT] Classification Report:
              precision    recall  f1-score   support

        Fake       1.00      1.00      1.00       203
        Real       1.00      1.00      1.00       197

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400



###Custom Transformer (trained)

In [None]:
!pip install torch scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [None]:
real = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DLP/News_Data/True.csv')
fake = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DLP/News_Data/Fake.csv')

real['label'] = 1
fake['label'] = 0

In [None]:
data = pd.concat([real, fake]).sample(frac=1).reset_index(drop=True)
data['content'] = data['title'] + ' ' + data['text']
data = data[['content', 'label']]

In [None]:
train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['content'].tolist(),
    data['label'].tolist(),
    test_size=0.2,
    random_state=42
)

In [None]:
from collections import Counter

def build_vocab(texts, max_vocab_size=30000):
    counter = Counter()
    for text in texts:
        tokens = text.lower().split()
        counter.update(tokens)
    most_common = counter.most_common(max_vocab_size - 2)
    vocab = {word: idx + 2 for idx, (word, _) in enumerate(most_common)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab

vocab = build_vocab(train_texts)

In [None]:
def encode_text(text, vocab, max_len=512):
    tokens = text.lower().split()
    token_ids = [vocab.get(token, vocab["<UNK>"]) for token in tokens]
    if len(token_ids) < max_len:
        token_ids += [vocab["<PAD>"]] * (max_len - len(token_ids))
    else:
        token_ids = token_ids[:max_len]
    return token_ids

train_inputs = [encode_text(text, vocab) for text in train_texts]
val_inputs = [encode_text(text, vocab) for text in val_texts]

In [None]:
class NewsDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        input_ids = torch.tensor(self.inputs[idx], dtype=torch.long)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return input_ids, label

In [None]:
train_dataset = NewsDataset(train_inputs, train_labels)
val_dataset = NewsDataset(val_inputs, val_labels)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [None]:
class SmallTransformer(nn.Module):
    def __init__(self, vocab_size, embed_dim=128, num_heads=4, num_layers=2, num_classes=2, max_len=512):
        super(SmallTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.positional_encoding = nn.Parameter(torch.randn(1, max_len, embed_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=256)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x) + self.positional_encoding[:, :x.size(1), :]
        x = x.permute(1, 0, 2)
        x = self.transformer_encoder(x)
        x = x.mean(dim=0)
        out = self.fc(x)
        return out

model = SmallTransformer(vocab_size=len(vocab)).to(device)



In [None]:
optimizer = optim.Adam(model.parameters(), lr=2e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for batch in loop:
        input_ids, labels = batch
        input_ids = input_ids.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
        loop.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1} Average Loss: {total_loss/len(train_loader):.4f}")

Epoch [1/5]: 100%|██████████| 1123/1123 [00:54<00:00, 20.42it/s, loss=0.000493]


Epoch 1 Average Loss: 0.0645


Epoch [2/5]: 100%|██████████| 1123/1123 [00:53<00:00, 21.11it/s, loss=0.000462]


Epoch 2 Average Loss: 0.0044


Epoch [3/5]: 100%|██████████| 1123/1123 [00:53<00:00, 20.90it/s, loss=0.000232]


Epoch 3 Average Loss: 0.0028


Epoch [4/5]: 100%|██████████| 1123/1123 [00:54<00:00, 20.76it/s, loss=0.204]


Epoch 4 Average Loss: 0.0022


Epoch [5/5]: 100%|██████████| 1123/1123 [00:54<00:00, 20.71it/s, loss=0.0079]

Epoch 5 Average Loss: 0.0016





In [None]:
model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for batch in val_loader:
        input_ids, labels = batch
        input_ids = input_ids.to(device)
        labels = labels.to(device)

        outputs = model(input_ids)
        preds = torch.argmax(outputs, dim=1)

        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(true_labels, predictions)
print(f"\nValidation Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(true_labels, predictions, target_names=["Fake", "Real"]))


Validation Accuracy: 0.9986

Classification Report:
              precision    recall  f1-score   support

        Fake       1.00      1.00      1.00      4765
        Real       1.00      1.00      1.00      4215

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980

