**Step 1: Add Device Definition at the Beginning**

In [None]:
import torch

# Select the compute device once: CUDA when PyTorch can use it, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report which device was selected
print(f"Using device: {device}")


**Step 2: Check GPU Availability**

In [None]:
import torch
# True when PyTorch can use a CUDA GPU, False otherwise
cuda_available = torch.cuda.is_available()
print(cuda_available)


**Step 3: Install Required Libraries**

In [None]:
!pip install transformers torch torchvision pandas scikit-learn

**Step 4: Download the Dataset**

In [None]:
!git clone https://github.com/KaiDMML/FakeNewsNet.git


In [None]:
# List the dataset directory to confirm the clone succeeded and to see which CSV files are available.
!ls FakeNewsNet/dataset


**Step 5: Load and Preprocess the Data**

In [None]:
import pandas as pd

# Load the GossipCop tables and tag every row with its class: 0 = fake, 1 = real.
# `assign` returns a new frame, so re-running this cell always starts from the
# CSV contents rather than mutating a frame left over from an earlier run.
df_fake_gossipcop = pd.read_csv('FakeNewsNet/dataset/gossipcop_fake.csv').assign(label=0)
df_real_gossipcop = pd.read_csv('FakeNewsNet/dataset/gossipcop_real.csv').assign(label=1)

# Stack fake and real rows. ignore_index=True gives the result a fresh 0..n-1
# index; without it each source frame keeps its own 0-based labels, leaving
# duplicate index labels that make label-based lookups (.loc, joins) ambiguous.
df_combined = pd.concat([df_fake_gossipcop, df_real_gossipcop], ignore_index=True)

# Preview the first rows
print(df_combined.head())


In [None]:
from transformers import BertTokenizer

# Load the pretrained tokenizer for the 'bert-base-uncased' checkpoint
# (the same checkpoint name the model definition loads).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def preprocess_text(text, max_length=128):
    """Tokenize text into fixed-length BERT inputs.

    Args:
        text: The text to encode. It is handed to the module-level
            ``tokenizer`` unchanged.
        max_length: Length every sequence is truncated or padded to.
            Defaults to 128, the value this function previously hard-coded.

    Returns:
        tuple: ``(input_ids, attention_mask)`` taken from the tokenizer
        output, which is requested as PyTorch tensors (``return_tensors='pt'``).
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        max_length=max_length,
        truncation=True,
        padding='max_length',
    )
    return inputs['input_ids'], inputs['attention_mask']

# Smoke test: encode the first title to confirm the tokenizer pipeline runs end to end.
sample_text = df_combined['title'].iat[0]
input_ids, attention_mask = preprocess_text(sample_text)


**Step 6: Model Definition: ViLBERT (implemented here as a text-only BERT encoder with a linear classification head)**

In [None]:
import torch.nn as nn
from transformers import BertModel

class ViLBERTClassifier(nn.Module):
    """Binary classifier: a pretrained BERT encoder followed by one linear layer.

    Despite the class name, only a text encoder (``BertModel``) is used here;
    the forward pass consumes token ids and an attention mask, nothing else.
    """

    def __init__(self):
        super().__init__()
        # The attribute names below become the prefixes of the state_dict keys.
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        hidden_size = self.bert.config.hidden_size
        # One output unit: a single raw score (logit) per example.
        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Return the classifier output for the first-token representation.

        Args:
            input_ids: Token ids, forwarded to the encoder as ``input_ids``.
            attention_mask: Mask forwarded to the encoder as ``attention_mask``.

        Returns:
            The linear layer applied to position 0 of the encoder's
            ``last_hidden_state`` (the CLS token), i.e. unnormalized logits.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        first_token_state = encoded.last_hidden_state[:, 0]
        return self.classifier(first_token_state)


In [None]:
# Move the model to the device selected in Step 1 (GPU when available, else CPU).
# A hard-coded .cuda() raises on machines without CUDA.
model = ViLBERTClassifier().to(device)


**Step 7: Training Loop**

7.1. Optimizer, loss function, and hyperparameters:

In [None]:
from torch.optim import Adam

# Binary classification loss that takes raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()
# Adam over every model parameter, encoder included.
optimizer = Adam(params=model.parameters(), lr=2e-5)


7.2. Training loop:

In [None]:
def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``; its
            output is flattened to one logit per example.
        train_loader: Sized iterable yielding ``(input_ids, attention_mask,
            labels)`` batches.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        criterion: Loss called as ``criterion(logits, targets)`` with float
            targets.
        device: Device every tensor of a batch is moved to.

    Returns:
        float: Sum of the batch losses divided by ``len(train_loader)``.

    Raises:
        ZeroDivisionError: If ``train_loader`` contains no batches.
    """
    model.train()
    total_loss = 0.0

    for batch in train_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]

        optimizer.zero_grad()
        # Flatten to shape (batch,) explicitly. A bare squeeze() also drops the
        # batch dimension when the (last) batch holds a single example, giving
        # a 0-d tensor whose shape no longer matches the labels and making the
        # loss raise.
        logits = model(input_ids, attention_mask).reshape(-1)
        targets = labels.float().reshape(-1)
        loss = criterion(logits, targets)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(train_loader)


**Step 8: DataLoader and Training Execution**

8.1. Create the DataLoader for training:

In [None]:
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import torch

# Train on the 80% partition only. The split arguments (test_size=0.2,
# random_state=42) are the same ones step 8.3 uses to build the test
# DataLoader, so both cells produce the same partition and the rows evaluated
# there are never seen during training. Training on all of df_combined would
# leak the test rows into the model and inflate every reported metric.
train_texts, _heldout_texts, train_labels, _heldout_labels = train_test_split(
    df_combined['title'], df_combined['label'], test_size=0.2, random_state=42)

# Prepare data for DataLoader
input_ids_list, attention_mask_list, labels_list = [], [], []
for text, label in zip(train_texts, train_labels):
    input_ids, attention_mask = preprocess_text(text)
    input_ids_list.append(input_ids)
    attention_mask_list.append(attention_mask)
    labels_list.append(int(label))

# Concatenate the per-example tensors along the first dimension; labels go in
# as one tensor built from plain Python ints.
dataset = TensorDataset(
    torch.cat(input_ids_list),
    torch.cat(attention_mask_list),
    torch.tensor(labels_list),
)

# Create DataLoader (shuffled each epoch)
train_loader = DataLoader(dataset, batch_size=16, shuffle=True)


8.2. Train the model:

In [None]:
num_epochs = 3  # You can adjust based on your computational capacity

for epoch in range(num_epochs):
    # Use the device selected in Step 1 rather than a hard-coded 'cuda',
    # which fails on machines without a GPU.
    train_loss = train(model, train_loader, optimizer, criterion, device=device)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}')


8.3. Create the test DataLoader:

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split of titles and labels; the fixed seed makes the partition repeatable.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df_combined['title'],
    df_combined['label'],
    test_size=0.2,
    random_state=42,
)

# Tokenize each test title, then separate the (input_ids, attention_mask) pairs.
test_encodings = [preprocess_text(title) for title in test_texts]
test_input_ids_list = [ids for ids, _mask in test_encodings]
test_attention_mask_list = [mask for _ids, mask in test_encodings]
test_labels_list = [torch.tensor(target) for target in test_labels]

# Concatenate the per-example tensors along the first dimension.
test_dataset = TensorDataset(
    torch.cat(test_input_ids_list),
    torch.cat(test_attention_mask_list),
    torch.tensor(test_labels_list),
)

# Evaluation order stays fixed: no shuffling.
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


**Step 9: Saving the Model Correctly**

In [None]:
# Persist the trained model with PyTorch.
model_save_path = 'vilbert_fakenews_model.pth'

# Save only the model's state_dict, i.e. its learned parameters (weights),
# rather than the whole module object.
torch.save(model.state_dict(), model_save_path)

# To load the model back later, rebuild the same architecture and load the saved weights into it:
# model = ViLBERTClassifier()
# model.load_state_dict(torch.load(model_save_path, map_location=device))
# model.to(device)


**Step 10: Downloading the Model to Your Local Machine**

In [None]:
# google.colab only exists inside a Colab runtime; anywhere else the import
# fails, and the saved file is already on the local disk.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Reuse the path from the save step so the two cells cannot drift apart.
    files.download(model_save_path)
else:
    print(f"Not running in Google Colab; the model file is at {model_save_path}")


**Step 11: Evaluating the Model on Test Data**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(model, test_loader, device):
    """Evaluate a binary classifier and print and return its test metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns raw logits, one per example.
        test_loader: Iterable yielding ``(input_ids, attention_mask, labels)``
            batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        dict: ``accuracy``, ``precision``, ``recall`` and ``f1`` as computed
        by the scikit-learn metric functions over all batches.
    """
    model.eval()  # Set model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():  # Turn off gradients for evaluation
        for input_ids, attention_mask, labels in test_loader:
            input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)

            # Flatten to (batch,) explicitly; a bare squeeze() would turn a
            # single-example batch into a 0-d tensor that cannot be iterated.
            logits = model(input_ids, attention_mask).reshape(-1)

            # The model emits logits, not probabilities. Map them through the
            # sigmoid before applying the 0.5 decision threshold; comparing
            # raw logits with 0.5 would label every example whose probability
            # lies between 0.5 and about 0.62 as class 0.
            probabilities = torch.sigmoid(logits)
            preds = (probabilities > 0.5).long()

            predictions.extend(preds.cpu().tolist())
            true_labels.extend(labels.reshape(-1).tolist())

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-Score: {f1:.4f}')

    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

# Evaluate the trained model on the test DataLoader, running on the device selected in Step 1.
evaluate_model(model, test_loader, device)
