In [None]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import DataLoader, TensorDataset

# Batch inference script: read "test_dataset.csv", classify every row's
# `text` column with the sequence-classification model stored under
# `model_name`, and write the frame plus a new `pred` column to
# "test_predictions.csv".

# Load the CSV file (test dataset)
test_data = pd.read_csv("test_dataset.csv")

# Load model and tokenizer
model_name = "fake_news_classification"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Inference mode: disables dropout and other training-only behaviour.
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

batch_size = 32
max_length = 128

# read_csv turns empty cells into float NaN (and numeric-looking cells into
# numbers); the tokenizer only accepts `str` items and raises on anything
# else. Coerce the column up front so a single blank row cannot abort the
# whole run. Rows are never dropped, so predictions stay aligned with
# `test_data` row for row. NOTE: rows with missing text are scored as "".
texts = test_data['text'].fillna("").astype(str).tolist()

# Tokenize the dataset
inputs = tokenizer(
    texts,
    padding=True,
    truncation=True,
    return_tensors="pt",
    max_length=max_length
)

# Create a DataLoader for batching. No shuffling: the output order must
# match the row order of `test_data`.
dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'])
data_loader = DataLoader(dataset, batch_size=batch_size)

predictions = []

# Perform inference in batches; gradients are not needed for prediction.
with torch.no_grad():
    for batch in data_loader:
        input_ids, attention_mask = [x.to(device) for x in batch]

        # Forward pass: get model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the predicted class (index with the highest score)
        predicted_classes = torch.argmax(logits, dim=-1)
        predictions.extend(predicted_classes.tolist())

# Add predictions to the test data
test_data['pred'] = predictions

test_data.to_csv("test_predictions.csv", index=False)

print("Predictions saved to 'test_predictions.csv'.")
