In [25]:
import pandas as pd
import torch
from transformers import pipeline

**Labelling the news with the pre-trained DistilBERT model**

In [20]:
# Load the news table that the rest of the notebook labels.
# NOTE(review): this is the same path the final cell writes to, so every run
# reads the previous run's output — confirm whether a separate raw input file
# should be loaded here instead.
input_csv = 'updated_news_with_predictions.csv'
df = pd.read_csv(input_csv)

In [21]:
# Drop rows where either text field is missing, then build the single text
# column that is fed to the classifier.
# `df` is rebound to a new frame instead of using dropna(inplace=True), so the
# step reads as one chained expression and does not mutate the loaded frame
# in place.
df = (
    df
    .dropna(subset=['Headline', 'Description'])
    .assign(
        # Cast to str first so non-string values concatenate instead of raising.
        combined_text=lambda frame: (
            frame['Headline'].astype(str) + " " + frame['Description'].astype(str)
        )
    )
)

In [23]:
# Choose the device index handed to the pipeline:
# 0 when CUDA reports an available GPU, -1 to fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

In [26]:
# Build the text-classification pipeline once.
# The notebook previously constructed it in two consecutive cells with
# near-identical arguments; the second silently replaced the first. This is
# the single merged definition.
classifier = pipeline(
    'text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    device=device,      # reuse the GPU/CPU choice made in the device cell
    batch_size=16,      # default number of texts processed per batch
    truncation=True,    # truncate long texts to avoid token limit errors
)

In [30]:
# Run the classifier over every row's combined text, 16 texts per batch.
texts = df['combined_text'].tolist()
results = classifier(texts, batch_size=16)

In [19]:
# Attach each prediction's label to its row; `results` is in the same order
# as the texts passed to the classifier, so the assignment is positional.
# NOTE(review): the model in use is the SST-2 fine-tune, which presumably
# emits sentiment labels rather than a truthfulness judgement — confirm that
# the column name 'Predicted_Truthfulness' reflects what is actually stored.
predicted_labels = [prediction['label'] for prediction in results]
df['Predicted_Truthfulness'] = predicted_labels

In [31]:
# Write the labelled frame to CSV without the row index.
# NOTE(review): this path is the same file the notebook reads at the start,
# so the input is overwritten on every run (rows dropped for missing text are
# lost from the file) — confirm this is intended.
output_csv = 'updated_news_with_predictions.csv'
df.to_csv(output_csv, index=False)