In [14]:
import numpy as np
import pandas as pd

import os

import time
import logging

from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Setup logging: configure the root logger at INFO level so the progress
# messages emitted through `logger.info(...)` in this notebook are displayed.
# Note: logging.basicConfig() does nothing if the root logger already has
# handlers configured (e.g. when this cell is re-run in a live kernel).
logging.basicConfig(level=logging.INFO)
# Notebook-level logger; __name__ is "__main__" here, which is why the log
# lines in the cell outputs are prefixed with "INFO:__main__:".
logger = logging.getLogger(__name__)

In [15]:
# Root data directory of the project (absolute path on the author's machine;
# adjust it when running the notebook elsewhere).
file_path = '/Users/alex_fassone/Documents/MSc Statistics/ST456/Coursework/Project/Data'
# 'Combined' sub-directory of the data root.
file_path_combined = f'{file_path}/Combined'

def load_combined_split(filename, base_dir=None):
    """
    Load one CSV file from the combined-data directory into a DataFrame.

    Parameters:
        filename (str): Name of the CSV file inside the directory,
            e.g. 'test_df.csv'.
        base_dir (str or os.PathLike, optional): Directory to read from.
            When None (the default) the module-level `file_path_combined`
            is used, which preserves the original behaviour.

    Returns:
        pd.DataFrame: The parsed file contents.

    Any exception raised by `pd.read_csv` (e.g. for a missing file) is
    propagated unchanged.
    """
    # Resolve the directory lazily so the global is only required when the
    # caller does not supply an explicit location.
    directory = file_path_combined if base_dir is None else base_dir
    return pd.read_csv(os.path.join(directory, filename))

In [17]:
# Read 'test_df.csv' from the Combined data directory; every model prediction
# further down in the notebook is made on this frame.
test_df = load_combined_split('test_df.csv')

In [20]:
# Alias only (no copy is made): input_df and test_df refer to the same
# DataFrame object, so in-place changes to one are visible through the other.
input_df = test_df

In [21]:
def _is_class_zero(label, class_ids):
    """Return True when a pipeline label string denotes the model's class index 0."""
    if label in class_ids:
        return class_ids[label] == 0
    # Label not described by the model config: fall back to the default
    # Hugging Face naming scheme ('LABEL_0', 'LABEL_1', ...).
    return label == 'LABEL_0'


def add_model_predictions(input_df, model_name, text_column='text', batch_size=32):
    """
    Adds a predicted-label column to a copy of input_df using a Hugging Face
    text-classification model.

    The new column is named 'predicted_label_<last path segment of model_name>'
    and holds 0 when the model predicts its class index 0 and 1 for any other
    class. The class index is resolved through the model's `id2label` mapping,
    so models that use custom label names are handled as well as the default
    'LABEL_0' / 'LABEL_1' names.

    NOTE(review): whether class index 0 means "real" or "fake" is specific to
    each model — confirm the polarity against the ground-truth column before
    comparing models.

    Parameters:
        input_df (pd.DataFrame): The input dataframe containing text.
        model_name (str): The Hugging Face model identifier.
        text_column (str): Column in the dataframe with input text.
        batch_size (int): Number of texts handed to the pipeline per call
            (also the granularity of the progress log messages).

    Returns:
        pd.DataFrame: A copy of the dataframe with the new prediction column.

    Raises:
        KeyError: if text_column is not a column of input_df.
        ValueError: if batch_size is smaller than 1.
    """
    # Validate cheap preconditions first so a typo does not cost a model load.
    if text_column not in input_df.columns:
        raise KeyError(f"Column '{text_column}' not found in input_df")
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    logger.info(f"Loading model: {model_name}")

    # Create prediction pipeline
    pipe = pipeline(
        "text-classification",
        model=model_name,
        tokenizer=model_name,
        truncation=True,
        padding=True,
        max_length=512
    )

    logger.info("Model and tokenizer loaded.")

    # Invert the model's id -> label table so that returned label strings can
    # be mapped back to class indices whatever the labels are called.
    model_config = getattr(getattr(pipe, 'model', None), 'config', None)
    id2label = getattr(model_config, 'id2label', None) or {}
    class_ids = {label: int(idx) for idx, label in id2label.items()}

    # Missing values (e.g. empty cells read back from CSV as NaN) and
    # non-string entries are normalised to strings before inference.
    texts = input_df[text_column].fillna('').astype(str).tolist()
    total = len(texts)
    predictions = []

    logger.info(f"Starting prediction on {total} samples (batch size = {batch_size})...")

    # NOTE(review): `pipe(batch)` is called without a `batch_size=` argument,
    # so these chunks may only control logging granularity rather than
    # model-level batching — confirm whether that was intended.
    for start in range(0, total, batch_size):
        batch = texts[start:start + batch_size]
        predictions.extend(pipe(batch))
        logger.info(f"Processed {min(start + batch_size, total)} / {total}")

    # Parse outputs: class index 0 -> 0, anything else -> 1. The confidence of
    # each prediction remains available as pred['score'] if it is ever needed.
    predicted_labels = [
        0 if _is_class_zero(pred['label'], class_ids) else 1
        for pred in predictions
    ]

    # Format column name from the last path segment of the model identifier
    label_col = f'predicted_label_{model_name.split("/")[-1]}'

    # Work on a copy so the caller's dataframe is left untouched; the list is
    # assigned positionally, so a non-default index is not a problem.
    output_df = input_df.copy()
    output_df[label_col] = predicted_labels

    logger.info(f"Added columns: {label_col}")

    return output_df

In [22]:
# Display name -> Hugging Face Hub model id for each classifier to benchmark.
# Extend this mapping to evaluate additional models.
model_dict = {
    "Fake-News-Bert": "jy46604790/Fake-News-Bert-Detect",
    "DistilBERT-FakeNews": "harshhmaniya/fake-news-classification-distilbert-fine-tuned",
    "Albert-Base-v2": "XSY/albert-base-v2-fakenews-discriminator",
}

# Accumulator frame: starts as a copy of the input and gains one
# prediction column per model.
df_with_preds = input_df.copy()

# Run the models one after another, in the order they are listed above.
for name in model_dict:
    model_path = model_dict[name]
    print(f"\nRunning predictions for: {name}")
    df_with_preds = add_model_predictions(
        df_with_preds,
        model_name=model_path,
        text_column='text_clean',
        batch_size=100,  # tunable
    )

INFO:__main__:Loading model: jy46604790/Fake-News-Bert-Detect



Running predictions for: Fake-News-Bert


Device set to use mps:0
INFO:__main__:Model and tokenizer loaded.
INFO:__main__:Starting prediction on 20000 samples (batch size = 100)...
INFO:__main__:Processed 100 / 20000
INFO:__main__:Processed 200 / 20000
INFO:__main__:Processed 300 / 20000
INFO:__main__:Processed 400 / 20000
INFO:__main__:Processed 500 / 20000
INFO:__main__:Processed 600 / 20000
INFO:__main__:Processed 700 / 20000
INFO:__main__:Processed 800 / 20000
INFO:__main__:Processed 900 / 20000
INFO:__main__:Processed 1000 / 20000
INFO:__main__:Processed 1100 / 20000
INFO:__main__:Processed 1200 / 20000
INFO:__main__:Processed 1300 / 20000
INFO:__main__:Processed 1400 / 20000
INFO:__main__:Processed 1500 / 20000
INFO:__main__:Processed 1600 / 20000
INFO:__main__:Processed 1700 / 20000
INFO:__main__:Processed 1800 / 20000
INFO:__main__:Processed 1900 / 20000
INFO:__main__:Processed 2000 / 20000
INFO:__main__:Processed 2100 / 20000
INFO:__main__:Processed 2200 / 20000
INFO:__main__:Processed 2300 / 20000
INFO:__main__:Proce


Running predictions for: DistilBERT-FakeNews


All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at harshhmaniya/fake-news-classification-distilbert-fine-tuned.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
Device set to use 0
INFO:__main__:Model and tokenizer loaded.
INFO:__main__:Starting prediction on 20000 samples (batch size = 100)...
INFO:__main__:Processed 100 / 20000
INFO:__main__:Processed 200 / 20000
INFO:__main__:Processed 300 / 20000
INFO:__main__:Processed 400 / 20000
INFO:__main__:Processed 500 / 20000
INFO:__main__:Processed 600 / 20000
INFO:__main__:Processed 700 / 20000
INFO:__main__:Processed 800 / 20000
INFO:__main__:Processed 900 / 20000
INFO:__main__:Processed 1000 / 20000
INFO:__main__:Processed 1100 / 20000
INFO:__main__:Processed 1200


Running predictions for: Albert-Base-v2


Device set to use mps:0
INFO:__main__:Model and tokenizer loaded.
INFO:__main__:Starting prediction on 20000 samples (batch size = 100)...
INFO:__main__:Processed 100 / 20000
INFO:__main__:Processed 200 / 20000
INFO:__main__:Processed 300 / 20000
INFO:__main__:Processed 400 / 20000
INFO:__main__:Processed 500 / 20000
INFO:__main__:Processed 600 / 20000
INFO:__main__:Processed 700 / 20000
INFO:__main__:Processed 800 / 20000
INFO:__main__:Processed 900 / 20000
INFO:__main__:Processed 1000 / 20000
INFO:__main__:Processed 1100 / 20000
INFO:__main__:Processed 1200 / 20000
INFO:__main__:Processed 1300 / 20000
INFO:__main__:Processed 1400 / 20000
INFO:__main__:Processed 1500 / 20000
INFO:__main__:Processed 1600 / 20000
INFO:__main__:Processed 1700 / 20000
INFO:__main__:Processed 1800 / 20000
INFO:__main__:Processed 1900 / 20000
INFO:__main__:Processed 2000 / 20000
INFO:__main__:Processed 2100 / 20000
INFO:__main__:Processed 2200 / 20000
INFO:__main__:Processed 2300 / 20000
INFO:__main__:Proce

In [10]:
def _binary_classification_scores(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 for one truth/prediction pair."""
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1_score': f1_score(y_true, y_pred, zero_division=0),
    }


def evaluate_model_predictions(df, label_col='binary_label', group_by_dataset=False):
    """
    Evaluate prediction performance for each model in the DataFrame.

    Every column whose name starts with 'predicted_label_' is treated as the
    output of one model and scored against the ground-truth column.

    Parameters:
        df (pd.DataFrame): DataFrame containing ground truth and predicted labels.
        label_col (str): Column name of the ground truth labels.
        group_by_dataset (bool): If True, compute metrics separately for each
            value of 'dataset'; otherwise compute them once over the whole
            frame and report the dataset as 'ALL'.

    Returns:
        pd.DataFrame: One row per (dataset, model) with the columns
            'dataset', 'model', 'accuracy', 'precision', 'recall', 'f1_score'.
            The columns are present even when no prediction column exists,
            in which case the frame has zero rows.
    """
    prefix = 'predicted_label_'
    # Non-string column labels cannot be prediction columns; skip them instead
    # of failing on `.startswith`.
    pred_cols = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith(prefix)
    ]

    # Both modes share one loop: the ungrouped case is a single pseudo-group.
    if group_by_dataset:
        subsets = df.groupby('dataset')
    else:
        subsets = [('ALL', df)]

    metrics = []
    for dataset_name, subset in subsets:
        for col in pred_cols:
            row = {
                'dataset': dataset_name,
                'model': col.replace(prefix, ''),
            }
            row.update(_binary_classification_scores(subset[label_col], subset[col]))
            metrics.append(row)

    # Explicit column list keeps the schema stable for an empty result.
    return pd.DataFrame(
        metrics,
        columns=['dataset', 'model', 'accuracy', 'precision', 'recall', 'f1_score'],
    )

In [23]:
# Score every 'predicted_label_*' column against the default ground-truth
# column ('binary_label'): once over the whole frame (dataset reported as
# 'ALL') and once per value of the 'dataset' column.
overall_model_metrics = evaluate_model_predictions(df_with_preds)
dataset_grouped_model_metrics = evaluate_model_predictions(df_with_preds, group_by_dataset=True)

In [24]:
# Display the overall metrics table (one row per model).
# NOTE(review): the saved output shows accuracy close to 0.5 and recall below
# 0.2 for all three models — worth confirming that each model's label ids use
# the same polarity as the ground-truth column before drawing conclusions.
overall_model_metrics

Unnamed: 0,dataset,model,accuracy,precision,recall,f1_score
0,ALL,Fake-News-Bert-Detect,0.5029,0.941535,0.072466,0.134575
1,ALL,fake-news-classification-distilbert-fine-tuned,0.4786,0.97992,0.022874,0.044705
2,ALL,albert-base-v2-fakenews-discriminator,0.48685,0.566579,0.161151,0.250931


In [25]:
# Display the same metrics broken down per source dataset and model.
dataset_grouped_model_metrics

Unnamed: 0,dataset,model,accuracy,precision,recall,f1_score
0,Fakeddit,Fake-News-Bert-Detect,0.45975,0.761905,0.007366,0.014592
1,Fakeddit,fake-news-classification-distilbert-fine-tuned,0.45725,1.0,0.00046,0.00092
2,Fakeddit,albert-base-v2-fakenews-discriminator,0.4875,0.54314,0.353591,0.428332
3,Kaggle 1 - Fake News,Fake-News-Bert-Detect,0.5825,0.995316,0.203058,0.337302
4,Kaggle 1 - Fake News,fake-news-classification-distilbert-fine-tuned,0.5265,0.995025,0.095557,0.174368
5,Kaggle 1 - Fake News,albert-base-v2-fakenews-discriminator,0.54175,0.869318,0.146202,0.250307
6,Kaggle 2 - News Project,Fake-News-Bert-Detect,0.445,0.563636,0.01392,0.027169
7,Kaggle 2 - News Project,fake-news-classification-distilbert-fine-tuned,0.44325,0.5,0.000898,0.001793
8,Kaggle 2 - News Project,albert-base-v2-fakenews-discriminator,0.45525,0.657895,0.044903,0.084069
9,Kaggle 3 - Fake News Detection,Fake-News-Bert-Detect,0.44625,0.963455,0.116279,0.207513
