### In this script we test our Multi-Task BERT-base model on the Megafake dataset. ###

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import mixed_precision
from sklearn.metrics import confusion_matrix, classification_report 
from transformers import BertTokenizer
import pandas as pd
import zipfile
import os 
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.utils import resample



  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [2]:
# Reading Megafake

with zipfile.ZipFile('megafake_test_df.csv.zip','r') as zip:
    with zip.open('megafake_test_df.csv') as f:
        megafake_test = pd.read_csv(f,encoding='ISO-8859-1')

with zipfile.ZipFile('megafake_train_df.csv.zip','r') as zip:
    with zip.open('megafake_train_df.csv') as f:
        megafake_train = pd.read_csv(f,encoding='ISO-8859-1')

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        # Enable memory growth for the first (and only) GPU
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print(f"Memory growth enabled for {gpus[0]}")
    except RuntimeError as e:
        print(e)  # This happens if GPUs are initialized before setting memory growth
else:
    print("No GPU found. Running on CPU.")

Memory growth enabled for PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
# Loading Model

model = tf.keras.models.load_model('BFN-AI')

In [6]:
megafake_train['generation_technique'].value_counts()

generation_technique
style based          89131
content based        53583
story based          44138
integration based    29222
Name: count, dtype: int64

In [4]:
# We remove label 1 (Real) because our model was not trained on LLM-generated real news,
# since we believe that this is a rarely occuring scenario in the real-world.

megafake_train = megafake_train[megafake_train['binary_label']!=1]
megafake_train['Human_vs_AI'] = 1
megafake_test = megafake_test[megafake_test['binary_label']!=1]
megafake_test['Human_vs_AI'] = 1

megafake = pd.concat([megafake_train,megafake_test],axis=0)



In [5]:
megafake

Unnamed: 0,id,dataset,generation_technique,text,text_clean,chunk_id,title,binary_label,Human_vs_AI
0,MegaFake_25258,MegaFake,style based,secret tickle-fetish sleeper cells. What we lo...,secret ticklefetish sleeper cells what we love...,19,False,0,1
3,MegaFake_150633,MegaFake,story based,"Selena Gomez is known for her stunning glow, a...",selena gomez is known for her stunning glow an...,1,False,0,1
4,MegaFake_62305,MegaFake,style based,"have happened at the end of the season, for va...",have happened at the end of the season for var...,3,False,0,1
5,MegaFake_39737,MegaFake,style based,a crucial part of the series since its incepti...,a crucial part of the series since its incepti...,2,False,0,1
6,MegaFake_11495,MegaFake,style based,hours after the report was published to expres...,hours after the report was published to expres...,2,False,0,1
...,...,...,...,...,...,...,...,...,...
54011,MegaFake_94170,MegaFake,content based,or is Chyna to blame? Or could both be at fault?,or is chyna to blame or could both be at fault,5,False,0,1
54012,MegaFake_140128,MegaFake,integration based,"the Kennedy compound in Massachusetts, accordi...",the kennedy compound in massachusetts accordin...,3,False,0,1
54013,MegaFake_193411,MegaFake,story based,"several other films and TV shows, including 'G...",several other films and tv shows including gil...,5,False,0,1
54015,MegaFake_28210,MegaFake,style based,"""Get Ready for the Wildest Season Yet of The R...",get ready for the wildest season yet of the re...,1,False,0,1


In [6]:
len(megafake)


203530

In [13]:

X_test = megafake['text'].values  
y_test = {
    'Fake_News_Output': megafake['binary_label'].values,
    'Human_vs_AI': megafake['Human_vs_AI'].values
} 


In [14]:
X_test = [str(x) for x in X_test]

In [None]:
# Tokenization

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

test_encodings = tokenizer(X_test, truncation=True, padding = 'max_length',max_length=60, return_tensors="tf")





In [16]:
# # Prepare dataset
inputs_test = {
    'input_word_ids': test_encodings['input_ids'],
    'input_mask': test_encodings['attention_mask'],
    'input_type_ids': test_encodings['token_type_ids']
}

# Predictions fake news
predictions = model.predict(dict(inputs_test))

threshold = 0.5
preds = (predictions[0]>threshold).astype(int)
print(classification_report(preds,y_test['Fake_News_Output']))


# Predictions Human-vs-AI
preds_new = (predictions[1]>threshold).astype(int)
print(classification_report(preds_new,y_test['Human_vs_AI']))


              precision    recall  f1-score   support

           0       0.89      1.00      0.94    181441
           1       0.00      0.00      0.00     22089

    accuracy                           0.89    203530
   macro avg       0.45      0.50      0.47    203530
weighted avg       0.79      0.89      0.84    203530

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     32769
           1       0.84      1.00      0.91    170761

    accuracy                           0.84    203530
   macro avg       0.42      0.50      0.46    203530
weighted avg       0.70      0.84      0.77    203530



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Per 'generation technique' accuracy (Fake News Detection)
preds_new_new = pd.DataFrame(preds,index=megafake.index)
concat = pd.concat([megafake,preds_new_new],axis=1)

concat.columns.values[-1] = 'preds'

display(concat['preds'].value_counts())

accuracy_df = (concat['preds'] == concat['binary_label']).groupby(concat['generation_technique']).mean()

print(accuracy_df)


# Per 'generation technique'  accuracy (Human_vs_AI)
preds_new_new = pd.DataFrame(preds_new,index=megafake.index)
concat = pd.concat([megafake,preds_new_new],axis=1)

concat.columns.values[-1] = 'preds'

display(concat['preds'].value_counts())

accuracy_df = (concat['preds'] == concat['Human_vs_AI']).groupby(concat['generation_technique']).mean()

print(accuracy_df)




preds
0    181441
1     22089
Name: count, dtype: int64

generation_technique
content based        0.803237
integration based    0.875458
story based          0.986189
style based          0.904327
dtype: float64


preds
1    170761
0     32769
Name: count, dtype: int64

generation_technique
content based        0.707968
integration based    0.818645
story based          0.976293
style based          0.860159
dtype: float64
