In [1]:
import os
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import mixed_precision
from sklearn.metrics import confusion_matrix, classification_report 
from transformers import BertTokenizer
import tensorflow_addons as tfa

os.chdir('C:/Users/makri/OneDrive/Documents/GitHub/Deep_Learning_Final_Project')

data_train = pd.read_csv('all_train(1).tsv', sep='\t')
data_test = pd.read_csv('all_test_public.tsv', sep='\t')
data_validate = pd.read_csv('all_validate.tsv', sep='\t')

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        # Enable memory growth for the first (and only) GPU
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print(f"Memory growth enabled for {gpus[0]}")
    except RuntimeError as e:
        print(e)  # This happens if GPUs are initialized before setting memory growth
else:
    print("No GPU found. Running on CPU.")

Memory growth enabled for PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
BATCH_SIZE = 16
SEED = 42
# Training data
X_train = data_train['clean_title'].values  
y_train = data_train['2_way_label'].values 

# Validation data
X_val = data_validate['clean_title'].values
y_val = data_validate['2_way_label'].values


In [4]:
X_train = [str(x) for x in X_train]
X_val = [str(x) for x in X_val]

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(X_train, truncation=True, padding='max_length', max_length=30, return_tensors="tf")

val_encodings = tokenizer(
    X_val,
    truncation=True,
    padding='max_length',
    max_length=30,
    return_tensors="tf"
)



In [6]:
# Prepare dataset
inputs = {
    'input_word_ids': train_encodings['input_ids'],
    'input_mask': train_encodings['attention_mask'],
    'input_type_ids': train_encodings['token_type_ids']
}
labels = tf.cast(y_train, tf.float32)


val_inputs = {
    'input_word_ids': val_encodings['input_ids'],
    'input_mask': val_encodings['attention_mask'],
    'input_type_ids': val_encodings['token_type_ids']
}
val_labels = tf.cast(y_val, tf.float32)


# Now build dataset properly
train_ds = tf.data.Dataset.from_tensor_slices((inputs,  labels)).shuffle(buffer_size=len(X_train),seed=SEED).batch(BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)
val_ds = tf.data.Dataset.from_tensor_slices((val_inputs, val_labels))\
         .batch(BATCH_SIZE)\
         .prefetch(tf.data.AUTOTUNE)


In [7]:
# Build Model
mixed_precision.set_global_policy('mixed_float16')

# BERT encoder 
bert_model = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3",
    trainable=True
)


# Inputs
input_ids = tf.keras.Input(shape=(30,), dtype=tf.int32, name="input_word_ids")
input_mask = tf.keras.Input(shape=(30,), dtype=tf.int32, name="input_mask")
type_ids = tf.keras.Input(shape=(30,), dtype=tf.int32, name="input_type_ids")

bert_inputs = {
    'input_word_ids': input_ids,
    'input_mask': input_mask,
    'input_type_ids': type_ids
}

bert_outputs = bert_model(bert_inputs)
cls_token = bert_outputs['pooled_output']

x = tf.keras.layers.Dropout(0.1)(cls_token)
x = tf.keras.layers.Dense(1, activation='sigmoid', dtype='float32')(x)

model = tf.keras.Model(inputs=[input_ids, input_mask, type_ids], outputs=x)
model.summary()


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4050 Laptop GPU, compute capability 8.9
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_mask (InputLayer)        [(None, 30)]         0           []                               
                                                                                                  
 input_type_ids (InputLayer)    [(None, 30)]         0           []                               
                                                                                                  
 input_word_ids (InputLayer)    [(None, 30)]         0           []                               
                                        

In [None]:
model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = tf.keras.optimizers.Adam(2e-5), metrics = ['accuracy'])

In [None]:
history = model.fit(train_ds, validation_data=val_ds, epochs=1)


Epoch 1/2




Epoch 2/2


In [10]:

X_test = data_test['clean_title'].values
y_test = data_test['2_way_label'].values


X_test = [str(x) for x in X_test]

test_encodings = tokenizer(X_test, truncation=True, padding='max_length', max_length=30, return_tensors="tf")

# Prepare dataset
inputs_test = {
    'input_word_ids': test_encodings['input_ids'],
    'input_mask': test_encodings['attention_mask'],
    'input_type_ids': test_encodings['token_type_ids']
}

# Predictions
predictions = model.predict(dict(inputs_test))

threshold = 0.5
preds = (predictions>threshold).astype(int)

print(classification_report(preds,y_test, target_names = ['Fake','Real']))

              precision    recall  f1-score   support

        Fake       0.89      0.90      0.89     49634
        Real       0.88      0.87      0.88     42810

    accuracy                           0.89     92444
   macro avg       0.89      0.89      0.89     92444
weighted avg       0.89      0.89      0.89     92444



In [25]:
model.save('./my_model_bert_base')



INFO:tensorflow:Assets written to: ./my_model_bert_base\assets


INFO:tensorflow:Assets written to: ./my_model_bert_base\assets


## Reloaded Model ##

In [6]:
reloaded_model = tf.keras.models.load_model('./my_model_bert_base')


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4050 Laptop GPU, compute capability 8.9


In [11]:
news_examples = [
"Election Software Firm Admits Votes Were Reassigned 'for Testing Purposes'",

"Politician Caught Using Deepfake Avatar in Parliament for Months",

"Secret Island Meeting: World Leaders Plan Global Currency Reset",

"Ex-Bodyguard Claims President Is a Hologram Operated by Military AI",

"Anonymous Hackers Leak Evidence of Mind-Control Satellite Funded by Government",

"Opposition Party Linked to Ancient Secret Society in Newly Unearthed Files",

"Entire Town Admits to Being Paid Actors in National Election",

"CIA Insider Warns of 'Fake Crisis' to Postpone Next Election",

"Presidential Candidate Allegedly Communicates with Voters via Brainwaves",

"Voter IDs to Be Replaced with DNA Tests in New Controversial Bill",
"Scientists Discover Pineapple Juice Can Reverse Alzheimer’s in 7 Days",

"New Superfood From Siberia Claims to Regrow Lost Organs",

"Vaccine Turns Out to Be Tracking Device—Bluetooth Scanners Confirm",

"Doctors Say Walking Barefoot on Grass Aligns Your DNA with the Universe",

"Billionaire Admits Fluoride in Water Is Used to Control Population",

"Hospital Replaces Surgeons with AI Robot That ‘Guesses’ the Procedure",

"Government Classifies New 'Emotion Virus' That Spreads Through Screens",

"Unvaccinated Communities Found to Have Higher IQs, Says Dubious Study",

"Pharmaceutical Companies Suppress 3,000-Year-Old Herbal Cure for Diabetes",

"Eating Only Blue Foods for a Week Reported to ‘Cleanse the Mind’",
"Time Travel Accidentally Achieved in University Physics Lab",

"Tesla Coil Emits Signal That Attracts UFOs, Claims Amateur Scientist",

"Dinosaurs May Still Exist in Underground Caves, New Satellite Images Suggest",

"Scientists Discover Earth Is Gently Expanding—‘Global Swelling’ Confirmed",

"Human Brain Can Connect to WiFi—Experiment 'Silenced' by Telecom Giants",

"Refrigerator Magnets Found to Disturb Earth’s Magnetic Field in Rural Areas",

"New Black Hole Found Near Earth—Only Detectable by Dogs",

"AI Predicts End of World in 2031 Using Ancient Mayan Algorithm",

"Higgs Boson Particle Causes Déjà Vu, Say Quantum Theorists",

"Researchers Accidentally Create Miniature Sun in Basement Lab"
]

news_examples_real =  [
    "UN Passes New Climate Accord Aimed at Curbing Global Emissions by 2040",
    "FDA Approves First-Ever Oral Treatment for Rare Autoimmune Disorder",
    "Senate Votes to Extend Federal Student Loan Relief Program",
    "NASA Launches Mission to Study Jupiter's Icy Moons",
    "New Research Links Regular Exercise to Improved Cognitive Function in Older Adults",
    "Global Inflation Eases as Supply Chains Stabilize, Report Finds",
    "Supreme Court to Hear Case on Digital Privacy and User Data Rights",
    "Study Shows Mediterranean Diet Lowers Risk of Heart Disease",
    "World Health Organization Declares End to Recent Ebola Outbreak",
    "President Signs Executive Order to Expand Rural Internet Access",
    "Scientists Develop Biodegradable Battery Using Seaweed Extract",
    "City Council Approves Plan to Increase Affordable Housing Units",
    "CDC Reports Decline in U.S. Smoking Rates, Highest Drop in a Decade",
    "Tech Companies Pledge to Combat AI-Generated Misinformation",
    "Archaeologists Discover Lost Ancient Settlement in Amazon Rainforest",
    "Education Department Launches Initiative to Improve Digital Literacy in Schools",
    "New York Implements Congestion Pricing to Reduce City Traffic",
    "Researchers Achieve Breakthrough in Nuclear Fusion Stability",
    "Bipartisan Bill Introduced to Modernize National Infrastructure",
    "Major Hospitals Roll Out AI Tools to Improve Diagnostic Accuracy",
    "New Marine Reserve Protects Critical Coral Reef Ecosystem",
    "Federal Reserve Maintains Interest Rate Amid Economic Uncertainty",
    "Volunteers Restore Thousands of Acres of Forest in Conservation Push",
    "Scientists Map Brain Circuits Tied to Emotional Processing",
    "U.S. Unemployment Hits Lowest Level Since 2019, Labor Dept. Says",
    "Study Confirms Effectiveness of Early Childhood Education Programs",
    "Health Officials Urge Vaccinations Ahead of Predicted Flu Surge",
    "Electric Vehicle Sales Reach New Record in Global Market",
    "Court Rules in Favor of Free Speech Protections in Landmark Case",
    "Major Breakthrough in Parkinson’s Disease Treatment Announced"
]


real_news_titles = [
    "Top Turkish Refiner Tupras Resumes Buying Russian Urals Crude",
    "Emmanuel Macron Meets US Envoys for Talks on Ukraine",
    "Global Stock Markets Plunge Amid US Tariff Announcements",
    "'Hands Off' Protests Erupt Across US Against Trump Administration Policies",
    "Green Day's Coachella Performance Sparks Palm Tree Fire",
    "Sarah Ferguson, Bill Nye, and Laverne Cox Join Global Citizen NOW Summit Lineup",
    "Birmingham Bin Strike Draws International Attention Amid Waste Accumulation",
    "Labour MP Dan Norris Suspended Following Arrest on Serious Charges",
    "Former Scottish Conservative MSP Jamie Greene Defects to Liberal Democrats",
    "Parliament Recalled to Debate Emergency Law to Save British Steel",
    "King Charles Addresses Italian Parliament and Meets Pope Francis",
    "MPs Yuan Yang and Abtisam Mohamed Denied Entry to Israel",
    "Cabinet Office to Cut 2,100 Civil Servant Roles in Government Reform",
    "Former Minister Penny Mordaunt Joins British American Tobacco's Advisory Board",
    "MP Lucy Powell's Twitter Account Hacked to Promote Cryptocurrency Scam",
    "Welsh Government Confirms Senedd Vote on Assisted Dying Legislation",
    "First Live Footage of Colossal Squid Captured in Deep Ocean",
    "Strongest Evidence Yet of Life Found on Exoplanet K2-18b",
    "SpaceX's Fram2 Mission Achieves First Crewed Polar Orbit Flight",
    "Colossal Biosciences Unveils Genetically Modified Wolves Resembling Dire Wolves",
    "President Trump Announces Sweeping Tariffs on Global Imports",
    "Hungary Withdraws from International Criminal Court Amid Netanyahu Visit",
    "South Korea's Constitutional Court Upholds Impeachment of President Yoon Suk Yeol",
    "Ecuador's President Daniel Noboa Re-elected in General Election",
    "Gabonese President Brice Oligui Nguema Wins Full Term Post-Coup",
    "Andrija Jelavic Among Top International Recruits for Kentucky Wildcats Basketball",
    "Birmingham's Waste Crisis Highlights Public Service Challenges in UK",
    "Global Citizen NOW Summit to Address Poverty and Sustainability with Star-Studded Lineup",
    "Coachella 2025 Features High-Profile Performances Amid Safety Concerns",
    "'Hands Off' Demonstrations Signal Widespread Opposition to US Administration Policies"
]

politifact = ["Brad Schimel said that he wanted to be part of Donald Trump's support network.",
              "Less than half of Americans have $1,000 in savings.",
              "Cody Balmer, the person charged with setting fire to Pennsylvania governor’s residence, posted a photo that said “F--- Trump.",
              "U.S. Rep. Jasmine Crockett says “she ‘simply forgot' to inform Social Security that her grandmother died in 2012 … She’s been collecting $2,600 a month for 13 years.”",
              "Photos and videos show Washington, D.C., protests against White House adviser Elon Musk are “staged & paid — bussed in, scripted, clocked out.”",
              "In 2024, “China made $1 trillion off trade with the United States.”",
              "Under the previous administration … the cost of a median-price home in America more than doubled, and that was just in four years. "]

examples_encodings = tokenizer(politifact, truncation=True, padding='max_length',max_length=30, return_tensors="tf")
examples_encodings_real = tokenizer(real_news_titles, truncation=True, padding='max_length',max_length=30, return_tensors="tf")


In [12]:
inputs = {
    'input_word_ids': examples_encodings['input_ids'],
    'input_mask': examples_encodings['attention_mask'],
    'input_type_ids': examples_encodings['token_type_ids']
}
inputs_real = {
    'input_word_ids': examples_encodings_real['input_ids'],
    'input_mask': examples_encodings_real['attention_mask'],
    'input_type_ids': examples_encodings_real['token_type_ids']
}




In [13]:

results = reloaded_model(inputs)

In [14]:
results

<tf.Tensor: shape=(7, 1), dtype=float32, numpy=
array([[0.9269838 ],
       [0.971058  ],
       [0.9593943 ],
       [0.9716893 ],
       [0.72715485],
       [0.78166467],
       [0.84037626]], dtype=float32)>