In [None]:
!pip install datasets
#



# Learning Rate : 1e-5

In [None]:
from transformers import EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import torch
from torch.nn.functional import softmax

import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset

# Read the pre-merged train/test CSVs; both paths are relative to the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("llmfake_mergedTrain.csv", "llmfake_mergedTest.csv")
)

def clean_df(df):
    """Return a new frame containing only the ``text`` and ``label`` columns.

    The ``synthetic_misinformation`` column (if present) is renamed to
    ``text`` before the two columns are selected. The input frame is not
    modified. Selecting a column that does not exist raises ``KeyError``.
    """
    renamed = df.rename(columns={"synthetic_misinformation": "text"})
    return renamed[["text", "label"]]

# Reduce both frames to the (text, label) schema.
train_df, test_df = clean_df(train_df), clean_df(test_df)

# Wrap the pandas frames as Hugging Face datasets so they can be mapped and split.
train_dataset, test_dataset = map(Dataset.from_pandas, (train_df, test_df))

# Checkpoint name shared by the tokenizer and the classifier.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Encode ``batch["text"]`` with the module-level ``tokenizer``.

    Each sequence is truncated and padded to exactly 128 tokens. The function
    is passed to ``Dataset.map(..., batched=True)`` in this cell.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **encode_options)

# Tokenize both datasets up front (adds the encoded columns produced by `tokenize`).
train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Hold out 20% of the training data for validation / early stopping.
# The fixed seed keeps the split identical every time this cell runs.
train_valid = train_dataset.train_test_split(test_size=0.2, seed=42)
train_split = train_valid['train']
valid_split = train_valid['test']  # the "test" part of this split is the validation fold

# bert-base-uncased ships without a classification head, so from_pretrained
# creates `classifier.weight`/`classifier.bias` with fresh random values.
# Seed torch first so that initialisation does not depend on whatever RNG state
# earlier cells left behind; otherwise runs with different learning rates would
# also start from different heads.
torch.manual_seed(42)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# No adapter wrapping is applied here: the full model is fine-tuned as-is.
model = base_model

def compute_metrics(eval_pred):
    """Compute accuracy and binary precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; this function is handed to
            ``Trainer`` as its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row.
    predicted = torch.tensor(logits).argmax(dim=1)
    prf = precision_recall_fscore_support(
        labels, predicted, average='binary', zero_division=0
    )
    metrics = {'accuracy': accuracy_score(labels, predicted)}
    # prf is (precision, recall, f1, support); support is not reported.
    metrics.update(zip(('precision', 'recall', 'f1'), prf[:3]))
    return metrics


# Hyperparameter under study in this cell; also used to name this run's folders.
LEARNING_RATE = 1e-5
RUN_TAG = f"lr_{LEARNING_RATE:g}"

training_args = TrainingArguments(
    # Give this learning rate its own checkpoint/log folders so its artifacts
    # are not mixed with runs that use a different learning rate.
    output_dir=f"./bert_results/{RUN_TAG}",
    eval_strategy="epoch",
    save_strategy="epoch",
    # A checkpoint is written every epoch (up to 20). Cap how many are kept on
    # disk; with load_best_model_at_end the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower validation loss is better
    num_train_epochs=20,      # upper bound; early stopping usually ends sooner
    weight_decay=0.01,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    logging_dir=f"./logs/{RUN_TAG}",
    logging_steps=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=valid_split,
    compute_metrics=compute_metrics,
    # Stop once validation loss has failed to improve for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)


trainer.train()

# A full test-set evaluation used to run here with its result thrown away (the
# same evaluation is executed and printed at the end of the cell). Skip the
# duplicate pass and only make sure the model is in inference mode for the
# code that follows.
model.eval()

# Hand-written probe sentences for a quick qualitative look at the classifier.
test_texts = [
    "The economy is improving according to alien scientists",
    # NOTE(review): tagged "#true" by the author, yet the sentence negates a
    # commonly known fact; confirm which prediction is actually expected.
    "King Salman is not the King of Saudi Arabia.",
    "COVID-19 vaccines have been approved by the World Health Organization",  # author-tagged true
    "Aliens landed in New York and started dancing on Broadway.",
    "	The moon is made entirely of cheese",  # the leading tab is part of the string
    "The Eiffel Tower is located in Paris, France",  # author-tagged true
]

# Encode all probes as one padded batch and move it to the model's device.
encoded = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
inputs = encoded.to(model.device)
with torch.no_grad():
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    preds = probs.argmax(dim=1)

# assumes label 1 means genuine news; TODO confirm against the CSV label encoding
for sentence, label_id in zip(test_texts, preds.tolist()):
    verdict = "True News ✅" if label_id == 1 else "Misinformation ❌"
    print(f"Text: {sentence}", f"Prediction: {verdict}", "-" * 60, sep="\n")


# Score the held-out test set and print every returned metric to 4 decimals.
final_results = trainer.evaluate(test_dataset)
print("\n📊 Final Evaluation Results on Test Set:")
for metric_name, metric_value in final_results.items():
    print(f"{metric_name}: {metric_value:.4f}")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/5336 [00:00<?, ? examples/s]

Map:   0%|          | 0/1336 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshouqaldous5[0m ([33mshouqaldous5-king-saud-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3801,0.337401,0.878277,0.830941,0.955963,0.889078
2,0.1828,0.160643,0.942884,0.915808,0.977982,0.945874
3,0.1135,0.08502,0.970974,0.970696,0.972477,0.971586
4,0.0237,0.098135,0.973783,0.984991,0.963303,0.974026
5,0.0145,0.108602,0.973783,0.981378,0.966972,0.974122
6,0.013,0.104094,0.977528,0.972777,0.983486,0.978102


Text: The economy is improving according to alien scientists
Prediction: True News ✅
------------------------------------------------------------
Text: King Salman is not the King of Saudi Arabia.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: COVID-19 vaccines have been approved by the World Health Organization
Prediction: True News ✅
------------------------------------------------------------
Text: Aliens landed in New York and started dancing on Broadway.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: 	The moon is made entirely of cheese
Prediction: Misinformation ❌
------------------------------------------------------------
Text: The Eiffel Tower is located in Paris, France
Prediction: True News ✅
------------------------------------------------------------

📊 Final Evaluation Results on Test Set:
eval_loss: 0.0988
eval_accuracy: 0.9663
eval_precision: 0.9688
eval_recall: 0.9613
eval

# Learning Rate : 2e-5

In [None]:
from transformers import EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import torch
from torch.nn.functional import softmax

import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset

# Read the pre-merged train/test CSVs; both paths are relative to the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("llmfake_mergedTrain.csv", "llmfake_mergedTest.csv")
)

def clean_df(df):
    """Return a new frame containing only the ``text`` and ``label`` columns.

    The ``synthetic_misinformation`` column (if present) is renamed to
    ``text`` before the two columns are selected. The input frame is not
    modified. Selecting a column that does not exist raises ``KeyError``.
    """
    renamed = df.rename(columns={"synthetic_misinformation": "text"})
    return renamed[["text", "label"]]

# Reduce both frames to the (text, label) schema.
train_df, test_df = clean_df(train_df), clean_df(test_df)

# Wrap the pandas frames as Hugging Face datasets so they can be mapped and split.
train_dataset, test_dataset = map(Dataset.from_pandas, (train_df, test_df))

# Checkpoint name shared by the tokenizer and the classifier.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Encode ``batch["text"]`` with the module-level ``tokenizer``.

    Each sequence is truncated and padded to exactly 128 tokens. The function
    is passed to ``Dataset.map(..., batched=True)`` in this cell.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **encode_options)

# Tokenize both datasets up front (adds the encoded columns produced by `tokenize`).
train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Hold out 20% of the training data for validation / early stopping.
# The fixed seed keeps the split identical every time this cell runs.
train_valid = train_dataset.train_test_split(test_size=0.2, seed=42)
train_split = train_valid['train']
valid_split = train_valid['test']  # the "test" part of this split is the validation fold

# bert-base-uncased ships without a classification head, so from_pretrained
# creates `classifier.weight`/`classifier.bias` with fresh random values.
# Seed torch first so that initialisation does not depend on whatever RNG state
# earlier cells left behind; otherwise runs with different learning rates would
# also start from different heads.
torch.manual_seed(42)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# No adapter wrapping is applied here: the full model is fine-tuned as-is.
model = base_model

def compute_metrics(eval_pred):
    """Compute accuracy and binary precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; this function is handed to
            ``Trainer`` as its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row.
    predicted = torch.tensor(logits).argmax(dim=1)
    prf = precision_recall_fscore_support(
        labels, predicted, average='binary', zero_division=0
    )
    metrics = {'accuracy': accuracy_score(labels, predicted)}
    # prf is (precision, recall, f1, support); support is not reported.
    metrics.update(zip(('precision', 'recall', 'f1'), prf[:3]))
    return metrics


# Hyperparameter under study in this cell; also used to name this run's folders.
LEARNING_RATE = 2e-5
RUN_TAG = f"lr_{LEARNING_RATE:g}"

training_args = TrainingArguments(
    # Give this learning rate its own checkpoint/log folders so its artifacts
    # are not mixed with runs that use a different learning rate.
    output_dir=f"./bert_results/{RUN_TAG}",
    eval_strategy="epoch",
    save_strategy="epoch",
    # A checkpoint is written every epoch (up to 20). Cap how many are kept on
    # disk; with load_best_model_at_end the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower validation loss is better
    num_train_epochs=20,      # upper bound; early stopping usually ends sooner
    weight_decay=0.01,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    logging_dir=f"./logs/{RUN_TAG}",
    logging_steps=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=valid_split,
    compute_metrics=compute_metrics,
    # Stop once validation loss has failed to improve for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)


trainer.train()

# A full test-set evaluation used to run here with its result thrown away (the
# same evaluation is executed and printed at the end of the cell). Skip the
# duplicate pass and only make sure the model is in inference mode for the
# code that follows.
model.eval()

# Hand-written probe sentences for a quick qualitative look at the classifier.
test_texts = [
    "The economy is improving according to alien scientists",
    # NOTE(review): tagged "#true" by the author, yet the sentence negates a
    # commonly known fact; confirm which prediction is actually expected.
    "King Salman is not the King of Saudi Arabia.",
    "COVID-19 vaccines have been approved by the World Health Organization",  # author-tagged true
    "Aliens landed in New York and started dancing on Broadway.",
    "	The moon is made entirely of cheese",  # the leading tab is part of the string
    "The Eiffel Tower is located in Paris, France",  # author-tagged true
]

# Encode all probes as one padded batch and move it to the model's device.
encoded = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
inputs = encoded.to(model.device)
with torch.no_grad():
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    preds = probs.argmax(dim=1)

# assumes label 1 means genuine news; TODO confirm against the CSV label encoding
for sentence, label_id in zip(test_texts, preds.tolist()):
    verdict = "True News ✅" if label_id == 1 else "Misinformation ❌"
    print(f"Text: {sentence}", f"Prediction: {verdict}", "-" * 60, sep="\n")


# Score the held-out test set and print every returned metric to 4 decimals.
final_results = trainer.evaluate(test_dataset)
print("\n📊 Final Evaluation Results on Test Set:")
for metric_name, metric_value in final_results.items():
    print(f"{metric_name}: {metric_value:.4f}")



Map:   0%|          | 0/5336 [00:00<?, ? examples/s]

Map:   0%|          | 0/1336 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2785,0.190376,0.930712,0.933702,0.930275,0.931985
2,0.0865,0.108061,0.970037,0.957219,0.985321,0.971067
3,0.0367,0.15936,0.962547,0.936097,0.994495,0.964413
4,0.0008,0.112603,0.979401,0.964476,0.99633,0.980144
5,0.0055,0.09935,0.983146,0.976492,0.990826,0.983607
6,0.0003,0.095531,0.985019,0.980036,0.990826,0.985401
7,0.0002,0.103765,0.98221,0.978182,0.987156,0.982648
8,0.0002,0.119696,0.978464,0.983333,0.974312,0.978802
9,0.0075,0.093507,0.986891,0.981851,0.992661,0.987226
10,0.0001,0.104483,0.984082,0.97482,0.994495,0.984559


Text: The economy is improving according to alien scientists
Prediction: True News ✅
------------------------------------------------------------
Text: King Salman is not the King of Saudi Arabia.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: COVID-19 vaccines have been approved by the World Health Organization
Prediction: True News ✅
------------------------------------------------------------
Text: Aliens landed in New York and started dancing on Broadway.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: 	The moon is made entirely of cheese
Prediction: Misinformation ❌
------------------------------------------------------------
Text: The Eiffel Tower is located in Paris, France
Prediction: Misinformation ❌
------------------------------------------------------------

📊 Final Evaluation Results on Test Set:
eval_loss: 0.1112
eval_accuracy: 0.9828
eval_precision: 0.9860
eval_recall: 0.9783

# Learning Rate : 3e-5

In [None]:
from transformers import EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import torch
from torch.nn.functional import softmax

import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset

# Read the pre-merged train/test CSVs; both paths are relative to the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("llmfake_mergedTrain.csv", "llmfake_mergedTest.csv")
)

def clean_df(df):
    """Return a new frame containing only the ``text`` and ``label`` columns.

    The ``synthetic_misinformation`` column (if present) is renamed to
    ``text`` before the two columns are selected. The input frame is not
    modified. Selecting a column that does not exist raises ``KeyError``.
    """
    renamed = df.rename(columns={"synthetic_misinformation": "text"})
    return renamed[["text", "label"]]

# Reduce both frames to the (text, label) schema.
train_df, test_df = clean_df(train_df), clean_df(test_df)

# Wrap the pandas frames as Hugging Face datasets so they can be mapped and split.
train_dataset, test_dataset = map(Dataset.from_pandas, (train_df, test_df))

# Checkpoint name shared by the tokenizer and the classifier.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Encode ``batch["text"]`` with the module-level ``tokenizer``.

    Each sequence is truncated and padded to exactly 128 tokens. The function
    is passed to ``Dataset.map(..., batched=True)`` in this cell.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **encode_options)

# Tokenize both datasets up front (adds the encoded columns produced by `tokenize`).
train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Hold out 20% of the training data for validation / early stopping.
# The fixed seed keeps the split identical every time this cell runs.
train_valid = train_dataset.train_test_split(test_size=0.2, seed=42)
train_split = train_valid['train']
valid_split = train_valid['test']  # the "test" part of this split is the validation fold

# bert-base-uncased ships without a classification head, so from_pretrained
# creates `classifier.weight`/`classifier.bias` with fresh random values.
# Seed torch first so that initialisation does not depend on whatever RNG state
# earlier cells left behind; otherwise runs with different learning rates would
# also start from different heads.
torch.manual_seed(42)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# No adapter wrapping is applied here: the full model is fine-tuned as-is.
model = base_model

def compute_metrics(eval_pred):
    """Compute accuracy and binary precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; this function is handed to
            ``Trainer`` as its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row.
    predicted = torch.tensor(logits).argmax(dim=1)
    prf = precision_recall_fscore_support(
        labels, predicted, average='binary', zero_division=0
    )
    metrics = {'accuracy': accuracy_score(labels, predicted)}
    # prf is (precision, recall, f1, support); support is not reported.
    metrics.update(zip(('precision', 'recall', 'f1'), prf[:3]))
    return metrics


# Hyperparameter under study in this cell; also used to name this run's folders.
LEARNING_RATE = 3e-5
RUN_TAG = f"lr_{LEARNING_RATE:g}"

training_args = TrainingArguments(
    # Give this learning rate its own checkpoint/log folders so its artifacts
    # are not mixed with runs that use a different learning rate.
    output_dir=f"./bert_results/{RUN_TAG}",
    eval_strategy="epoch",
    save_strategy="epoch",
    # A checkpoint is written every epoch (up to 20). Cap how many are kept on
    # disk; with load_best_model_at_end the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower validation loss is better
    num_train_epochs=20,      # upper bound; early stopping usually ends sooner
    weight_decay=0.01,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    logging_dir=f"./logs/{RUN_TAG}",
    logging_steps=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=valid_split,
    compute_metrics=compute_metrics,
    # Stop once validation loss has failed to improve for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)


trainer.train()

# A full test-set evaluation used to run here with its result thrown away (the
# same evaluation is executed and printed at the end of the cell). Skip the
# duplicate pass and only make sure the model is in inference mode for the
# code that follows.
model.eval()

# Hand-written probe sentences for a quick qualitative look at the classifier.
test_texts = [
    "The economy is improving according to alien scientists",
    # NOTE(review): tagged "#true" by the author, yet the sentence negates a
    # commonly known fact; confirm which prediction is actually expected.
    "King Salman is not the King of Saudi Arabia.",
    "COVID-19 vaccines have been approved by the World Health Organization",  # author-tagged true
    "Aliens landed in New York and started dancing on Broadway.",
    "	The moon is made entirely of cheese",  # the leading tab is part of the string
    "The Eiffel Tower is located in Paris, France",  # author-tagged true
]

# Encode all probes as one padded batch and move it to the model's device.
encoded = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
inputs = encoded.to(model.device)
with torch.no_grad():
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    preds = probs.argmax(dim=1)

# assumes label 1 means genuine news; TODO confirm against the CSV label encoding
for sentence, label_id in zip(test_texts, preds.tolist()):
    verdict = "True News ✅" if label_id == 1 else "Misinformation ❌"
    print(f"Text: {sentence}", f"Prediction: {verdict}", "-" * 60, sep="\n")


# Score the held-out test set and print every returned metric to 4 decimals.
final_results = trainer.evaluate(test_dataset)
print("\n📊 Final Evaluation Results on Test Set:")
for metric_name, metric_value in final_results.items():
    print(f"{metric_name}: {metric_value:.4f}")



Map:   0%|          | 0/5336 [00:00<?, ? examples/s]

Map:   0%|          | 0/1336 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1874,0.131322,0.953184,0.962617,0.944954,0.953704
2,0.0812,0.085143,0.978464,0.967742,0.990826,0.979148
3,0.0461,0.130847,0.970037,0.950791,0.992661,0.971275
4,0.0109,0.087136,0.981273,0.978142,0.985321,0.981718
5,0.0006,0.072816,0.985019,0.985321,0.985321,0.985321
6,0.0002,0.124621,0.978464,0.966071,0.992661,0.979186
7,0.0001,0.122848,0.978464,0.969424,0.988991,0.97911


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1874,0.131322,0.953184,0.962617,0.944954,0.953704
2,0.0812,0.085143,0.978464,0.967742,0.990826,0.979148
3,0.0461,0.130847,0.970037,0.950791,0.992661,0.971275
4,0.0109,0.087136,0.981273,0.978142,0.985321,0.981718
5,0.0006,0.072816,0.985019,0.985321,0.985321,0.985321
6,0.0002,0.124621,0.978464,0.966071,0.992661,0.979186
7,0.0001,0.122848,0.978464,0.969424,0.988991,0.97911
8,0.0058,0.088093,0.983146,0.987061,0.979817,0.983425


Text: The economy is improving according to alien scientists
Prediction: Misinformation ❌
------------------------------------------------------------
Text: King Salman is not the King of Saudi Arabia.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: COVID-19 vaccines have been approved by the World Health Organization
Prediction: True News ✅
------------------------------------------------------------
Text: Aliens landed in New York and started dancing on Broadway.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: 	The moon is made entirely of cheese
Prediction: Misinformation ❌
------------------------------------------------------------
Text: The Eiffel Tower is located in Paris, France
Prediction: Misinformation ❌
------------------------------------------------------------

📊 Final Evaluation Results on Test Set:
eval_loss: 0.1095
eval_accuracy: 0.9805
eval_precision: 0.9874
eval_recall: 0

# Learning Rate : 5e-5

In [None]:
from transformers import EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import torch
from torch.nn.functional import softmax

import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset

# Read the pre-merged train/test CSVs; both paths are relative to the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("llmfake_mergedTrain.csv", "llmfake_mergedTest.csv")
)

def clean_df(df):
    """Return a new frame containing only the ``text`` and ``label`` columns.

    The ``synthetic_misinformation`` column (if present) is renamed to
    ``text`` before the two columns are selected. The input frame is not
    modified. Selecting a column that does not exist raises ``KeyError``.
    """
    renamed = df.rename(columns={"synthetic_misinformation": "text"})
    return renamed[["text", "label"]]

# Reduce both frames to the (text, label) schema.
train_df, test_df = clean_df(train_df), clean_df(test_df)

# Wrap the pandas frames as Hugging Face datasets so they can be mapped and split.
train_dataset, test_dataset = map(Dataset.from_pandas, (train_df, test_df))

# Checkpoint name shared by the tokenizer and the classifier.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Encode ``batch["text"]`` with the module-level ``tokenizer``.

    Each sequence is truncated and padded to exactly 128 tokens. The function
    is passed to ``Dataset.map(..., batched=True)`` in this cell.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **encode_options)

# Tokenize both datasets up front (adds the encoded columns produced by `tokenize`).
train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Hold out 20% of the training data for validation / early stopping.
# The fixed seed keeps the split identical every time this cell runs.
train_valid = train_dataset.train_test_split(test_size=0.2, seed=42)
train_split = train_valid['train']
valid_split = train_valid['test']  # the "test" part of this split is the validation fold

# bert-base-uncased ships without a classification head, so from_pretrained
# creates `classifier.weight`/`classifier.bias` with fresh random values.
# Seed torch first so that initialisation does not depend on whatever RNG state
# earlier cells left behind; otherwise runs with different learning rates would
# also start from different heads.
torch.manual_seed(42)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# No adapter wrapping is applied here: the full model is fine-tuned as-is.
model = base_model

def compute_metrics(eval_pred):
    """Compute accuracy and binary precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; this function is handed to
            ``Trainer`` as its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row.
    predicted = torch.tensor(logits).argmax(dim=1)
    prf = precision_recall_fscore_support(
        labels, predicted, average='binary', zero_division=0
    )
    metrics = {'accuracy': accuracy_score(labels, predicted)}
    # prf is (precision, recall, f1, support); support is not reported.
    metrics.update(zip(('precision', 'recall', 'f1'), prf[:3]))
    return metrics


# Hyperparameter under study in this cell; also used to name this run's folders.
LEARNING_RATE = 5e-5
RUN_TAG = f"lr_{LEARNING_RATE:g}"

training_args = TrainingArguments(
    # Give this learning rate its own checkpoint/log folders so its artifacts
    # are not mixed with runs that use a different learning rate.
    output_dir=f"./bert_results/{RUN_TAG}",
    eval_strategy="epoch",
    save_strategy="epoch",
    # A checkpoint is written every epoch (up to 20). Cap how many are kept on
    # disk; with load_best_model_at_end the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower validation loss is better
    num_train_epochs=20,      # upper bound; early stopping usually ends sooner
    weight_decay=0.01,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    logging_dir=f"./logs/{RUN_TAG}",
    logging_steps=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=valid_split,
    compute_metrics=compute_metrics,
    # Stop once validation loss has failed to improve for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)


trainer.train()

# A full test-set evaluation used to run here with its result thrown away (the
# same evaluation is executed and printed at the end of the cell). Skip the
# duplicate pass and only make sure the model is in inference mode for the
# code that follows.
model.eval()

# Hand-written probe sentences for a quick qualitative look at the classifier.
test_texts = [
    "The economy is improving according to alien scientists",
    # NOTE(review): tagged "#true" by the author, yet the sentence negates a
    # commonly known fact; confirm which prediction is actually expected.
    "King Salman is not the King of Saudi Arabia.",
    "COVID-19 vaccines have been approved by the World Health Organization",  # author-tagged true
    "Aliens landed in New York and started dancing on Broadway.",
    "	The moon is made entirely of cheese",  # the leading tab is part of the string
    "The Eiffel Tower is located in Paris, France",  # author-tagged true
]

# Encode all probes as one padded batch and move it to the model's device.
encoded = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
inputs = encoded.to(model.device)
with torch.no_grad():
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    preds = probs.argmax(dim=1)

# assumes label 1 means genuine news; TODO confirm against the CSV label encoding
for sentence, label_id in zip(test_texts, preds.tolist()):
    verdict = "True News ✅" if label_id == 1 else "Misinformation ❌"
    print(f"Text: {sentence}", f"Prediction: {verdict}", "-" * 60, sep="\n")


# Score the held-out test set and print every returned metric to 4 decimals.
final_results = trainer.evaluate(test_dataset)
print("\n📊 Final Evaluation Results on Test Set:")
for metric_name, metric_value in final_results.items():
    print(f"{metric_name}: {metric_value:.4f}")



Map:   0%|          | 0/5336 [00:00<?, ? examples/s]

Map:   0%|          | 0/1336 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1534,0.120825,0.955056,0.971537,0.93945,0.955224
2,0.0302,0.140792,0.962547,0.970205,0.955963,0.963031
3,0.0178,0.084839,0.979401,0.971171,0.988991,0.98
4,0.0363,0.146191,0.976592,0.962633,0.992661,0.977416
5,0.0007,0.131396,0.979401,0.969479,0.990826,0.980036
6,0.0066,0.211079,0.967228,0.944251,0.994495,0.968722


Text: The economy is improving according to alien scientists
Prediction: True News ✅
------------------------------------------------------------
Text: King Salman is not the King of Saudi Arabia.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: COVID-19 vaccines have been approved by the World Health Organization
Prediction: True News ✅
------------------------------------------------------------
Text: Aliens landed in New York and started dancing on Broadway.
Prediction: Misinformation ❌
------------------------------------------------------------
Text: 	The moon is made entirely of cheese
Prediction: Misinformation ❌
------------------------------------------------------------
Text: The Eiffel Tower is located in Paris, France
Prediction: Misinformation ❌
------------------------------------------------------------

📊 Final Evaluation Results on Test Set:
eval_loss: 0.1074
eval_accuracy: 0.9775
eval_precision: 0.9768
eval_recall: 0.9768