<a href="https://colab.research.google.com/github/Furkan-Simsek/fakeNewsDetectionWithAI/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# One identifier drives both loads so the tokenizer and the model cannot drift apart.
# NOTE(review): FinBERT is a financial-sentiment checkpoint, so its classification
# head and label names presumably describe sentiment rather than fake/real news —
# confirm this is the intended starting point for fine-tuning.
base_checkpoint = "ProsusAI/finbert"

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(base_checkpoint)

In [None]:
from datasets import load_dataset

# Load the "mrm8488/fake-news" dataset via `datasets.load_dataset`.
# Later cells read its "train" split and the "text" column of each example.
ds = load_dataset("mrm8488/fake-news")

In [None]:
# Split the "train" split 80/20 into train and test sets.
# The split is computed once and both halves are taken from the same result,
# instead of shuffling the data twice and relying on a repeated seed to keep
# the two calls consistent.
split_ds = ds["train"].train_test_split(test_size=0.2, seed=42)
train_ds = split_ds["train"]
test_ds = split_ds["test"]

In [None]:
from transformers import TrainingArguments

# Fine-tuning hyperparameters, gathered in one mapping so they are easy to scan and tweak.
training_config = {
    "output_dir": './results',            # checkpoints are written here
    "num_train_epochs": 3,
    "per_device_train_batch_size": 32,
    "per_device_eval_batch_size": 32,
    "warmup_steps": 500,
    "weight_decay": 0.01,
    "logging_dir": './logs',
    "logging_steps": 10,
}

training_args = TrainingArguments(**training_config)

In [None]:
from transformers import Trainer

# Wire the model, the hyperparameters and the two splits into a Trainer.
# NOTE: train_ds / test_ds are still raw (untokenized) at this point; the
# tokenization cell further down reassigns trainer.train_dataset and
# trainer.eval_dataset, so cells must be run in order before training.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
)
trainer = Trainer(**trainer_kwargs)

In [None]:
def tokenize_function(example):
    """Tokenize the "text" field of a dataset example (or batch of examples).

    Uses the notebook-level ``tokenizer``; every sequence is padded to the
    tokenizer's maximum length and longer sequences are truncated.

    Args:
        example: Mapping with a "text" entry (a string, or a list of strings
            when called through ``Dataset.map(..., batched=True)``).

    Returns:
        Whatever the tokenizer returns for those texts.
    """
    texts = example["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Tokenize both splits in batches with the same function.
train_ds_tokenized, test_ds_tokenized = (
    split.map(tokenize_function, batched=True) for split in (train_ds, test_ds)
)

# The Trainer was constructed with the raw splits; point it at the tokenized ones.
trainer.train_dataset, trainer.eval_dataset = train_ds_tokenized, test_ds_tokenized



In [None]:
import torch
# Report whether PyTorch can see a CUDA device (shown as this cell's output).
torch.cuda.is_available()

In [None]:
import torch

# Put the model on the GPU when one is available, then run fine-tuning.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model.to("cuda")

# Last expression of the cell, so the training summary is displayed.
trainer.train()

In [None]:
from transformers import BertForSequenceClassification, BertTokenizerFast

# Reload fine-tuned weights from a checkpoint directory under the Trainer's output_dir.
# NOTE(review): the step number is hard-coded; "checkpoint-500" presumably exists only
# if training ran for at least 500 steps with the save interval in effect — confirm,
# and confirm that this (rather than the final checkpoint) is the one to export.
# No tokenizer is loaded from the checkpoint: later cells reuse the notebook-level
# `tokenizer` that was created together with the base model.
checkpoint_dir = "results/checkpoint-500"
loaded_model = BertForSequenceClassification.from_pretrained(checkpoint_dir)

In [None]:
# test model
from transformers import pipeline

# Build an inference pipeline from the reloaded checkpoint and the base tokenizer.
classifier = pipeline("text-classification", model=loaded_model, tokenizer=tokenizer)

# Smoke-test on the first held-out example.
# The raw article is passed as-is, so it can be longer than what the model was
# trained on; truncate at inference the same way tokenize_function does for
# training (truncation=True) instead of feeding an over-long sequence.
text = test_ds[0]["text"]
result = classifier(text, truncation=True)
print(result)

In [None]:
# Evaluate the model held by `trainer` and print the resulting metrics.
# NOTE(review): this uses the in-memory `model` that was passed to the Trainer,
# not `loaded_model` reloaded from the checkpoint — confirm that is intended.
evaluation_results = trainer.evaluate()
print(evaluation_results)

In [None]:
def classify_text(text):
  """
  Classifies a given text using the notebook-level `classifier` pipeline.

  The pipeline is looked up by name in the module (notebook) namespace at call
  time, so this function can be defined before the pipeline cell is run.

  Args:
    text: The input text string to classify.

  Returns:
    The classification result from the pipeline, or None (after printing an
    error message) when no `classifier` has been created yet.
  """
  # Inside a function, locals() only holds the function's own variables
  # (here just `text`), so checking it for 'classifier' always failed and the
  # function always returned None. The pipeline lives in the global namespace.
  active_classifier = globals().get('classifier')
  if active_classifier is None:
    print("Error: Classifier pipeline not initialized. Please run the cell to create the classifier.")
    return None
  # Truncate over-long inputs, matching the truncation used when tokenizing
  # the training data.
  result = active_classifier(text, truncation=True)
  return result

# Example usage:
# third_party_text = "This is a third party text to classify."
# classification_result = classify_text(third_party_text)
# print(classification_result)

In [None]:
# Sample article used to exercise the classifier by hand (four paragraphs,
# kept as a single multi-line string).
text = """LONDON, UK – A new report from a group identifying itself as the Global Tech-Wellness Consortium has issued a stark warning to smartphone users worldwide. The report claims that a specific radio frequency, emitted by over 90 percent of modern smartphones, is directly interfering with the human brains ability to enter deep sleep.
The lead researcher, a person named Dr. Evelyn Reed, stated in a public broadcast that the frequency, dubbed P-14, disrupts melatonin production. Melatonin is the key hormone that regulates our sleep-wake cycles. Dr. Reed explained that even when the phone is not in use, this signal is active, creating a constant, low-level disturbance in our environment.
The consortiums research, released directly on their new website and not yet verified by mainstream scientific bodies, suggests a direct correlation between the rollout of this signal over the past five years and a global increase in reported cases of insomnia, chronic fatigue, and daytime grogginess.
Online health communities and influencers are already sharing this information widely. The hashtag #SignalSleepless is trending. One widely shared post urges everyone to switch their phones to airplane mode at night. It reads, The phone companies know about this but are hiding it to protect their profits. You must share this information. It could be the reason you are always tired. Protect your loved ones from this invisible danger.
"""

In [None]:
# Run the sample article through classify_text and print whatever it returns
# (classify_text returns None if it cannot find the `classifier` pipeline).
classification_result = classify_text(text)
print(classification_result)

In [None]:
from transformers import pipeline

# (Re)create the text-classification pipeline from the reloaded checkpoint and
# the notebook-level tokenizer. Same call as in the "# test model" cell;
# classify_text calls this `classifier` object.
classifier = pipeline("text-classification", model=loaded_model, tokenizer=tokenizer)

In [None]:
# Same sample article and classify_text call as the earlier cells, combined
# into a single cell so it can be re-run after the pipeline has been created.
text = """LONDON, UK – A new report from a group identifying itself as the Global Tech-Wellness Consortium has issued a stark warning to smartphone users worldwide. The report claims that a specific radio frequency, emitted by over 90 percent of modern smartphones, is directly interfering with the human brains ability to enter deep sleep.
The lead researcher, a person named Dr. Evelyn Reed, stated in a public broadcast that the frequency, dubbed P-14, disrupts melatonin production. Melatonin is the key hormone that regulates our sleep-wake cycles. Dr. Reed explained that even when the phone is not in use, this signal is active, creating a constant, low-level disturbance in our environment.
The consortiums research, released directly on their new website and not yet verified by mainstream scientific bodies, suggests a direct correlation between the rollout of this signal over the past five years and a global increase in reported cases of insomnia, chronic fatigue, and daytime grogginess.
Online health communities and influencers are already sharing this information widely. The hashtag #SignalSleepless is trending. One widely shared post urges everyone to switch their phones to airplane mode at night. It reads, The phone companies know about this but are hiding it to protect their profits. You must share this information. It could be the reason you are always tired. Protect your loved ones from this invisible danger.
"""

# Classify the article and print the pipeline output.
classification_result = classify_text(text)
print(classification_result)

In [None]:
from transformers import pipeline

# Initialize the classifier pipeline
# Make sure loaded_model and tokenizer are available from previous cells.
# At cell (module) level the notebook namespace is the global namespace, so the
# prerequisite names are looked up there.
if 'loaded_model' not in globals() or 'tokenizer' not in globals():
    # The message names the objects that are actually checked and used below
    # (`tokenizer`, not a separate `loaded_tokenizer`).
    print("Error: loaded_model or tokenizer not found. Please ensure previous cells were run.")
else:
    classifier = pipeline("text-classification", model=loaded_model, tokenizer=tokenizer)

    # Text to classify
    text_to_classify = """LONDON, UK – A new report from a group identifying itself as the Global Tech-Wellness Consortium has issued a stark warning to smartphone users worldwide. The report claims that a specific radio frequency, emitted by over 90 percent of modern smartphones, is directly interfering with the human brains ability to enter deep sleep.
    The lead researcher, a person named Dr. Evelyn Reed, stated in a public broadcast that the frequency, dubbed P-14, disrupts melatonin production. Melatonin is the key hormone that regulates our sleep-wake cycles. Dr. Reed explained that even when the phone is not in use, this signal is active, creating a constant, low-level disturbance in our environment.
    The consortiums research, released directly on their new website and not yet verified by mainstream scientific bodies, suggests a direct correlation between the rollout of this signal over the past five years and a global increase in reported cases of insomnia, chronic fatigue, and daytime grogginess.
    Online health communities and influencers are already sharing this information widely. The hashtag #SignalSleepless is trending. One widely shared post urges everyone to switch their phones to airplane mode at night. It reads, The phone companies know about this but are hiding it to protect their profits. You must share this information. It could be the reason you are always tired. Protect your loved ones from this invisible danger.
    """

    # Classify the text, truncating over-long input the same way the training
    # data was truncated when it was tokenized.
    classification_result = classifier(text_to_classify, truncation=True)
    print(classification_result)

In [None]:
# Write the reloaded model and the tokenizer side by side into one directory.
output_dir = "./exported_model"
for artifact in (loaded_model, tokenizer):
    artifact.save_pretrained(output_dir)
print(f"Model and tokenizer exported to {output_dir}")

In [None]:
# Bundle the exported_model/ directory into exported_model.zip (shell command).
# NOTE: pytorch_model.pt is written by a later cell, so it is not part of this
# archive when the cells are run top to bottom.
!zip -r exported_model.zip exported_model/

In [None]:
import torch

# Destination for the raw PyTorch weights, inside the export directory.
output_path = "./exported_model/pytorch_model.pt"

# Serialize only the parameters (state dict), not the full model object.
state_dict = loaded_model.state_dict()
torch.save(state_dict, output_path)

print(f"Model state dictionary exported to {output_path}")