In [None]:
!pip install --upgrade transformers

In [None]:
!pip install transformers datasets scikit-learn


In [None]:
# Show which transformers release the kernel actually imports.
import transformers
print(transformers.__version__)


In [None]:
# Colab-specific upload helper: `google.colab` is imported here, so this cell
# is expected to run inside Google Colab.
# NOTE(review): presumably this is where "Fake.csv" / "True.csv" (read in a
# later cell) are supplied — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# Second upload prompt. Rebinding `uploaded` discards the value returned by the
# previous upload cell; `files` must already have been imported there.
uploaded = files.upload()

In [None]:
import pandas as pd
# Read both CSV files from the working directory: fake articles first,
# genuine articles second.
fake_df, true_df = (
    pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv")
)

In [None]:
# Display the fake-news frame as this cell's output.
fake_df

In [None]:
# Display the genuine-news frame as this cell's output.
true_df

In [None]:
# Add the binary target column in place: 1 marks fake articles, 0 marks
# genuine ones.
for frame, target in ((fake_df, 1), (true_df, 0)):
    frame["label"] = target

In [None]:
# Stack both frames, renumber the rows from zero, and keep only the model
# input ("text") and the target ("label").
df = (
    pd.concat([fake_df, true_df])
    .reset_index(drop=True)
    .loc[:, ["text", "label"]]
)

In [None]:
# Sanity check: first rows of the combined frame and the number of examples
# per class.
preview = df.head()
class_counts = df["label"].value_counts()

print(preview)
print("Class distribution:\n", class_counts)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on the label
# so both parts keep the same class ratio, and seeded for repeatability.
all_texts, all_labels = df["text"], df["label"]
train_texts, test_texts, train_labels, test_labels = train_test_split(
    all_texts,
    all_labels,
    test_size=0.2,
    stratify=all_labels,
    random_state=42,
)


In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Both splits are tokenised with identical settings: inputs longer than
# 256 tokens are truncated and padding is enabled.
encode_options = {"truncation": True, "padding": True, "max_length": 256}

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

In [None]:
import torch

class NewsDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping from feature name to a per-example indexable
            container (anything exposing ``.items()`` whose values support
            ``values[idx]``).
        labels: Per-example labels. A pandas Series is indexed by position
            (``.iloc``), so a shuffled or non-contiguous index is fine; any
            other indexable sequence (list, tuple, array) is indexed directly.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, with the target under "labels"."""
        item = {name: torch.tensor(values[idx]) for name, values in self.encodings.items()}
        labels = self.labels
        # Positional access for pandas objects; plain indexing otherwise, so
        # lists/tuples/arrays are accepted as well as Series.
        label = labels.iloc[idx] if hasattr(labels, "iloc") else labels[idx]
        item["labels"] = torch.tensor(label)
        return item

# Wrap each split's encodings and labels in a NewsDataset.
train_dataset, test_dataset = (
    NewsDataset(split_encodings, split_labels)
    for split_encodings, split_labels in (
        (train_encodings, train_labels),
        (test_encodings, test_labels),
    )
)

In [None]:
from transformers import AutoModelForSequenceClassification

# Readable class names stored in the model config. The ids follow the label
# column built earlier in this notebook (0 = real article, 1 = fake article).
# Without this mapping the saved model and any pipeline built from it report
# the generic names "LABEL_0" / "LABEL_1".
id2label = {0: "REAL", 1: "FAKE"}
label2id = {name: class_id for class_id, name in id2label.items()}

# DistilBERT encoder with a 2-way sequence-classification head on top.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score

training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=50,
    # Schedule and batch sizes.
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Optimiser settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    # No evaluation strategy is set here; evaluation is run explicitly
    # through trainer.evaluate() after training.
)

In [None]:
def compute_metrics(pred):
    """Compute accuracy and macro-averaged F1 for a set of predictions.

    Args:
        pred: Object exposing ``predictions`` (scores, reduced to class ids
            with an argmax over the last axis) and ``label_ids`` (the
            reference class ids).

    Returns:
        Dict with the keys "accuracy" and "f1".
    """
    predicted_classes = pred.predictions.argmax(-1)
    reference_classes = pred.label_ids

    metrics = {}
    metrics["accuracy"] = accuracy_score(reference_classes, predicted_classes)
    metrics["f1"] = f1_score(reference_classes, predicted_classes, average="macro")
    return metrics

# Bundle the model, its hyper-parameters, both datasets and the metric
# function into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [None]:
# Fine-tune the model on train_dataset using the settings in training_args.
trainer.train()


In [None]:
# Evaluate on the eval_dataset handed to the Trainer (test_dataset) and print
# whatever metrics it returns.
eval_results = trainer.evaluate()
print("Evaluation Results:", eval_results)

In [None]:
# Run inference over the held-out split, then separate the reference labels
# from the predicted class ids (argmax over the last axis of the scores).
preds = trainer.predict(test_dataset)
y_true, y_pred = preds.label_ids, preds.predictions.argmax(-1)


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, f1_score, classification_report

import matplotlib.pyplot as plt

# Confusion matrix of reference vs. predicted classes.
# `labels=[0, 1]` pins the matrix to 2x2 with rows/columns in 0-then-1 order,
# so it always lines up with the hard-coded display labels below, even if one
# class happens to be absent from y_true and y_pred.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

# Render the matrix as a heat map; tick labels follow the 0 = real, 1 = fake
# convention used for the label column.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Real (0)", "Fake (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Fake News Detection")
plt.show()


In [None]:
# Summary metrics for the held-out split, computed first and printed after.
accuracy = accuracy_score(y_true, y_pred)
macro_f1 = f1_score(y_true, y_pred, average="macro")
report = classification_report(y_true, y_pred)

print("✅ Accuracy:", accuracy)
print("✅ F1 Score:", macro_f1)
print("\nClassification Report:\n", report)

In [None]:
from transformers import pipeline

# Directory that holds both the fine-tuned weights and the tokenizer files.
MODEL_DIR = "./fake-news-model"

# Persist the model and tokenizer side by side so they can be reloaded together.
model.save_pretrained(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)

# Reload from disk as a text-classification pipeline; device 0 selects the
# first GPU when CUDA is available, -1 falls back to the CPU.
device_index = 0 if torch.cuda.is_available() else -1
fake_news_classifier = pipeline(
    "text-classification",
    model=MODEL_DIR,
    tokenizer=MODEL_DIR,
    device=device_index,
)

# Smoke-test the pipeline on two hand-written headlines.
for headline in (
    "NASA confirms discovery of a new planet.",
    "Aliens landed in New York City, media is hiding the truth!",
):
    print(fake_news_classifier(headline))




In [None]:
# Classify a longer, news-style sentence. The leading space inside the string
# literal is part of the input passed to the pipeline.
print(fake_news_classifier(" Trump campaign adviser George Papadopoulos told an Australian diplomat in May 2016 that Russia had political dirt on Democratic presidential candidate Hillary Clinton, the New York Times"))

In [None]:
# Classify two short headlines, one call and one printed result per headline.
for headline in (
    "NASA confirms discovery of new planet.",
    "Aliens landed in New York, media hides truth.",
):
    print(fake_news_classifier(headline))


In [None]:
# Write a copy of a notebook with outputs and metadata cleared to
# cleaned_notebook.ipynb.
# NOTE(review): `your_notebook.ipynb` looks like a placeholder — replace it
# with the real notebook filename before running.
!jupyter nbconvert --ClearMetadataPreprocessor.enabled=True --clear-output --to notebook --output cleaned_notebook.ipynb your_notebook.ipynb
