<a href="https://colab.research.google.com/github/ShjoonAlbishi/Detect-Fake-News-using-SHAP/blob/main/fake_news.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install shap
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import shap
import matplotlib.pyplot as plt
from google.colab import drive



In [5]:
# Mount Google Drive if using Google Colab, so the CSV files below can be read
# from the /content/drive mount point.
from google.colab import drive
drive.mount('/content/drive')

# Load datasets: one CSV per class (file names suggest fake vs. true articles).
# NOTE(review): these are absolute paths into one particular Drive folder ("380");
# anyone else running the notebook has to edit them to match their own layout.
path_fake = "/content/drive/MyDrive/380/Fake.csv"
path_true = "/content/drive/MyDrive/380/True.csv"
data_fake = pd.read_csv(path_fake)
data_true = pd.read_csv(path_true)

# Combine datasets: tag each source frame with its class label, then stack them
# into a single frame.
data_fake['label'] = 0  # 0 for fake
data_true['label'] = 1  # 1 for true
data = pd.concat([data_fake, data_true])

# Shuffle the dataset.
# The shuffle is seeded: a seeded train/test split picks fixed row *positions*,
# so an unseeded shuffle would put different articles at those positions on every
# run and the reported metrics would not be reproducible.
# reset_index(drop=True) discards the duplicated per-file indices left by concat.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Preprocess text data
def preprocess_text(text):
    """Normalize a raw news string before vectorization.

    Steps, in order:
      1. lowercase the text;
      2. drop anything enclosed in square brackets;
      3. drop URLs (``http://``/``https://`` links and ``www.`` hosts);
      4. drop HTML-style tags;
      5. replace every remaining non-word character (punctuation, newlines,
         other whitespace) with a single space;
      6. delete leftover punctuation characters, which at this point means
         underscores, since ``_`` counts as a word character;
      7. delete every token that contains a digit.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The cleaned text. Runs of spaces are not collapsed.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    # URL and tag removal must run BEFORE the generic \W substitution below:
    # once '://', '.', '<' and '>' have been turned into spaces these patterns
    # can no longer match and URL/markup fragments would leak into the features.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Newlines are non-word characters, so this step also turns them into spaces.
    text = re.sub(r'\W', ' ', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

# Clean every article body. Note that this overwrites the 'text' column in place,
# so the raw text is no longer available from `data` after this line.
data['text'] = data['text'].apply(preprocess_text)

# Split data: 20% held out for testing, stratified on the label column.
x_train, x_test, y_train, y_test = train_test_split(data['text'], data['label'], test_size=0.20, random_state=42, stratify=data['label'])

# Vectorization: the TF-IDF vectorizer (capped at 5000 features) is fitted on the
# training split only; the test split is transformed with that fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=5000)
xv_train = vectorizer.fit_transform(x_train)
xv_test = vectorizer.transform(x_test)

# Model training
model = LogisticRegression(random_state=42)
model.fit(xv_train, y_train)

# Evaluate the model on the held-out split and print per-class metrics
predictions = model.predict(xv_test)
print(classification_report(y_test, predictions))

# Initialize SHAP with dense data if manageable in memory
# NOTE(review): .toarray() materializes the entire training matrix as a dense
# array (rows x up to 5000 columns); confirm this fits in the runtime's memory.
explainer = shap.LinearExplainer(model, xv_train.toarray(), feature_perturbation="interventional")

# When explaining an individual prediction, ensure the data is dense
def explain_news(index):
    """Build a SHAP force plot for one row of the vectorized test split.

    Parameters
    ----------
    index
        Row selector applied to the module-level ``xv_test`` matrix.

    Returns
    -------
    The object returned by ``shap.force_plot`` for the selected row.
    """
    # Load the JS assets used to render the plot in the notebook output
    shap.initjs()

    # The selected row is converted to a dense array before being explained
    row_dense = xv_test[index].toarray()
    row_contributions = explainer.shap_values(row_dense)

    return shap.force_plot(
        explainer.expected_value,
        row_contributions,
        row_dense,
        feature_names=vectorizer.get_feature_names_out(),
    )

# Test with an example: explain a single row of the test split.
index_to_test = 0  # Change index to test different news items
# The call is the cell's final expression, so its return value (the force plot)
# is what the notebook renders as the cell result.
explain_news(index_to_test)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4696
           1       0.99      0.99      0.99      4284

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980





In [6]:
# Function to manually test news and show SHAP values
def manual_testing(news):
    """Classify a free-text news snippet and explain the prediction with SHAP.

    The text is cleaned with ``preprocess_text``, transformed by the module-level
    ``vectorizer``, and scored by the module-level ``model``. The predicted class
    and the class probabilities are printed.

    Parameters
    ----------
    news : str
        Raw news text to classify.

    Returns
    -------
    The object returned by ``shap.force_plot`` for this input.
    """
    cleaned = preprocess_text(news)
    features = vectorizer.transform([cleaned])
    predicted = model.predict(features)
    class_probs = model.predict_proba(features)[0]

    # Label 0 is reported as fake; any other label is reported as true
    if predicted[0] == 0:
        verdict = "Fake News"
    else:
        verdict = "True News"
    print("\nPredicted Class:", verdict)
    print("Prediction Probability:", class_probs)

    # Per-feature contributions for this single input
    contributions = explainer.shap_values(features)
    shap.initjs()
    return shap.force_plot(
        explainer.expected_value,
        contributions[0],
        features.toarray(),
        feature_names=vectorizer.get_feature_names_out(),
    )

# User input for news: input() waits for text typed by the user, so this cell
# cannot complete in an unattended "Run All".
input_news = input("Enter a piece of news text to classify: ")
# Prints the predicted class and probabilities, and returns the force plot as
# the cell's final expression.
manual_testing(input_news)

Enter a piece of news text to classify: WASHINGTON (Reuters) - Transgender people will be allowed for the first time to enlist in the U.S. military starting on Monday as ordered by federal courts, the Pentagon said on Friday, after President Donald Trump’s administration decided not to appeal rulings that blocked his transgender ban. Two federal appeals courts, one in Washington and one in Virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on Jan. 1. A Justice Department official said the administration will not challenge those rulings. “The Department of Defense has announced that it will be releasing an independent study of these issues in the coming weeks. So rather than litigate this interim appeal before that occurs, the administration has decided to wait for DOD’s study and will continue to defend the president’s lawful authority in District Court in the meantime,” th