<a href="https://colab.research.google.com/github/ArbazKhalid3/FakeNewsClassifier/blob/main/FinalFakeNewsDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


# Load Data

In [None]:
# Read each source file and tag its rows right away:
# rows from Fake.csv get class 0, rows from True.csv get class 1
data_fake = pd.read_csv('Fake.csv')
data_fake['class'] = 0

data_true = pd.read_csv('True.csv')
data_true['class'] = 1

# Stack the two frames under a fresh 0..n-1 index and discard the columns
# that are not used by the rest of the notebook
data = pd.concat(
    [data_fake, data_true],
    axis=0,
    ignore_index=True,
).drop(columns=['title', 'subject', 'date'])

# Preprocess Text

In [None]:
def wordopt(text):
    """Normalize raw article text before vectorization.

    Steps, in order: lowercase, drop ``[...]`` spans, drop URLs, drop HTML-like
    tags, turn every non-word character into a space, drop any remaining
    punctuation (only ``_`` at that point), and drop tokens containing a digit.

    Args:
        text: The text to clean. ``None`` and float NaN (what pandas yields for
            an empty CSV cell) are treated as empty text; any other non-string
            value is converted with ``str``.

    Returns:
        The cleaned, lowercased string. Whitespace is not collapsed.
    """
    # Missing values would otherwise fail on .lower()
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    text = re.sub(r'\[.*?\]', '', text)
    # URLs and tags must be stripped while their delimiters (://, <, >) still exist
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # The pattern was r"\\W", which matches a literal backslash followed by "W" and so
    # never fired; r'\W' separates words that were joined only by punctuation or newlines.
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

# Run the cleaning function over every article body, replacing the column in place
data['text'] = data['text'].map(wordopt)


# Split Data

In [None]:
# Features are the cleaned text column; the target is the 0/1 class column
X, y = data['text'], data['class']

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

# Fit the TF-IDF vocabulary and weights on the training rows only, then reuse the
# fitted vectorizer on the held-out rows so the test set does not influence it
vectorization = TfidfVectorizer()
X_train = vectorization.fit_transform(X_train)
X_test = vectorization.transform(X_test)


# Train Logistic Regression

In [None]:
# fit() returns the estimator itself, so construction and training can be chained
LR = LogisticRegression().fit(X_train, y_train)
pred_lr = LR.predict(X_test)

# Title and report are emitted on separate lines, exactly as two print() calls would
print(
    "Logistic Regression Classification Report",
    classification_report(y_test, pred_lr),
    sep="\n",
)

Logistic Regression Classification Report
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5895
           1       0.98      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



# Train Decision Tree Classifier

In [None]:
# Seed the tree so a fresh "Restart & Run All" reproduces the same report.
# The value 0 matches the seed already used for the Gradient Boosting and
# Random Forest models in this notebook.
DT = DecisionTreeClassifier(random_state=0)
DT.fit(X_train, y_train)
pred_dt = DT.predict(X_test)

print("Decision Tree Classification Report")
print(classification_report(y_test, pred_dt))

Decision Tree Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5895
           1       1.00      1.00      1.00      5330

    accuracy                           1.00     11225
   macro avg       1.00      1.00      1.00     11225
weighted avg       1.00      1.00      1.00     11225



# Train Gradient Boosting Classifier

In [None]:
# Seeded gradient-boosting model; fit() hands back the same estimator object
GB = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)
pred_gb = GB.predict(X_test)

# One print call with a newline separator gives the same two-part output
print(
    "Gradient Boosting Classification Report",
    classification_report(y_test, pred_gb),
    sep="\n",
)

Gradient Boosting Classification Report
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5895
           1       0.99      1.00      1.00      5330

    accuracy                           1.00     11225
   macro avg       1.00      1.00      1.00     11225
weighted avg       1.00      1.00      1.00     11225



# Train Random Forest Classifier

In [None]:
# Seeded random-forest model, trained on the TF-IDF training matrix
RF = RandomForestClassifier(random_state=0).fit(X_train, y_train)
pred_rf = RF.predict(X_test)

# Heading first, then the per-class precision/recall/F1 table
print(
    "Random Forest Classification Report",
    classification_report(y_test, pred_rf),
    sep="\n",
)

Random Forest Classification Report
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      5895
           1       0.99      0.98      0.98      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



# Manual Testing

In [None]:
def output_label(n):
    """Turn a predicted class value into its display text.

    Args:
        n: Predicted class value.

    Returns:
        "Fake News" when ``n`` equals 0, otherwise "Not A Fake News".
    """
    if n == 0:
        return "Fake News"
    return "Not A Fake News"

def manual_testing(news):
    """Classify one piece of news text with all four trained models and print the results.

    The text is cleaned with ``wordopt``, vectorized with the already-fitted
    ``vectorization`` object, and passed to the ``LR``, ``DT``, ``GB`` and
    ``RF`` models defined earlier in the notebook.

    Args:
        news: Raw news text to classify.

    Returns:
        None. A header line and one line per model are printed.
    """
    # A one-element list is enough for the vectorizer; no DataFrame wrapper needed
    features = vectorization.transform([wordopt(news)])

    # Run every model before printing so a failing predict() leaves no partial output
    lr_label = output_label(LR.predict(features)[0])
    dt_label = output_label(DT.predict(features)[0])
    gb_label = output_label(GB.predict(features)[0])
    rf_label = output_label(RF.predict(features)[0])

    print("\nPredictions:")
    print(f"Logistic Regression: {lr_label}")
    print(f"Decision Tree: {dt_label}")
    print(f"Gradient Boosting: {gb_label}")
    print(f"Random Forest: {rf_label}")


# Test the Model

In [None]:
# Read one article from the user and print each model's verdict for it.
# NOTE: input() blocks until text is entered, so this cell pauses an unattended "Run All".
news = input("Enter news text to test: ")
manual_testing(news)

Enter news text to test: "Global COVID-19 Vaccination Efforts Continue Amidst Challenges"  As of late 2021, countries around the world are continuing their efforts to vaccinate populations against COVID-19, with over 7 billion doses administered globally. However, the vaccine rollout has faced challenges, such as supply issues, vaccine hesitancy, and inequality between high- and low-income countries.

Predictions:
Logistic Regression: Fake News
Decision Tree: Fake News
Gradient Boosting: Fake News
Random Forest: Fake News
