<a href="https://colab.research.google.com/github/PKAVIYADARSHINI/fake-news-detection/blob/main/Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
import re
import string

# Load datasets
df_fake = pd.read_csv("/Fake.csv (3).zip")
df_true = pd.read_csv("/True.csv (2).zip")

# Label the datasets
df_fake["class"] = 0  # Fake news labeled as 0
df_true["class"] = 1  # True news labeled as 1

# Combine datasets
df = pd.concat([df_fake, df_true]).reset_index(drop=True)

# Shuffle the dataset
df = df.sample(frac=1).reset_index(drop=True)

# Preprocess the text
def wordopt(text):
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

df['text'] = df['text'].apply(wordopt)

# Split features and target
x = df['text']
y = df['class']

# Text vectorization
vectorization = TfidfVectorizer()
x = vectorization.fit_transform(x)

# Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Logistic Regression
LR = LogisticRegression()
LR.fit(x_train, y_train)
pred_lr = LR.predict(x_test)

# Decision Tree Classifier
DT = DecisionTreeClassifier()
DT.fit(x_train, y_train)
pred_dt = DT.predict(x_test)

# Gradient Boosting Classifier
GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(x_train, y_train)
pred_gbc = GBC.predict(x_test)

# Random Forest Classifier
RFC = RandomForestClassifier(random_state=0)
RFC.fit(x_train, y_train)
pred_rfc = RFC.predict(x_test)

# K-Nearest Neighbors Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
pred_knn = knn.predict(x_test)

# Evaluate KNN
accuracy_knn = accuracy_score(y_test, pred_knn)
print("KNN Accuracy:", accuracy_knn)
print("KNN Classification Report:\n", classification_report(y_test, pred_knn))

# Manual Testing Function
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text": [news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)

    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    pred_KNN = knn.predict(new_xv_test)

    return print(
        "\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {} \nKNN Prediction: {}".format(
            output_lable(pred_LR[0]),
            output_lable(pred_DT[0]),
            output_lable(pred_GBC[0]),
            output_lable(pred_RFC[0]),
            output_lable(pred_KNN[0]),
        )
    )

# Display outputs of all models
print("Logistic Regression Accuracy:", accuracy_score(y_test, pred_lr))
print("Decision Tree Accuracy:", accuracy_score(y_test, pred_dt))
print("Gradient Boosting Classifier Accuracy:", accuracy_score(y_test, pred_gbc))
print("Random Forest Accuracy:", accuracy_score(y_test, pred_rfc))
print("KNN Accuracy:", accuracy_knn)

# Example of Manual Testing
manual_testing("The government has launched a new policy to boost economic growth.")
manual_testing("Breaking news: Aliens have landed on Earth and are offering free energy solutions.")



KNN Accuracy: 0.6885523385300668
KNN Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.98      0.77      5940
           1       0.94      0.36      0.52      5285

    accuracy                           0.69     11225
   macro avg       0.79      0.67      0.65     11225
weighted avg       0.78      0.69      0.65     11225

Logistic Regression Accuracy: 0.9831625835189309
Decision Tree Accuracy: 0.9960801781737194
Gradient Boosting Classifier Accuracy: 0.995902004454343
Random Forest Accuracy: 0.9874387527839643
KNN Accuracy: 0.6885523385300668


LR Prediction: Not A Fake News 
DT Prediction: Fake News 
GBC Prediction: Fake News 
RFC Prediction: Fake News 
KNN Prediction: Fake News


LR Prediction: Fake News 
DT Prediction: Fake News 
GBC Prediction: Fake News 
RFC Prediction: Fake News 
KNN Prediction: Fake News


In [None]:
from google.colab import drive
drive.mount('/content/drive')