### FAKE NEWS DETECTION

#### IMPORTING REQUIRED LIBRARIES 

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import shuffle
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import nltk





#### DATA COLLECTION

In [7]:
True_dataset = pd.read_csv('True.csv')
Fake_dataset = pd.read_csv('Fake.csv')

#### LABEL ASSIGNMENT


In [10]:
True_dataset['label']=1
Fake_dataset['label']=0

#### MERGING OF BOTH DATASETS

In [13]:
data=pd.concat([True_dataset,Fake_dataset])

In [15]:
data=shuffle(data).reset_index(drop=True)

In [17]:
data.head(15)

Unnamed: 0,title,text,subject,date,label
0,ROB SCHNEIDER Nails The Russia Conspiracy Theo...,,Government News,"Mar 30, 2017",0
1,OBAMA MAKES STUNNING 11th Hour Gift Of Massive...,In what amounts to an 11th hour gift by the ...,left-news,"Jan 10, 2017",0
2,Justice Dept. Has Had Enough — Sues Ferguson ...,The Department of Justice laid down the law on...,News,"February 11, 2016",0
3,Suspicious object found with body in Stockholm...,STOCKHOLM (Reuters) - A man was found dead on ...,worldnews,"November 16, 2017",1
4,“MEATHEAD” ROB REINER Calls For ‘ALL OUT WAR’ ...,"Writing on Twitter Sunday, the All in the Fami...",left-news,"Jun 26, 2017",0
5,Twitter Has The ULTIMATE Way For SNL To Get U...,It s no secret to anyone not living under a ro...,News,"February 7, 2017",0
6,Ted Cruz Is Finding It Hard To Even Get Love ...,When thinking about who would be the best fit ...,News,"January 31, 2016",0
7,Trump Supporters At ‘Mother Of All Rallies’ M...,"Saturday, September 16th, 2017 will surely go ...",News,"September 16, 2017",0
8,Trump visit could be 'turning point' on North ...,SEOUL (Reuters) - South Korea President Moon J...,worldnews,"November 7, 2017",1
9,White House official is person of interest in ...,WASHINGTON (Reuters) - A current White House o...,politicsNews,"May 19, 2017",1


#### FUNCTION TO PREPROCESS THE DATA

In [20]:

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    words = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    stemmer = PorterStemmer()
    words = [stemmer.stem(word) for word in words]
    return ' '.join(words)


#### PREPROCESSING ON THE TEXT COLUMN


In [23]:
data['text'] = data['text'].apply(preprocess_text)

#### FEATURE EXTRACTION USING TF-IDF

In [25]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['text'])
Y = data['label']


#### TRAINING AND TESTING SETS

In [28]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=42)


#### LOGISTIC REGRESSION MODEL

In [30]:
model = LogisticRegression()
model.fit(X_train, Y_train)


#### MODEL EVALUATION

In [33]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
# Make predictions
Y_pred = model.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred )
recall = recall_score(Y_test, Y_pred )
f1 = f1_score(Y_test, Y_pred)

# Print the results
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print('Classification Report:')
print(classification_report(Y_test, Y_pred, target_names=['real', 'fake']))

Accuracy: 0.9863028953229399
Precision: 0.9831854273703877
Recall: 0.9880309786435109
F1 Score: 0.985602247454056
Classification Report:
              precision    recall  f1-score   support

        real       0.99      0.98      0.99      4719
        fake       0.98      0.99      0.99      4261

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



#### FUNCTION TO CHECK THE INPUT IS REAL OR FAKE


In [36]:
def check_real_or_fake(input_text):
    # Preprocess the input text
    preprocessed_text = preprocess_text(input_text)
     # Transform the input text using the TF-IDF vectorizer
    input_features = vectorizer.transform([preprocessed_text])
      # Predict the label of the input text
    prediction = model.predict(input_features)
    prediction_proba = model.predict_proba(input_features)
    
    # Output the prediction
    if prediction[0] == 1:
        result = "The news article is predicted to be real."
    else:
        result = "The news article is predicted to be fake."
    
    # Output the prediction probability
    probability = f"Prediction probability: {prediction_proba[0]}"
    
    return result, probability



#### TESTING THE MODEL

In [43]:
input_text=str(input())
result, probability = check_real_or_fake(input_text)
print(result)
print(probability)

 WHOA! Did “White Supremacist” Who Organized Ch...	


The news article is predicted to be fake.
Prediction probability: [0.93622281 0.06377719]
