In [1]:
# Importing The Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Loading the datasets
Truedata = pd.read_csv("True.csv")
Fakedata = pd.read_csv("Fake.csv")

In [3]:
Truedata.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [4]:
Fakedata.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
# Adding labels
Truedata['label'] = 1
Fakedata['label'] = 0

In [6]:
Truedata.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [7]:
# Concatenating the datasets
News = pd.concat([Fakedata, Truedata], axis=0)

In [8]:
News.tail()

Unnamed: 0,title,text,subject,date,label
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017",1


In [9]:
News.isnull().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [10]:
News=News.drop(['title','subject','date'],axis=1)
News.head()

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [11]:
# Shuffling the data
News=News.sample(frac=1).reset_index(drop=True)
News.head()

Unnamed: 0,text,label
0,NAIROBI (Reuters) - Competing efforts to end S...,1
1,Here s a video posted on The Donald Reddit ear...,0
2,Our weekly documentary film curated by our edi...,0
3,NEW YORK (Reuters) - U.S. oil and gas industry...,1
4,London s Muslim Mayor has been pretending that...,0


In [12]:
# Text preprocessing function
def wordoption(text):
    text = text.lower()
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d', '', text)
    text = re.sub(r'\n', '', text)
    return text

In [13]:
# Applying text preprocessing
News['text'] = News['text'].apply(wordoption)
News['text']

0        nairobi reuters  competing efforts to end sout...
1        here s a video posted on the donald reddit ear...
2        our weekly documentary film curated by our edi...
3        new york reuters  us oil and gas industry work...
4        london s muslim mayor has been pretending that...
                               ...                        
44893    this was a treat to watchif you want to see a ...
44894    sochi russia reuters  turkey iran and russia h...
44895    the national rifle association nra is opposed ...
44896    abidjan reuters  four moldovan nationals were ...
44897    after years of investigating hillary clinton o...
Name: text, Length: 44898, dtype: object

In [14]:
# Splitting the data
x = News['text']
y = News['label']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [15]:
# Vectorizing the text data
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [16]:
# Training Logistic Regression model
LR = LogisticRegression()
LR.fit(xv_train, y_train)

In [17]:
#Accuracy of Logistic Regression model
pred_lr = LR.predict(xv_test)
LR.score(xv_test,y_test)

0.9881959910913141

In [18]:
#Classification report for Logistic Regression model
print(classification_report(y_test,pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      6987
           1       0.99      0.99      0.99      6483

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [19]:
# Training Decision Tree model
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

In [20]:
#Accuracy of Decision Tree Classifier Model
pred_dt = DT.predict(xv_test)
DT.score(xv_test,y_test)

0.9949517446176689

In [21]:
#Classification report for Decision Tree Classifier model
print(classification_report(y_test,pred_dt))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      6987
           1       0.99      0.99      0.99      6483

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [22]:
# Training Random Forest Classifier model
RF=RandomForestClassifier()
RF.fit(xv_train, y_train)

In [23]:
#Accuracy of Random Forest Classifier Model
pred_rf = RF.predict(xv_test)
RF.score(xv_test,y_test)

0.988641425389755

In [24]:
#Classification report for Random Forest Classifier model
print(classification_report(y_test,pred_rf))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      6987
           1       0.98      0.99      0.99      6483

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [25]:
# Training Gradient Boosting Classifier model
GB=GradientBoostingClassifier()
GB.fit(xv_train, y_train)

In [26]:
#Accuracy of Gradient Boosting Classifier Model
pred_gb = GB.predict(xv_test)
GB.score(xv_test,y_test)

0.9942835931700075

In [27]:
#Classification report for Gradient Boosting Classifier model
print(classification_report(y_test,pred_gb))

              precision    recall  f1-score   support

           0       1.00      0.99      0.99      6987
           1       0.99      1.00      0.99      6483

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [28]:
def output_label(n):
    if n==0:
        return "Fake News"
    else:
        return "Genuine News"

In [29]:
def manual_testing(news):
    # Create a DataFrame for the input news
    testing_news = {"text": [news]}
    new_df_test = pd.DataFrame(testing_news)
    
    # Apply preprocessing
    new_df_test["text"] = new_df_test["text"].apply(wordoption)
    new_x_test = new_df_test["text"]
    
    # Transform the text data
    new_xv_test = vectorization.transform(new_x_test)
    
    # Get predictions from different models
    pred_lr = LR.predict(new_xv_test)
    pred_gb = GB.predict(new_xv_test)
    pred_rf = RF.predict(new_xv_test)
    pred_dt = DT.predict(new_xv_test)
    
    # Format the output with new lines
    result = (
        f"Logistic Regression Prediction: {output_label(pred_lr[0])}\n"
        f"Gradient Boosting Classifier Prediction: {output_label(pred_gb[0])}\n"
        f"Random Forest Classifier Prediction: {output_label(pred_rf[0])}\n"
        f"Decision Tree Classifier Prediction: {output_label(pred_dt[0])}"
    )
    
    return result

In [30]:
# Get user input and make predictions
news_article = input("Enter a news text to predict if it's Genuine or Fake: ")
print("\nFAKE NEWS DETECTION")
print(manual_testing(news_article))

Enter a news text to predict if it's Genuine or Fake: House Intelligence Committee Chairman Devin Nunes is going to have a bad day. He s been under the assumption, like many of us, that the Christopher Steele-dossier was what prompted the Russia investigation so he s been lashing out at the Department of Justice and the FBI in order to protect Trump. As it happens, the dossier is not what started the investigation, according to documents obtained by the New York Times.Former Trump campaign adviser George Papadopoulos was drunk in a wine bar when he revealed knowledge of Russian opposition research on Hillary Clinton.On top of that, Papadopoulos wasn t just a covfefe boy for Trump, as his administration has alleged. He had a much larger role, but none so damning as being a drunken fool in a wine bar. Coffee boys  don t help to arrange a New York meeting between Trump and President Abdel Fattah el-Sisi of Egypt two months before the election. It was known before that the former aide set 

In [31]:
# Get user input and make predictions
news_article = input("Enter a news text to predict if it's Genuine or Fake: ")
print("\nFAKE NEWS DETECTION")
print(manual_testing(news_article))

Enter a news text to predict if it's Genuine or Fake: WASHINGTON (Reuters) - The special counsel investigation of links between Russia and President Trumpâ€™s 2016 election campaign should continue without interference in 2018, despite calls from some Trump administration allies and Republican lawmakers to shut it down, a prominent Republican senator said on Sunday. Lindsey Graham, who serves on the Senate armed forces and judiciary committees, said Department of Justice Special Counsel Robert Mueller needs to carry on with his Russia investigation without political interference. â€œThis investigation will go forward. It will be an investigation conducted without political influence,â€ Graham said on CBSâ€™s Face the Nation news program. â€œAnd we all need to let Mr. Mueller do his job. I think heâ€™s the right guy at the right time.â€  The question of how Russia may have interfered in the election, and how Trumpâ€™s campaign may have had links with or co-ordinated any such effort, h