<a href="https://colab.research.google.com/github/Coldpay018/Real-vs-Fake-News-Predictor/blob/main/Fake_news_vs_Real_news.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords as stop_words
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix



In [None]:
# Load the fake-news articles from a hard-coded Google Drive path.
# NOTE(review): presumably requires Drive to be mounted first (Colab) — no mount
# cell is visible in this notebook; confirm before Restart & Run All.
fake = pd.read_csv('/content/drive/MyDrive/Fake.csv')
# Preview the first rows.
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [None]:
# Tag every row of the fake-news frame with class 0.
fake["label"]=0

In [None]:
# Load the real-news articles from a hard-coded Google Drive path.
# NOTE(review): presumably requires Drive to be mounted first (Colab) — confirm.
real = pd.read_csv('/content/drive/MyDrive/True.csv')
# Preview the first rows.
real.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [None]:
# Tag every row of the real-news frame with class 1.
real["label"]=1

In [None]:
# Stack the two labelled frames into one; ignore_index renumbers rows from 0.
news = pd.concat([fake,real], ignore_index=True)

In [None]:
# Shuffle all rows (frac=1) with a fixed seed so the order is reproducible,
# then drop the old index so rows are numbered 0..n-1 again.
news = news.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Preview the shuffled, labelled data.
news.head()

Unnamed: 0,title,text,subject,date,label
0,Ben Stein Calls Out 9th Circuit Court: Committ...,"21st Century Wire says Ben Stein, reputable pr...",US_News,"February 13, 2017",0
1,Trump drops Steve Bannon from National Securit...,WASHINGTON (Reuters) - U.S. President Donald T...,politicsNews,"April 5, 2017",1
2,Puerto Rico expects U.S. to lift Jones Act shi...,(Reuters) - Puerto Rico Governor Ricardo Rosse...,politicsNews,"September 27, 2017",1
3,OOPS: Trump Just Accidentally Confirmed He Le...,"On Monday, Donald Trump once again embarrassed...",News,"May 22, 2017",0
4,Donald Trump heads for Scotland to reopen a go...,"GLASGOW, Scotland (Reuters) - Most U.S. presid...",politicsNews,"June 24, 2016",1


In [None]:
# Count missing values per column.
news.isnull().sum()

Unnamed: 0,0
title,0
text,0
subject,0
date,0
label,0


No null values: every column has zero missing entries.

In [None]:
# Combine headline and body into a single text field, separated by one space.
news['feature']=news['title']+' ' + news['text']


In [None]:
# Download the NLTK stopwords corpus that the preprocessing step relies on.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# English stopwords held in a set, used for membership tests during preprocessing.
stopw = set(stop_words.words('english'))

In [None]:
# Inspect the stopword set.
print(stopw)

{'or', "they're", 're', 'such', 'through', 'with', 'off', "you're", 'they', "you'd", 'he', 'during', 'am', 'mustn', 'was', 'we', 'between', 'not', 'does', 'most', 'has', 'will', 'doing', 'from', 'shouldn', 'mightn', 'only', 'my', 'she', 'there', 'here', 'them', 'where', 'against', "doesn't", 'do', 'of', 'being', "i'm", "they'd", 'in', "they'll", 'after', 'were', 'himself', 'some', 'but', "you'll", 'are', 'myself', 'his', 'more', 'who', "couldn't", 'which', 'you', 'so', 'other', "aren't", "i'll", "it'll", 'i', 'this', 'as', 'too', 'theirs', "we've", "he'd", 'haven', 'her', 'for', 'out', 'have', "isn't", 'why', 'wouldn', "hadn't", 'a', 'didn', 'into', 'ours', 'the', 'very', 'won', 'couldn', 'ain', "we're", 'now', 'our', 'about', 'each', 'while', "you've", 'shan', "wasn't", "she'd", 'y', 'hadn', 'below', 'when', 'isn', 'o', 'again', "i'd", "that'll", 'yourselves', 'don', "should've", "they've", 'is', 'just', 'd', 'did', 'themselves', 'those', 'itself', 'needn', 'if', 'ma', "shan't", 'then

In [None]:
# Share of all rows for each (subject, label) combination.
news[['subject', 'label']].value_counts(normalize=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,proportion
subject,label,Unnamed: 2_level_1
politicsNews,1,0.251058
worldnews,1,0.225957
News,0,0.201568
politics,0,0.152368
left-news,0,0.099314
Government News,0,0.034968
US_News,0,0.01744
Middle-east,0,0.017328


In [None]:
# Single Porter stemmer instance, used by stemming_function for every token.
port_stem = PorterStemmer()

In [None]:
def stemming_function(content):
  """Clean, tokenize, stopword-filter and stem one document.

  Every character that is not an ASCII letter is replaced by a space, the text
  is lower-cased and split on whitespace, tokens found in the module-level
  ``stopw`` set are dropped, and the remaining tokens are stemmed with the
  module-level ``port_stem``.

  Args:
    content: Raw text of one document (str).

  Returns:
    The stemmed tokens joined by single spaces.
  """
  # The class must end at 'Z': the range A-z also covers [ \ ] ^ _ ` (they sit
  # between 'Z' and 'a' in ASCII) and would leave those characters glued to words.
  content=re.sub(r'[^a-zA-Z]',' ',content).lower()
  tokens=content.split()
  return ' '.join(port_stem.stem(word) for word in tokens if word not in stopw)



In [None]:
# Preprocess every document (clean, drop stopwords, stem) to build the model input.
X = news['feature'].apply(stemming_function)

In [None]:
# Target vector: 0 = fake, 1 = real.
Y = news['label']

In [None]:
# Inspect the preprocessed text (pandas truncates the printed Series).
print(X)

0        ben stein call th circuit court commit coup ta...
1        trump drop steve bannon nation secur council w...
2        puerto rico expect u lift jone act ship restri...
3        oop trump accident confirm leak isra intellig ...
4        donald trump head scotland reopen golf resort ...
                               ...                        
44893    unreal cb ted koppel tell sean hanniti bad ame...
44894    pm may seek eas japan brexit fear trade visit ...
44895    merkel difficult german coalit talk reach deal...
44896    trump stole idea north korean propaganda parod...
44897    break hillari clinton state depart gave russia...
Name: feature, Length: 44898, dtype: object


In [None]:
# First few preprocessed documents.
X.head()

Unnamed: 0,feature
0,ben stein call th circuit court commit coup ta...
1,trump drop steve bannon nation secur council w...
2,puerto rico expect u lift jone act ship restri...
3,oop trump accident confirm leak isra intellig ...
4,donald trump head scotland reopen golf resort ...


In [None]:
# TF-IDF vectorizer with default settings; it is fitted on the training split later.
tfidf_vectorizer=TfidfVectorizer()

In [None]:
# Hold out 20% of the rows for testing; stratify=Y keeps the fake/real ratio the
# same in both splits, and the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42, stratify=Y)

In [None]:
# Learn the vocabulary and IDF weights from the training split only, so nothing
# from the test split leaks into the features.
tfidf_vectorizer.fit(X_train)

In [None]:
# Vectorize the training documents with the fitted vectorizer.
X_train_tfidf = tfidf_vectorizer.transform(X_train)

In [None]:
# Vectorize the held-out documents with the same (training-fitted) vectorizer.
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [None]:
# Linear SVM classifier. The seed matches the one used for shuffling and
# splitting, so the solver's internal shuffling is reproducible across runs.
svm = LinearSVC(random_state=42)

In [None]:
# Train on the TF-IDF training matrix, then predict labels for the held-out split.
svm.fit(X_train_tfidf, Y_train)
pred = svm.predict(X_test_tfidf)

In [None]:
# Accuracy on the held-out test split.
print(accuracy_score(Y_test,pred))

0.9947661469933184


In [None]:
# Predictions on the training split.
# NOTE(review): pred2 is not read by any cell visible in this notebook.
pred2 = svm.predict(X_train_tfidf)

In [None]:
# Predictions on the training split, used for the training-accuracy check.
pred3 = svm.predict(X_train_tfidf)

In [None]:
# Accuracy on the training split, to compare against test accuracy (overfitting check).
# Arguments follow the (y_true, y_pred) order used for the test-accuracy call.
print(accuracy_score(Y_train, pred3))

0.9996102232863745


In [None]:
# Spot-check a single held-out article.
# X_test already contains preprocessed text (stemming_function was applied to the
# whole corpus before the split), so it is vectorized as-is. Stemming it a second
# time can change tokens (Porter stemming is not idempotent), which would no
# longer match the features the model was trained and evaluated on.
a = X_test.iloc[4001]

a_proc = a  # already cleaned and stemmed
a_vec = tfidf_vectorizer.transform([a_proc])
pred5 = svm.predict(a_vec)
if(pred5[0]==0):
  print("Fake news")
else:
  print("Real news")

Fake news


In [None]:
# Ground-truth label of the same held-out row, printed in the same wording as
# the model's prediction so the two can be compared.
b = Y_test.iloc[4001]
print("Fake news" if b == 0 else "Real news")

Fake news


In [None]:
# Classify a piece of news typed in by the user: preprocess it the same way as
# the training data, vectorize it, and report the predicted class.
a = input("Enter news to predict: ")
a_proc = stemming_function(a)
a_vec = tfidf_vectorizer.transform([a_proc])
pred4 = svm.predict(a_vec)
print("Fake news" if pred4[0] == 0 else "Real news")



Enter news to predict: "White House, Congress prepare for talks on spending, immigration","WEST PALM BEACH, Fla./WASHINGTON (Reuters) - The White House said on Friday it was set to kick off talks next week with Republican and Democratic congressional leaders on immigration policy, government spending and other issues that need to be wrapped up early in the new year. The expected flurry of legislative activity comes as Republicans and Democrats begin to set the stage for midterm congressional elections in November. President Donald Trump’s Republican Party is eager to maintain control of Congress while Democrats look for openings to wrest seats away in the Senate and the House of Representatives. On Wednesday, Trump’s budget chief Mick Mulvaney and legislative affairs director Marc Short will meet with Senate Majority Leader Mitch McConnell and House Speaker Paul Ryan - both Republicans - and their Democratic counterparts, Senator Chuck Schumer and Representative Nancy Pelosi, the White

In [None]:
# Predict on the held-out split and list the distinct predicted labels
# (sanity check that the model outputs both classes).
pred_test = svm.predict(X_test_tfidf)
set(pred_test)

{np.int64(0), np.int64(1)}

In [None]:
# Confusion matrix for the test split: rows are true labels, columns are
# predicted labels (0 = fake, 1 = real).
print(confusion_matrix(Y_test, pred_test))

[[4670   26]
 [  21 4263]]
