In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string




In [2]:
df_fake = pd.read_csv('/kaggle/input/fake-news-detection/Fake.csv')
df_true = pd.read_csv('/kaggle/input/fake-news-detection/True.csv')

In [3]:
df_true.tail()

Unnamed: 0,title,text,subject,date
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017"
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017"
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017"
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017"
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017"


In [4]:
df_true.shape

(21417, 4)

In [5]:
df_true["status"] = 1
df_fake["status"] = 0

In [6]:
df_true.head()

Unnamed: 0,title,text,subject,date,status
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [7]:
df= pd.concat([df_true, df_fake], axis =0 )
df.tail()

Unnamed: 0,title,text,subject,date,status
23476,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,"January 16, 2016",0
23477,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,"January 16, 2016",0
23478,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,"January 15, 2016",0
23479,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,"January 14, 2016",0
23480,10 U.S. Navy Sailors Held by Iranian Military ...,21st Century Wire says As 21WIRE predicted in ...,Middle-east,"January 12, 2016",0


In [8]:
df = df.drop(["title", "subject","date"], axis = 1)

In [9]:
df.isnull().sum()

text      0
status    0
dtype: int64

In [10]:
df.head()

Unnamed: 0,text,status
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1
2,WASHINGTON (Reuters) - The special counsel inv...,1
3,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [11]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

In [12]:
df["text"] = df["text"].apply(wordopt)


In [13]:
x = df["text"]
y = df["status"]

In [14]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,random_state=42)

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [16]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr.fit(xv_train,y_train)

In [17]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(xv_train, y_train)

In [18]:
pred_lr=lr.predict(xv_test)
pred_rf=rf.predict(xv_test)

In [19]:
print(classification_report(y_test, pred_lr))


              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5829
           1       0.98      0.99      0.99      5396

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



In [20]:
accuracy_score(y_test, pred_rf)


0.9912694877505568

In [21]:
accuracy_score(y_test,pred_rf)

0.9912694877505568

In [22]:
# Now let's create the user interface
def predict_news():
    user_input = input("Enter the news text: ")
    user_input_preprocessed = wordopt(user_input)
    user_input_vectorized = vectorization.transform([user_input_preprocessed])
    answer(lr, user_input_vectorized)
    answer(rf, user_input_vectorized)
def answer(model,user_input_vectorized):
    prediction = model.predict(user_input_vectorized)[0]

    if prediction == 1:
        print(f"Predicted by {model } : The news is TRUE.")
    else:
        print(f"Predicted by {model} : The news is FAKE.")

# Run the user interface
predict_news()



    







Enter the news text:  Elon musk has gifted thousands of Tesla cars to the citizens of Nepal.


Predicted by LogisticRegression() : The news is FAKE.
Predicted by RandomForestClassifier() : The news is FAKE.
