In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

In [7]:
news = pd.read_csv('F:/news.csv')

In [8]:
news.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [9]:
news.shape

(6335, 4)

In [10]:
X = news['text']
y = news['label']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [12]:
X_test

1653    0 Add Comment \nIN THE immediate aftermath of ...
1181    Protesters across the US were on Friday gearin...
1447    With nearly all votes counted in elections for...
1696    Originally appeared at The Blog Mire \nFollowi...
3113    Tweet “U.S.A! U.S.A! U.S.A! Po-to-toes! Po-ta-...
                              ...                        
2826    Hillary Endorsed Donald Trump for President Ac...
579     Your daily reality snack Georgia Abandons Ukra...
5003    The Perfect State Index: If Iowa, N.H. Are Too...
4002    Police searching for the second of two escaped...
3413    ( ZHE ) Having unveiled the first images of it...
Name: text, Length: 1267, dtype: object

In [13]:
X_train

1246    By Rixon Stewart on September 12, 2006 \nIs te...
2823    The Republican Party has put itself in an impo...
275     **Want FOX News First in your inbox every day?...
4100    Some Cities Want Their Noncitizen Immigrants t...
3631    Happy Memorial Day! But if you’re in Wisconsin...
                              ...                        
3308    More Election Coverage New Heavy-Duty Voting M...
286     From an Indiana pizzeria to a Washington State...
2932    CBC News \nA large map with the slogan “Pray f...
988     Malone, New York (CNN) After a massive, more-t...
4816    Veteran IT Training Program Leads to 100% Job ...
Name: text, Length: 5068, dtype: object

In [14]:
y

0       FAKE
1       FAKE
2       REAL
3       FAKE
4       REAL
        ... 
6330    REAL
6331    FAKE
6332    FAKE
6333    REAL
6334    REAL
Name: label, Length: 6335, dtype: object

In [15]:
y_train

1246    FAKE
2823    REAL
275     REAL
4100    FAKE
3631    REAL
        ... 
3308    FAKE
286     REAL
2932    FAKE
988     REAL
4816    FAKE
Name: label, Length: 5068, dtype: object

In [16]:
y_test

1653    FAKE
1181    REAL
1447    REAL
1696    FAKE
3113    FAKE
        ... 
2826    FAKE
579     FAKE
5003    REAL
4002    REAL
3413    FAKE
Name: label, Length: 1267, dtype: object

In [17]:
pipeline = Pipeline([('tfidf', TfidfVectorizer(stop_words='english')),
                    ('nbmodel', MultinomialNB())])

In [18]:
pipeline.fit(X_train, y_train)

Pipeline(steps=[('tfidf', TfidfVectorizer(stop_words='english')),
                ('nbmodel', MultinomialNB())])

In [19]:
pred = pipeline.predict(X_test)

In [20]:
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        FAKE       0.97      0.67      0.79       655
        REAL       0.74      0.98      0.84       612

    accuracy                           0.82      1267
   macro avg       0.85      0.83      0.82      1267
weighted avg       0.86      0.82      0.82      1267



In [21]:
print(confusion_matrix(y_test, pred))

[[441 214]
 [ 14 598]]
