In [148]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [149]:
# Load the news dataset from the working directory and display it.
df = pd.read_csv(filepath_or_buffer='news.csv')
df

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


In [150]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(6335, 4)

In [151]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [152]:
# Target column: the per-article label that the classifier will learn.
labels = df['label']
labels.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [153]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the
# partition identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    labels,
    test_size=0.2,
    random_state=7,
)

In [154]:
# Inspect the training texts produced by the split.
x_train

6237    The head of a leading survivalist group has ma...
3722    ‹ › Arnaldo Rodgers is a trained and educated ...
5774    Patty Sanchez, 51, used to eat 13,000 calories...
336     But Benjamin Netanyahu’s reelection was regard...
3622    John Kasich was killing it with these Iowa vot...
                              ...                        
5699                                                     
2550    It’s not that Americans won’t elect wealthy pr...
537     Anyone writing sentences like ‘nevertheless fu...
1220    More Catholics are in Congress than ever befor...
4271    It was hosted by CNN, and the presentation was...
Name: text, Length: 5068, dtype: object

In [155]:
# Inspect the held-out texts produced by the split.
x_test

3534    A day after the candidates squared off in a fi...
6265    VIDEO : FBI SOURCES SAY INDICTMENT LIKELY FOR ...
3123    It's debate season, where social media has bro...
3940    Mitch McConnell has decided to wager the Repub...
2856    Donald Trump, the actual Republican candidate ...
                              ...                        
4986    Washington (CNN) President Barack Obama announ...
5789    The revival of middle-class jobs has been one ...
4338    "I can guarantee that," Obama answered when as...
5924    Videos 30 Civilians Die In US Airstrike Called...
6030    The retired neurosurgeon lashed out Friday mor...
Name: text, Length: 1267, dtype: object

In [156]:
# Inspect the training labels produced by the split.
y_train

6237    FAKE
3722    FAKE
5774    FAKE
336     REAL
3622    REAL
        ... 
5699    FAKE
2550    REAL
537     REAL
1220    REAL
4271    REAL
Name: label, Length: 5068, dtype: object

In [157]:
# Inspect the held-out labels produced by the split.
y_test

3534    REAL
6265    FAKE
3123    REAL
3940    REAL
2856    REAL
        ... 
4986    REAL
5789    REAL
4338    REAL
5924    FAKE
6030    REAL
Name: label, Length: 1267, dtype: object

In [158]:
# TF-IDF featurizer: drops English stop words and ignores terms that occur
# in more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

In [159]:
# Fit the vectorizer on the training texts only, then vectorize them.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)

In [160]:
# Vectorize the held-out texts with the already-fitted vectorizer
# (transform only, no refit).
tfidf_test = tfidf_vectorizer.transform(x_test)

In [161]:
# Passive-aggressive linear classifier, capped at 50 passes over the data.
# random_state pins the per-epoch shuffling of the training samples so the
# accuracy and confusion matrix reported below are reproducible on a fresh
# kernel (7 mirrors the seed already used for the train/test split).
pac=PassiveAggressiveClassifier(max_iter=50, random_state=7)

In [162]:
# Train the classifier on the TF-IDF training matrix and its labels.
pac.fit(X=tfidf_train, y=y_train)

PassiveAggressiveClassifier(max_iter=50)

In [163]:
# Predict a label for every held-out article.
y_pred = pac.predict(tfidf_test)

In [164]:
# Fraction of held-out articles whose predicted label matches the true one.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

In [165]:
# Report the accuracy as a percentage rounded to two decimals.
print('Accuracy: {}%'.format(round(score * 100, 2)))

Accuracy: 92.82%


In [166]:
# Confusion matrix with rows/columns ordered FAKE, REAL
# (rows are true labels, columns are predicted labels).
confusion_matrix(
    y_true=y_test,
    y_pred=y_pred,
    labels=['FAKE', 'REAL'],
)

array([[590,  48],
       [ 43, 586]], dtype=int64)

In [167]:
# First ad-hoc headline to classify with the trained model.
new = 'tommorow there will rain in mumbai'

In [168]:
# Wrap the headline in a one-column mapping so it can become a DataFrame.
d = dict(News=[new])
d

{'News': ['tommorow there will rain in mumbai']}

In [169]:
# Confirm the container type of d before building a DataFrame from it.
type(d)

dict

In [170]:
# One-row frame holding the headline under the 'News' column.
data = pd.DataFrame(data=d)
data

Unnamed: 0,News
0,tommorow there will rain in mumbai


In [171]:
# Display the 'News' column that is handed to the vectorizer next.
data['News']

0    tommorow there will rain in mumbai
Name: News, dtype: object

In [172]:
# Vectorize the new headline with the already-fitted vectorizer and classify it.
# The results get their own names: rebinding tfidf_train / y_pred here would
# clobber the training matrix and the test-set predictions, so re-running the
# fit or metric cells afterwards would fail or score the wrong data.
tfidf_new=tfidf_vectorizer.transform(data['News'])
new_pred=pac.predict(tfidf_new)
new_pred

array(['FAKE'], dtype='<U4')

In [174]:
# Second ad-hoc headline: same claim, now attributed to a source.
new = 'weather forecast deparment states says that tommorow there will rain in mumbai'

In [175]:
# Wrap the headline in a one-column mapping so it can become a DataFrame.
d = dict(News=[new])
d

{'News': ['weather forecast deparment states says that tommorow there will rain in mumbai']}

In [176]:
# One-row frame holding the headline under the 'News' column.
data = pd.DataFrame(data=d)
data

Unnamed: 0,News
0,weather forecast deparment states says that to...


In [177]:
# Display the 'News' column that is handed to the vectorizer next.
data['News']

0    weather forecast deparment states says that to...
Name: News, dtype: object

In [178]:
# Vectorize the new headline with the already-fitted vectorizer and classify it.
# The results get their own names: rebinding tfidf_train / y_pred here would
# clobber the training matrix and the test-set predictions, so re-running the
# fit or metric cells afterwards would fail or score the wrong data.
tfidf_new=tfidf_vectorizer.transform(data['News'])
new_pred=pac.predict(tfidf_new)
new_pred

array(['REAL'], dtype='<U4')