In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

In [4]:
# Load the labelled news articles; the path is relative to the notebook's
# working directory.
news_csv_path = "fake_or_real_news.csv"
data = pd.read_csv(news_csv_path)

In [5]:
# Show the freshly loaded frame (pandas elides the middle rows when rendering).
data

Unnamed: 0,id,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


In [6]:
# Encode the label as a binary target: 0 for "REAL", 1 for anything else.
# A vectorized comparison avoids a Python-level call per row; the cast keeps
# the column as int64.
data['fake'] = (data['label'] != "REAL").astype("int64")

In [7]:
# Re-display the frame; it now carries the derived `fake` column.
data

Unnamed: 0,id,title,text,label,fake
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,1
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,1
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,0
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,1
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,0
...,...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL,0
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE,1
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE,1
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL,0


In [8]:
# Model input: the article body text.
X = data['text']
# Model target: the binary `fake` flag.
y = data['fake']

In [9]:
# Inspect the feature series (the `text` column of `data`).
X

0       Daniel Greenfield, a Shillman Journalism Fello...
1       Google Pinterest Digg Linkedin Reddit Stumbleu...
2       U.S. Secretary of State John F. Kerry said Mon...
3       — Kaydee King (@KaydeeKing) November 9, 2016 T...
4       It's primary day in New York and front-runners...
                              ...                        
6330    The State Department told the Republican Natio...
6331    The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
6332     Anti-Trump Protesters Are Tools of the Oligar...
6333    ADDIS ABABA, Ethiopia —President Obama convene...
6334    Jeb Bush Is Suddenly Attacking Trump. Here's W...
Name: text, Length: 6335, dtype: object

In [10]:
# Inspect the target series: 0 where the label is "REAL", 1 otherwise.
y

0       1
1       1
2       0
3       1
4       0
       ..
6330    0
6331    1
6332    1
6333    0
6334    0
Name: fake, Length: 6335, dtype: int64

In [11]:
# Hold out 20% of the articles for evaluation.
# random_state pins the shuffle so the split (and every result derived from
# it, such as the accuracy and `X_test.iloc[10]`) is the same after
# Restart & Run All; stratify=y keeps the class ratio equal in both partitions.
# NOTE(review): outputs recorded from earlier unseeded runs will not match.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [12]:
# Inspect the training portion of the text feature; the index keeps the
# original row labels, now in shuffled order.
X_train

4180    The fate of the Affordable Care Act, the presi...
1404    We Are Change \n\n \nRemember, remember, the 5...
4009    For a speech purporting to challenge Washingto...
5294    Trump Promises ‘New Deal for Black America’ Be...
4544    The Cosmic Story: Scorpio New Moon, October 30...
                              ...                        
1897    Share on Facebook \nMoveOn.org is a George Sor...
3307    On Wednesday, after defeat in Indiana, John Ka...
777     Home › POLITICS › CLINTON EMAIL INVESTIGATION ...
6166      Bitcoin Soars As China Launches Crackdown On...
3743    BNI Store Oct 27 2016 GERMANY: Parents outrage...
Name: text, Length: 5068, dtype: object

In [16]:
# TF-IDF options: use the built-in English stop-word list and cap document
# frequency at 0.7 (see scikit-learn's TfidfVectorizer docs for `max_df`).
tfidf_options = {"stop_words": "english", "max_df": 0.7}
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit on the training texts only, then reuse the fitted vectorizer for the
# test texts so both matrices share one vocabulary.
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [17]:
# Linear support-vector classifier trained on the TF-IDF features.
# random_state fixes the estimator's internal pseudo-random number generation
# so repeated fits on the same split give the same model.
classifier = LinearSVC(random_state=42)
classifier.fit(X_train_vectorized, y_train)



In [25]:
# Score the classifier on the held-out split and report the result scaled
# by 100 (i.e. as a percentage).
test_accuracy = classifier.score(X_test_vectorized, y_test)
print('The model accuracy is :', test_accuracy * 100)

The model accuracy is : 93.37016574585635


In [35]:
# Save one held-out article (position 10 of the test split) to disk so it can
# be read back and classified like an external file.
sample_article = X_test.iloc[10]
with open("mytext.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(sample_article)

In [36]:
# Read the saved article back as a single string.
with open("mytext.txt", encoding="utf-8") as in_file:
    text = in_file.read()

In [37]:
# Echo the string read back from mytext.txt.
text

'There\'s a sense of growing optimism among Democrats that if Donald Trump is at the top of the ticket they might have a chance at what otherwise seems impossible: curtailing the GOP\'s stranglehold on the US House of Representatives.\n\nThe fundamental landscape is deeply unfavorable to House Democrats. They\'re down 30 seats and behind in fundraising with district boundaries drawn in such a way that winning a national majority of votes won\'t deliver them a majority of seats. They need, fundamentally, something game-changing and weird to happen. And then, like magic, along comes Donald Trump, who happens to be weak in exactly the sort of Republican-leaning suburban districts they are hoping to peel away from the GOP.\n\n"[Trump] makes districts that would have been hard-core tossup districts" into ones that lean Democratic, and gives Democrats "a little bit of a push" in Republican-leaning districts across the country, according to Kelly Ward, the executive director of the Democratic

In [38]:
# The vectorizer takes a collection of documents, so the single article is
# wrapped in a one-element list before transformation.
single_article_batch = [text]
vectorized_text = vectorizer.transform(single_article_batch)

In [39]:
# Predict the class of the single vectorized article; under the `fake`
# encoding, 0 means REAL and 1 means not REAL.
classifier.predict(vectorized_text)

array([0])

In [40]:
# Ground-truth label of the same test row (position 10), for comparison with
# the prediction.
y_test.iloc[10]

0

In [42]:
# A second article, supplied as a string literal, to run through the model.
text_check = "Clinton Campaign Demands FBI Affirm Trump's Russia Ties With the 2016 election campaign winding down, the Clinton campaign is ratcheting up demands for the FBI to publicly confirm the campaign’s allegations that Republican nominee Donald Trump is secretly in league with Russia. Sen. Harry Reid (D – NV) went so far as to claim the FBI has secret “explosive” evidence of coordination between the Trump campaign and the Russian government that it is withholding. FBI officials familiar with their investigations into the allegations, which the Clinton campaign started publicizing around the Democratic National Convention, say they’ve turned up nothing to connect Trump and Russia , leading FBI Director James Comey to decide against making any statements to that effect. The Clinton campaign has been making the allegations so long that they have taken to claiming “everyone knows” that they are true, and appears unsettled by the FBI’s refusal to sign off on the claims simply because they haven’t been able to find real evidence corroborating the story. The Trump campaign has repeatedly denied ties to Russia, but that didn’t stop Clinton from calling Trump a “puppet” of Russian President Vladimir Putin during the final presidential debate. The calls have grown since Friday’s FBI report to Congress about further Clinton emails being sought. With Clinton’s main campaign scandal growing in the waning weeks of the deal, some in her campaign have suggested that affirming Trump as secretly in league with the Russians would only be fair. Absent any evidence, however, it appears that won’t be happening."

# Write the article to disk so the following cell can read it back.
with open("text_check.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(text_check)

In [43]:
# Read the second article back from disk, replacing the in-memory string.
with open("text_check.txt", encoding="utf-8") as in_file:
    text_check = in_file.read()

In [44]:
# Echo the string read back from text_check.txt.
text_check

"Clinton Campaign Demands FBI Affirm Trump's Russia Ties With the 2016 election campaign winding down, the Clinton campaign is ratcheting up demands for the FBI to publicly confirm the campaign’s allegations that Republican nominee Donald Trump is secretly in league with Russia. Sen. Harry Reid (D – NV) went so far as to claim the FBI has secret “explosive” evidence of coordination between the Trump campaign and the Russian government that it is withholding. FBI officials familiar with their investigations into the allegations, which the Clinton campaign started publicizing around the Democratic National Convention, say they’ve turned up nothing to connect Trump and Russia , leading FBI Director James Comey to decide against making any statements to that effect. The Clinton campaign has been making the allegations so long that they have taken to claiming “everyone knows” that they are true, and appears unsettled by the FBI’s refusal to sign off on the claims simply because they haven’t

In [45]:
# Wrap the article in a one-element list and transform it with the
# already-fitted vectorizer.
check_batch = [text_check]
vectorized_text_fake = vectorizer.transform(check_batch)

In [46]:
# Predict the class of the second article; under the `fake` encoding, 0 means
# REAL and 1 means not REAL.
classifier.predict(vectorized_text_fake)

array([1])