In [165]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import itertools
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import VotingClassifier

In [166]:
# Path to the news CSV that carries the `label` column used as the target.
# NOTE(review): absolute, machine-specific Windows path; the notebook will not
# run elsewhere without editing this value.
DATA_PATH = 'D://datasets/fake_or_real_news_2.csv'
df = pd.read_csv(DATA_PATH)

In [167]:
# Target vector: the `label` column.
y = df['label']
# Display-only preview of the remaining columns. `drop` returns a new frame
# and the result is not assigned, so `df` itself still contains `label`.
df.drop(columns='label')

Unnamed: 0.1,Unnamed: 0,title,text,title_vectors
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",[ 1.1533764e-02 4.2144405e-03 1.9692603e-02 ...
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,[ 0.11267698 0.02518966 -0.00212591 0.021095...
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,[ 0.04253004 0.04300297 0.01848392 0.048672...
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",[ 0.10801624 0.11583211 0.02874823 0.061732...
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,[ 1.69016439e-02 7.13498285e-03 -7.81233795e-...
5,6903,"Tehran, USA","\nI’m not an immigrant, but my grandparents ...",[ 0.02698107 0.02214094 0.01328667 0.032845...
6,7341,Girl Horrified At What She Watches Boyfriend D...,"Share This Baylee Luciani (left), Screenshot o...",[ 5.02990894e-02 5.96152917e-02 2.76773814e-...
7,95,‘Britain’s Schindler’ Dies at 106,A Czech stockbroker who saved more than 650 Je...,[ 3.01096570e-02 2.37618014e-02 1.04206288e-...
8,4869,Fact check: Trump and Clinton at the 'commande...,Hillary Clinton and Donald Trump made some ina...,[ 0.0511766 -0.02986711 -0.02842706 0.013333...
9,2909,Iran reportedly makes new push for uranium con...,Iranian negotiators reportedly have made a las...,[ 0.063163 0.13109246 0.08566006 0.105495...


In [168]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42
)

In [169]:
# Single hand-picked article, wrapped in a one-element list so it can be passed
# straight to a vectorizer's `transform` and then to a classifier's `predict`.
text = ["""BOSTON/NEW YORK (Reuters) - Americans in liberal enclaves from New York to San Francisco reacted with shock and despair on Wednesday to Republican businessman Donald Trump’s defeat of Democrat Hillary Clinton, with many struggling to explain the result to their children.Trump, who had never before run for public office, won on a broad wave of support both from the Republican U.S. heartland and by flipping previously Democratic states, including Pennsylvania, Florida and Ohio, helping his party protect majorities in both chambers of Congress.

The victory stunned residents of major cities up and down the East and West coasts, many of whom had trusted opinion polls that had long predicted a Clinton victory but were proven profoundly wrong by Tuesday’s results.

“I’m feeling physical pain. I’m shocked. I’m sad,” said Sofia Huizar, 30, as she waited outside the Manhattan hotel where Clinton conceded her loss on Wednesday morning, some eight hours after Trump declared victory.

Huizar, a U.S. citizen who was born in Mexico, said she spent much of Tuesday night commiserating with family across the border. “It’s a way to help process the fear,” Huizar said.

Trump’s campaign promises have included building a wall along the U.S.-Mexico border to stop illegal crossings and to deport the estimated 11 million undocumented immigrants living in the United States.

Others, like Kim Priban, a 38-year-old nurse who lives in the Cleveland suburb of Boston Heights, Ohio, said they had a difficult time breaking the news to their children.

Priban said she had proudly taken her 5-year-old daughter along to vote for Clinton, who would have been the first woman U.S. president, and on Wednesday morning had to discuss the result with her.

“I still haven’t quite figured out what to tell my daughter. I’ve been crying all day,” Priban said. “I feel like I have to go out and make my voice heard for women and children.”

Priban and Huizar are likely far from alone in the depth of their pain over Clinton’s loss, academic research suggests.

A 2015 paper by researchers at Harvard University found that voters who supported Republican Mitt Romney’s unsuccessful White House bid reported sharper spikes in their unhappiness than parents following the 2012 Sandy Hook massacre of Connecticut schoolchildren or Boston residents after the deadly 2013 Boston Marathon bombing."""]

In [170]:
# Bag-of-words term counts with English stop words removed.
count_vectorizer = CountVectorizer(stop_words='english')

# The vocabulary is learned from the training split only; the test split is
# projected onto that same vocabulary.
count_train = count_vectorizer.fit_transform(X_train)
count_test = count_vectorizer.transform(X_test)

In [171]:
# TF-IDF features: English stop words removed, and terms appearing in more
# than 70% of the training documents are ignored (max_df=0.7).
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Fit on the training split only, then reuse the fitted vectorizer for test.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [172]:
# Encode the hand-picked article with the already-fitted TF-IDF vectorizer so
# it lives in the same feature space as tfidf_train / tfidf_test.
tfidf_text = tfidf_vectorizer.transform(text)

In [173]:
# Hashed term features. alternate_sign=False is passed explicitly here; the
# resulting matrices are later fed to MultinomialNB.
hash_vectorizer = HashingVectorizer(
    stop_words='english',
    alternate_sign=False,
)
hash_train = hash_vectorizer.fit_transform(X_train)
hash_test = hash_vectorizer.transform(X_test)

In [174]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [175]:
# Multinomial Naive Bayes trained on the raw term counts.
clf_count = MultinomialNB()
clf_count.fit(count_train, y_train)

# Score on the held-out split.
pred = clf_count.predict(count_test)
score = accuracy_score(y_test, pred)
print("accuracy: %0.3f" % score)

# Kept as the trailing expression so the notebook renders the matrix.
confusion_matrix(y_test, pred)

accuracy: 0.899


array([[544,  84],
       [ 44, 595]], dtype=int64)

In [176]:
# Multinomial Naive Bayes (alpha=0.01) trained on the hashed features.
clf_hash = MultinomialNB(alpha=0.01)
clf_hash.fit(hash_train, y_train)

# Score on the held-out split; `pred` and `score` are reused from the
# previous cell and overwritten here.
pred = clf_hash.predict(hash_test)
score = accuracy_score(y_test, pred)
print('accuracy:   %0.03f' % score)

# Kept as the trailing expression so the notebook renders the matrix.
confusion_matrix(y_test, pred)

accuracy:   0.908


array([[553,  75],
       [ 42, 597]], dtype=int64)

In [177]:
# Candidate classifiers that are compared on the TF-IDF features.
clf_nb = MultinomialNB()

# The linear models are stochastic; seed them (same seed as the train/test
# split) so the reported scores are reproducible after a kernel restart.
clf_pa = PassiveAggressiveClassifier(random_state=42)

# clf_svc has to be created BEFORE the ensemble that references it. It used to
# be defined after VotingClassifier(...), which raises NameError on a fresh
# kernel and only worked thanks to leftover state from an earlier run.
clf_svc = LinearSVC(random_state=42)

# Majority-vote ensemble of the two linear models.
# NOTE(review): with only two voters a hard vote can tie; scikit-learn then
# picks the class that comes first in sorted label order - confirm this is
# the intended tie-break or add a third estimator.
clf_vot = VotingClassifier(
    estimators=[
        ('clf_pa', clf_pa),
        ('clf_svc', clf_svc),
    ],
    voting='hard',
)

In [178]:
# Fit every candidate on the TF-IDF training matrix and print its accuracy on
# the held-out split, labelled with the estimator's class name.
for clf_ in (clf_nb, clf_pa, clf_svc, clf_vot):
    clf_.fit(tfidf_train, y_train)
    pred_clf = clf_.predict(tfidf_test)
    print(type(clf_).__name__, 'accuracy', accuracy_score(y_test, pred_clf))
    

MultinomialNB accuracy 0.8453038674033149
PassiveAggressiveClassifier accuracy 0.9392265193370166




LinearSVC accuracy 0.936069455406472
VotingClassifier accuracy 0.9329123914759274


In [179]:
# Classify the hand-picked article with the passive-aggressive model that was
# fitted on the TF-IDF features in the evaluation loop.
pred_final = clf_pa.predict(tfidf_text)

In [180]:
# Show the predicted label(s) for the article as plain text.
print(pred_final)

['REAL']
