In [79]:
'''https://practicaldatascience.co.uk/machine-learning/how-to-detect-fake-news-with-machine-learning'''

'https://practicaldatascience.co.uk/machine-learning/how-to-detect-fake-news-with-machine-learning'

In [80]:
import pandas as pd
import nltk
import sklearn
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
import joblib

In [81]:
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 0.24.2.


In [82]:
df_fake = pd.read_csv('fake.csv')
df_true = pd.read_csv('true.csv')

df_fake['label'] = 1
df_true['label'] = 0

df = pd.concat([df_fake, df_true], axis=0)
df.head()

Unnamed: 0,title,text,subject,date,Unnamed: 4,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",,1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",,1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",,1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",,1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",,1


In [83]:
df['label'].value_counts()

1    33810
0    31804
Name: label, dtype: int64

In [84]:
def tokenize(column):
    """Tokenizes a Pandas dataframe column and returns a list of tokens.
    
    Args:
        column: Pandas dataframe column (i.e. df['text']).
    
    Returns:
        tokens (list): Tokenized list, i.e. [Donald, Trump, tweets]
    
    """
    
    tokens = nltk.word_tokenize(column)
    return [w for w in tokens if w.isalpha()]

In [86]:
df['tokenized'] = df.apply(lambda x: tokenize(str(x['text'])), axis=1)
df[['title', 'tokenized']].head()

Unnamed: 0,title,tokenized
0,Donald Trump Sends Out Embarrassing New Year’...,"[Donald, Trump, just, couldn, t, wish, all, Am..."
1,Drunk Bragging Trump Staffer Started Russian ...,"[House, Intelligence, Committee, Chairman, Dev..."
2,Sheriff David Clarke Becomes An Internet Joke...,"[On, Friday, it, was, revealed, that, former, ..."
3,Trump Is So Obsessed He Even Has Obama’s Name...,"[On, Christmas, day, Donald, Trump, announced,..."
4,Pope Francis Just Called Out Donald Trump Dur...,"[Pope, Francis, used, his, annual, Christmas, ..."


In [87]:
def punctuation_to_features(df, column):
    """Identify punctuation within a column and convert to a text representation.
    
    Args:
        df (object): Pandas dataframe.
        column (string): Name of column containing text. 
        
    Returns:
        df[column]: Original column with punctuation converted to text, 
                    i.e. "Wow! > "Wow exclamation"
    
    """
    
    df[column] = df[column].replace('!', ' exclamation ')
    df[column] = df[column].replace('?', ' question ')
    df[column] = df[column].replace('\'', ' quotation ')
    df[column] = df[column].replace('\"', ' quotation ')
    
    return df[column]

In [88]:
df['text'] = punctuation_to_features(df, 'text')

In [89]:
def remove_stopwords(tokenized_column):
    """Return a list of tokens with English stopwords removed. 
    
    Args:
        column: Pandas dataframe column of tokenized data from tokenize()
    
    Returns:
        tokens (list): Tokenized list with stopwords removed.
    
    """
    stops = set(stopwords.words("english"))
    return [word for word in tokenized_column if not word in stops]

In [90]:
df['stopwords_removed'] = df.apply(lambda x: remove_stopwords(x['tokenized']), axis=1)
df[['title', 'stopwords_removed']].head()

Unnamed: 0,title,stopwords_removed
0,Donald Trump Sends Out Embarrassing New Year’...,"[Donald, Trump, wish, Americans, Happy, New, Y..."
1,Drunk Bragging Trump Staffer Started Russian ...,"[House, Intelligence, Committee, Chairman, Dev..."
2,Sheriff David Clarke Becomes An Internet Joke...,"[On, Friday, revealed, former, Milwaukee, Sher..."
3,Trump Is So Obsessed He Even Has Obama’s Name...,"[On, Christmas, day, Donald, Trump, announced,..."
4,Pope Francis Just Called Out Donald Trump Dur...,"[Pope, Francis, used, annual, Christmas, Day, ..."


In [91]:
def apply_stemming(tokenized_column):
    """Return a list of tokens with Porter stemming applied.
    
    Args:
        column: Pandas dataframe column of tokenized data with stopwords removed.
    
    Returns:
        tokens (list): Tokenized list with words Porter stemmed.
    
    """
    
    stemmer = PorterStemmer() 
    return [stemmer.stem(word).lower() for word in tokenized_column]

In [92]:
df['porter_stemmed'] = df.apply(lambda x: apply_stemming(x['stopwords_removed']), axis=1)
df[['title', 'porter_stemmed']].head()

Unnamed: 0,title,porter_stemmed
0,Donald Trump Sends Out Embarrassing New Year’...,"[donald, trump, wish, american, happi, new, ye..."
1,Drunk Bragging Trump Staffer Started Russian ...,"[hous, intellig, committe, chairman, devin, nu..."
2,Sheriff David Clarke Becomes An Internet Joke...,"[on, friday, reveal, former, milwauke, sheriff..."
3,Trump Is So Obsessed He Even Has Obama’s Name...,"[on, christma, day, donald, trump, announc, wo..."
4,Pope Francis Just Called Out Donald Trump Dur...,"[pope, franci, use, annual, christma, day, mes..."


In [93]:
def rejoin_words(tokenized_column):
    return ( " ".join(tokenized_column))

In [94]:
df['all_text'] = df.apply(lambda x: rejoin_words(x['porter_stemmed']), axis=1)
df[['title', 'all_text']].head()

Unnamed: 0,title,all_text
0,Donald Trump Sends Out Embarrassing New Year’...,donald trump wish american happi new year leav...
1,Drunk Bragging Trump Staffer Started Russian ...,hous intellig committe chairman devin nune go ...
2,Sheriff David Clarke Becomes An Internet Joke...,on friday reveal former milwauke sheriff david...
3,Trump Is So Obsessed He Even Has Obama’s Name...,on christma day donald trump announc would bac...
4,Pope Francis Just Called Out Donald Trump Dur...,pope franci use annual christma day messag reb...


In [95]:
X = df['all_text']
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, shuffle=True)

In [96]:
bundled_pipeline = Pipeline([("tfidf", TfidfVectorizer()), ("clf", LinearSVC())])
bundled_pipeline.fit(X_train, y_train)
y_pred = bundled_pipeline.predict(X_test)

In [97]:
# export classifier
joblib.dump(bundled_pipeline, 'fake_news_model.pkl', compress=0)

['fake_news_model.pkl']

In [98]:
print(classification_report(y_test, y_pred))
print('Accuracy:',accuracy_score(y_test, y_pred))
print('F1 score:',f1_score(y_test, y_pred))
print('ROC/AUC score:',roc_auc_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.96      0.96      9403
           1       0.96      0.97      0.96     10282

    accuracy                           0.96     19685
   macro avg       0.96      0.96      0.96     19685
weighted avg       0.96      0.96      0.96     19685

Accuracy: 0.9631699263398527
F1 score: 0.9649470579703139
ROC/AUC score: 0.9628258656250019


In [99]:
title = """Walmart boosts minimum wage again, hands out $1,000 bonuses"""
text = '''Walmart boosts minimum wage again, hands out $1,000 bonuses

Walmart is boosting the minimum hourly wage for its U.S. employees to $11 and dishing out bonuses of up to $1,000, crediting President Trump's tax cut for enabling the move.

The increase for the nation's largest private employer also comes amid political pressure to bolster minimum wages and comes amid a hot streak for the retailer.

The big-box store chain, which has more than 1 million U.S. hourly employees, will reap a windfall from the U.S. corporate tax rate cut to 21%, down from 35%.

“Today, we are building on investments we’ve been making in associates, in their wages and skills development,” Walmart CEO Doug McMillon said in a statement. “It’s our people who make the difference and we appreciate how they work hard to make every day easier for busy families.”

The move also comes amid a 17-year low for the U.S. unemployment rate, which has intensified competition for workers.

At the same time, Walmart confirmed Thursday that it is closing about 50 Sam's Club stores and converting another dozen or so into online fulfillment centers. Those closures suggest that the retail landscape remains extremely challenging for certain brands.

Walmart's wage-boost announcement follows a series of similar moves by major corporations that also credited the tax cut with bolstering their businesses.

For example, AT&T and Comcast dished out one-time bonuses of $1,000 to non-management workers, and Wells Fargo and Fifth Third Bancorp boosted base hourly pay to $15.

Economists have argued that one-time bonuses, while significant for workers living paycheck to paycheck, are not as meaningful as permanent wage increases. Critics of tax reform have said companies are dishing out bonuses for the sake of good publicity and to curry favor with the White House.

"Walmart would have had to go to at least $11 in many markets in order to retain reliable employees," University of Michigan business professor Erik Gordon said. "The tax cut made it easier for the company to swallow."

But, Gordon said, the move indicates that "the corporate tax cut's touted trickle-down effects that were met with skepticism by critics may be happening."

Charles Fishman, author of The Wal-Mart Effect, said the move shows that the retailer is betting on solid economic growth for the next two to three years. It also reflected a recognition that Walmart needed to "up its game to keep its employees and get new employees," he said.

"They’ve started to change the way they treat employees because they can’t take them for granted anymore," Fishman said.

Walmart's wage increase will take effect in February. The company said the one-time bonuses would cost the company $400 million and will be based on length of service. Employees who have worked for at least 20 years will get the full $1,000, but most will get up to a few hundred dollars.

The company is also boosting its paid maternity leave policy for full-time hourly workers to 10 weeks at full pay. Previously, mothers got six to eight weeks at half pay.

Walmart is also increasing its paid leave policy for new fathers, non-birthing mothers, adoptive parents and foster parents to six weeks at full pay for full-time hourly workers. Previously, the company didn't offer paid leave for them.

The changes in parental leave put salaried and hourly workers on the same footing.

Walmart also said it would offer financial help to full-time hourly and salaried employees who are seeking to adopt children.

The minimum wage increase comes about a year after Walmart hiked hourly pay to at least $10 for most workers. Twelve states already have minimum wages of at least $10, according to the Economic Policy Institute.

The wage increase comes as Walmart is on a roll. While many traditional retailers have been struggling to keep up with online behemoth Amazon, Walmart has fared better. Walmart's online business is surging.'''

In [100]:
subs = {
    '!': ' exclamation ',
    '?': ' question ',
    '\'': ' quotation ',
    '\"': ' quotation ',
    '”': ' quotation ',
    '“': ' quotation ',
    '’': ' apostrophe ',
}

In [101]:
tokens = nltk.word_tokenize(text)
# print(map(subs.get, tokens, tokens))
tokens = [subs.get(item,item) for item in tokens]
tokens_no_sw = [word for word in tokens if not word in stopwords.words('english')]
stemmer = PorterStemmer() 
final = [stemmer.stem(word).lower() for word in tokens_no_sw]

In [102]:
new_final = " ".join(final)
new_final

"walmart boost minimum wage , hand $ 1,000 bonus walmart boost minimum hourli wage u.s. employe $ 11 dish bonus $ 1,000 , credit presid trump 's tax cut enabl move . the increas nation 's largest privat employ also come amid polit pressur bolster minimum wage come amid hot streak retail . the big-box store chain , 1 million u.s. hourli employe , reap windfal u.s. corpor tax rate cut 21 % , 35 % .  quotation  today , build invest  apostrophe  make associ , wage skill develop ,  quotation  walmart ceo doug mcmillon said statement .  quotation  it  apostrophe  peopl make differ appreci work hard make everi day easier busi families.  quotation  the move also come amid 17-year low u.s. unemploy rate , intensifi competit worker . at time , walmart confirm thursday close 50 sam 's club store convert anoth dozen onlin fulfil center . those closur suggest retail landscap remain extrem challeng certain brand . walmart 's wage-boost announc follow seri similar move major corpor also credit tax cu

In [103]:
X = [new_final]

In [104]:
y_pred = bundled_pipeline.predict(X)

In [105]:
y_pred

array([0])

In [106]:
fn_model = joblib.load('fake_news_model.pkl')

In [107]:
y_pred = fn_model.predict(X)
y_pred

array([0])