In [4]:
# this is our preamble cell :
import plotly.graph_objects as go
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt

import category_encoders as ce
from sklearn.model_selection import train_test_split


from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from category_encoders import OrdinalEncoder

from sklearn import cluster
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.tree import plot_tree


from joblib import dump

# from shap import TreeExplainer, initjs, force_plot
from pdpbox.pdp import pdp_interact, pdp_interact_plot

In [5]:
# importing and minor cleaning first, parsing dates. 

dfreal = pd.read_csv('True.csv',
                    parse_dates = ['date'])
#                    index_col = 'date')
dfreal['Fake'] = 0
print('Real data shape: ', dfreal.shape)

dffake = pd.read_csv('Fake.csv',
                    parse_dates = ['date'])
#                    index_col = 'date')

dffake['Fake'] = 1

# I have added the boolean column for Fake/Real to each. 

# This next bit will clean up the messy date columns from the fake csv. 

searchfor = ['http', '-', 'MSNBC']
dffake = dffake[~dffake['date'].str.contains('|'.join(searchfor))]

print('Fake data shape: ', dffake.shape)

Real data shape:  (21417, 5)
Fake data shape:  (23436, 5)


In [6]:
# Now, I want to trim them to be the same size, so that we have our baseline right at 0.5 -the same as flipping a coin. 

dfreal_trimmed = dfreal[-21_400 :]


dffake_trimmed = dffake[-21_400 :]


# and now combine them into one dataframe:
df_joined = dfreal_trimmed.append(dffake_trimmed, ignore_index=True)

df_joined['date'] = pd.to_datetime(df_joined['date'])
df_joined.drop(['subject'], axis=1, inplace=True)
df_joined.drop(['date'], axis=1, inplace=True)

df_joined = df_joined.drop(['title'], axis=1)

print(df_joined.columns)

Index(['text', 'Fake'], dtype='object')


In [7]:
target_body = df_joined['Fake']
Xb = df_joined['text']
yb = target_body


Xb_train, Xb_test, yb_train, yb_test = train_test_split(Xb, yb, test_size=0.2, random_state=42)

# doing 20/80 split and 42. and then the same to split val set from train set.

Xb_train, Xb_val, yb_train, yb_val = train_test_split(Xb_train, yb_train, test_size=0.2, random_state=42) 

print(Xb_train.shape)
print(yb_train.shape)
print(Xb_val.shape)
print(yb_val.shape)
print(Xb_test.shape)
print(yb_test.shape)

(27392,)
(27392,)
(6848,)
(6848,)
(8560,)
(8560,)


In [58]:
model_dash_TFIDF_5_50 = Pipeline([
    ('vectorizer',TfidfVectorizer(stop_words = 'english', strip_accents ='ascii', max_features = 100, min_df= 0.25 , max_df= 0.75)),
    #('dim_red', TruncatedSVD(n_components=19, random_state=42)),
    ('encoder', OrdinalEncoder()),
    ('predictor', GradientBoostingClassifier(random_state=42))
])

model_dash_TFIDF_5_50.fit(Xb_train, yb_train);
# print('train: ', model_dash_TFIDF_5_50.score(Xb_train, yb_train))
# print('val: ', model_dash_TFIDF_5_50.score(Xb_val, yb_val))
# print('test: ',model_dash_TFIDF_5_50.score(Xb_test, yb_test))


train:  0.9951445677570093
val:  0.9919684579439252
test:  0.994392523364486


In [74]:
model_dash_TFIDF_5_50.predict(Xb_train[:30])

array([1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 0, 1], dtype=int64)

In [9]:
#joblib dump
saved_model = dump(model_dash_TFIDF_5_50, 'saved_model.joblib')

In [79]:
d = {'text' : ['He s not wrong.Demonstrating that he is the only adult in a campaign season full of Republicans, Democratic presidential candidate Bernie Sanders ripped the remaining members of the GOP field for acting like children instead of adults who want to be the leader of the free world. What we are seeing in the Republican presidential process is like a sixth grade food fight that you see in a cafeteria,  Sanders said during a stop in Michigan. I think that is not what the American people want. The American people know we have some serious problems and they want to hear some serious solutions to those problems, not vicious personal attacks,  he continued.Sanders went on to shift his focus from the Republican toddlers to the real problems Americans face such as income inequality, corporate greed, and mass imprisonment.Later on, the Vermont Senator blasted Donald Trump, Ted Cruz, and Marco Rubio again for acting like little kids instead of responsible adults. They really do sound like sixth-grade food fights, where amazingly enough adults in their 50s and 60s are throwing food at each other and cursing at each other and making fun of each other and insulting each other,  Sanders said.Here s the video:And Bernie s assessment of the GOP is absolutely spot on. During the last Republican debate, Trump, Rubio, and Cruz put on a circus as they insulted each other. Trump even went so far as to talk about how big his penis is.It s been even more embarrassing on the campaign trail as Rubio has suggested that Trump has a small penis because he has small hands and even suggested that Trump wet himself during the debate. Trump has hit Rubio by mocking his need to quench his thirst while delivering the Republican response to the State of the Union Address a few years ago.It s far from the civility one expects from people who are trying to convince American voters that they are the right person to lead the country and solve problems. But the Republicans are too busy trading insults and talking trash instead of focusing on issues that Americans truly care about.Meanwhile, Democratic candidates Bernie Sanders and Hillary Clinton are taking this election seriously, proving that either one of them would be a good President of the United States who will be respected by the world instead of being laughed at and treated like a joke.Featured image via Bernie Sanders Campaign Website','21st Century Wire says Those on the  left  need to start taking responsibility for their actions.Kathy Griffin, an alleged comedian, has received immense backlash for a stunt she pulled earlier in the week. Griffin apparently thought it would be a great idea to pose with a blood soaked effigy of President Donal Trump s decapitated head.Now, after making the incendiary, and some may say threatening, photograph, she is attempting to turn the narrative around claiming to be a victim of  bullying .Similarly, Hillary Clinton is blaming a conspiracy of 1000 Russian agents working against her, fake news, Twitter bots, and misogyny for her election loss. She makes no mention of the fact that it was clear the Democratic voters wanted Bernie Sanders, not her, and that simply not being Donald Trump was not a good enough reason to vote for her.The massive problem with this scapegoating and more is discussed in the following video report: READ MORE TRUMP NEWS AT: 21st Century Wire Trump FilesSUPPORT 21WIRE  SUBSCRIBE & BECOME A MEMBER @21WIRE.TV','BEIJING (Reuters) - Beijing s city authorities have taken down from their website a policy document put up just a few days ago that looked to help improve the city s notorious air quality by banning construction during winter months. It is unclear if the move means the prohibition is no longer in place, with an official at the Beijing Municipal Commission of Housing and Urban-Rural Development who gave his name as Yu saying the document had been pulled from the website due to misunderstandings over the rules in media reports. He declined to give further details. The statement, dated Sept. 15, was posted on the commission s website last Friday, but it was no longer available on Wednesday. It was not clear when it was withdrawn. Under the plan, all construction of road and water projects, as well as demolition of housing, would be banned from Nov. 15 to March 15 within the city s six major districts and surrounding suburbs. As part of dust control measures, the government often instructs construction sites in northern cities to close during bouts of heavy smog in the winter when households crank up heating, drawing on the power grid which is mainly fueled by coal. Provincial authorities are rushing to enforce the central government s ambitious targets for preventing toxic air during the upcoming colder months as it has ramped up its years-long war on smog. The possible pulling of the construction rules underscores the complexity of implementing some of the steps. Among the most stringent measures are orders for heavy industry such as steel mills to curb output by as much as 50 percent during the colder months. Recent checks of factories across the north have forced many to close or curb operations, roiling supplies of some critical raw materials like coke and coal and sending prices of base metals soaring.']}

pd.DataFrame(data=d)

model_dash_TFIDF_5_50.predict(pd.DataFrame(data=d))


array([1], dtype=int64)

In [81]:
dtest = pd.DataFrame(data=d)
display(dtest)

Unnamed: 0,text
0,He s not wrong.Demonstrating that he is the on...
1,21st Century Wire says Those on the left nee...
2,BEIJING (Reuters) - Beijing s city authorities...


In [84]:
model_dash_TFIDF_5_50.predict(dtest['text'])

array([1, 1, 0], dtype=int64)

In [37]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [73]:
Xb_train.head()

27029    He s not wrong.Demonstrating that he is the on...
41417    21st Century Wire says Those on the  left  nee...
19390    BEIJING (Reuters) - Beijing s city authorities...
20653    (Reuters) - Airlines were racing against the c...
21793    Joni Ernst may portray herself as tough in her...
Name: text, dtype: object

In [77]:
Xb_train.iloc[2]

'BEIJING (Reuters) - Beijing s city authorities have taken down from their website a policy document put up just a few days ago that looked to help improve the city s notorious air quality by banning construction during winter months. It is unclear if the move means the prohibition is no longer in place, with an official at the Beijing Municipal Commission of Housing and Urban-Rural Development who gave his name as Yu saying the document had been pulled from the website due to misunderstandings over the rules in media reports. He declined to give further details. The statement, dated Sept. 15, was posted on the commission s website last Friday, but it was no longer available on Wednesday. It was not clear when it was withdrawn. Under the plan, all construction of road and water projects, as well as demolition of housing, would be banned from Nov. 15 to March 15 within the city s six major districts and surrounding suburbs. As part of dust control measures, the government often instruct

In [51]:
df_joined.head(1)
df_joined.head(-1)

Unnamed: 0,text,Fake
0,The following statements were posted to the ve...,0
1,(Reuters) - A U.S. appeals court in Washington...,0
2,(Reuters) - A gift-wrapped package addressed t...,0
3,WASHINGTON (Reuters) - A federal judge in Seat...,0
4,NEW YORK (Reuters) - The U.S. Justice Departme...,0
...,...,...
42794,Robert Fantina CounterpunchAlthough the United...,1
42795,21st Century Wire says As 21WIRE reported earl...,1
42796,21st Century Wire says It s a familiar theme. ...,1
42797,Patrick Henningsen 21st Century WireRemember ...,1
