# Scraping news from fake news websites

Fake news websites are taken from
https://github.com/BigMcLargeHuge/opensources/blob/master/sources/sources.csv

## Fake news website list

In [32]:
url_interestingdailynews = 'https://www.interestingdailynews.com'
url_nbccomco = 'http://nbc.com.co'
url_newsbbcnet = 'http://www.newsbbc.net'
url_madworldnews = 'http://madworldnews.com'
url_majorthoughts = 'http://majorthoughts.com'
url_nationonenews = 'http://nationonenews.com'

In [13]:
import newspaper
import pandas as pd
import numpy as np
import os

In [24]:
# collect all or most recent 100 articles from a website
def generate_data(news_url):
    col_names = ["url","source", "title", "author", "text"]
    article_df = pd.DataFrame(columns = col_names)
    
    print(news_url)
    news_articles = newspaper.build(news_url, memoize_articles=False)
    
    size_articles = news_articles.size()
    num_news = 100
    if size_articles < num_news:
        num_news = size_articles
     
    news_brand = news_articles.brand
    count = 0
    for i in range(0,num_news):
        article = news_articles.articles[i];
        try:
            article.download()
            article.parse()
            print(article.url)
            
            entry = pd.DataFrame([[article.url, news_brand, \
                                   article.title, article.authors, article.text]], columns=col_names)
            article_df = article_df.append(entry)
            count += 1
        except:
            pass
        
    print("The total number of " + str(news_brand) + " articles is ", count) 
        
    article_df.set_index('url', inplace=True)
    return article_df    

In [63]:
# check the number of articles in the website before scraping 
# some websites do not have many articles
url = 'http://nationonenews.com'
articles_list = newspaper.build(url,memoize_articles=False)
articles_list.size()

62

## 1. Obtain news from 'https://www.interestingdailynews.com'

In [33]:
df_interestingdailynews = generate_data(url_interestingdailynews)

https://www.interestingdailynews.com
You must `download()` an article first!
You must `download()` an article first!
https://www.interestingdailynews.com/not-just-cnn-griffin-dumped-advertisers-career-self-decapitation/
https://www.interestingdailynews.com/man-rapes-infant-leaves-something-remember/
https://www.interestingdailynews.com/pro-trump-billboard-causes-huge-controversy-small-southern-town/
https://www.interestingdailynews.com/huge-melee-breaks-out-trump-rally-attacked-by-thugs-but-supporters-were-prepared-for-war-video/
https://www.interestingdailynews.com/2017-chevrolet-cruze-diesel-epa-rated-3052-mpg-with-manual/
https://www.interestingdailynews.com/benellis-upcoming-750cc-parallel-twin-naked-spotted/
https://www.interestingdailynews.com/rachel-maddow-exposes-mike-pences-4-biggest-lies-must-watch/
https://www.interestingdailynews.com/alert-1-million-pounds-of-americas-favorite-frozen-food-recalled-throw-it-away-now-it-could-kill-your-family/
https://www.interestingdailynews

In [131]:
output_path = os.path.join('data','indv_file', 'interestingdailynews.csv')
df_interestingdailynews.to_csv(output_path)

In [35]:
# read data
input_path = output_path
data_interestingdailynews = pd.read_csv(input_path)

In [37]:
data_interestingdailynews.head()

Unnamed: 0,url,source,title,author,text
0,https://www.interestingdailynews.com/not-just-...,interestingdailynews,Not Just CNN: Griffin Dumped By Other Advertis...,['Interesting Daily News'],Not Just CNN: Griffin Dumped By Other Advertis...
1,https://www.interestingdailynews.com/man-rapes...,interestingdailynews,"Man Rapes Infant, Leaves Her With Something To...",['Interesting Daily News'],"Man Rapes Infant, Leaves Her With Something To..."
2,https://www.interestingdailynews.com/pro-trump...,interestingdailynews,Pro-Trump Billboard Causes ‘HUGE’ Controversy ...,['Interesting Daily News'],"It’s a stark, simple message on a black billbo..."
3,https://www.interestingdailynews.com/huge-mele...,interestingdailynews,HUGE MELEE BREAKS OUT! Trump Rally Attacked By...,['Interesting Daily News'],HUGE MELEE BREAKS OUT! Trump Rally Attacked By...
4,https://www.interestingdailynews.com/2017-chev...,interestingdailynews,2017 CHEVROLET CRUZE DIESEL EPA-RATED 30/52 MP...,['Interesting Daily News'],Nine-speed auto rated 31/47 mpg\n\nThe 2017 Ch...


## 2. Obtain news from 'http://nbc.com.co'

In [38]:
df_nbccomco = generate_data(url_nbccomco)

http://nbc.com.co
http://nbc.com.co/suicide-will-affect-aaron-hernandezs-estate-payout/
http://nbc.com.co/News/entertainment/music/
http://nbc.com.co/lifeguards-lose-protection-personal-injury-liability/
http://nbc.com.co/vince-gilligan-announces-breaking-bad-season-6-begins-shooting-august-2015/
http://nbc.com.co/best-places-find-free-roulette-game-web/
http://nbc.com.co/bill-murray-is-a-hero-in-london-after-saving-the-life-of-a-child-from-oncoming-traffic/
http://nbc.com.co/News/global/criminal/national/
http://nbc.com.co/News/news/
http://nbc.com.co/trump-to-build-wall-out-of-homes-for-veterans/
http://nbc.com.co/horror-movies-themes-online-games-popularity/
http://nbc.com.co/News/nbc-news/
http://nbc.com.co/News/global/criminal/regional/
http://nbc.com.co/News/global/criminal/
http://nbc.com.co/jonathan-gregory-the-creepiest-comedian-in-america/
http://nbc.com.co/News/politics/
http://nbc.com.co/personal-injury-claims-ford-explorer-rise/
http://nbc.com.co/News/lifestyle/
http://nbc

In [132]:
path = os.path.join('data','indv_file', 'nbccomco.csv')
df_nbccomco.to_csv(path)  

# test: read data
data_nbccomco = pd.read_csv(path)
data_nbccomco.tail()

Unnamed: 0,url,source,title,author,text
22,http://nbc.com.co/News/entertainment/,nbc,Entertainment Archives,"['Almira Anke', 'Hanif Ibrahim', 'Jacob Morgan']",People are somehow wired to love scary stuff. ...
23,http://nbc.com.co/top-5-large-tablets-buy-righ...,nbc,Top 5 Large Tablets To Buy Right Now,"['Almira Anke', 'Hanif Ibrahim', 'Jacob Morgan']",Full-size tablets have been popular ever since...
24,http://nbc.com.co/News/technology/,nbc,Technology Archives,"['Almira Anke', 'Hanif Ibrahim', 'Jacob Morgan']",Counter-Strike: Global Offensive is one of the...
25,http://nbc.com.co/hate-crimes-rise-since-trump...,nbc,Are Hate Crimes On The Rise Since Trump Took O...,"['Almira Anke', 'Hanif Ibrahim', 'Jacob Morgan']",Whether you are in favor of or against the Tru...
26,http://nbc.com.co/push-cap-damages-medical-mal...,nbc,Push To Cap Damages For Medical Malpractice On...,"['Almira Anke', 'Hanif Ibrahim', 'Jacob Morgan']",Big changes may be on the horizon for the stat...


## 3. Obtain news from 'http://www.newsbbc.net'

In [41]:
df_newsbbcnet = generate_data(url_newsbbcnet)

http://www.newsbbc.net
http://yournewswire.com/john-kerry-calls-for-russia-and-syria-war-crimes-investigation/
http://www.globalhealingcenter.com/natural-health/what-is-a-raw-vegan-diet/?a_aid=54e3fadc2316d
http://www.zerohedge.com/news/2016-10-06/leaked-memo-confirms-hillary-was-given-questions-ahead-interview
http://www.globalhealingcenter.com/natural-health/healthy-lifestyle-changes-to-make-today/
http://inhabitat.com/trump-sons-selling-post-inauguration-access-and-private-hunting-trip-for-1m/
http://www.dailymail.co.uk/news/article-3827732/U-S-accuses-Russia-hacking-attempts-political-groups.html
http://freebeacon.com/politics/clinton-campaign-helped-script-steve-harvey-interview/
http://time.com/4634078/rex-tillerson-south-china-sea-donald-trump/
http://www.newsbbc.net/2017/04/tribute-for-family-of-soldier-killed.html
http://yournewswire.com/scientific-study-towers-collapsed-due-to-controlled-demolition/
http://www.newsbbc.net/2017/07/cia-agent-confesses-on-deathbed-we-blew.html
h

In [133]:
path = os.path.join('data', 'indv_file','newsbbcnet.csv')
df_newsbbcnet.to_csv(path)  

# test: read data
data_newsbbcnet = pd.read_csv(path)
data_newsbbcnet.tail()  

Unnamed: 0,url,source,title,author,text
82,http://townhall.com/tipsheet/guybenson/2015/10...,newsbbc,Humiliation: Iran Test Fires Long-Range Missil...,"['Guy Benson', ""Cortney O'Brien"", 'Katie Pavli...","Oh, Here We Go: Left Wingers Say Trump's Speec..."
83,http://www.newsbbc.net/2016/10/us-officially-a...,newsbbc,U.S. Officially Accuses Russia Of Political Hacks,['Rossy Smith'],"Comprehensive up-to-date online news coverage,..."
84,http://www.newsbbc.net/2016/10/kanye-west-says...,newsbbc,Kanye West Says Illuminati Behind Paris Robbery,['Rossy Smith'],"Comprehensive up-to-date online news coverage,..."
85,http://www.nbcnews.com/meet-the-press/video/vp...,newsbbc,VP Biden on Russia and Cyber Warfare,[],MTP Exclusive: VP Biden Promises Response to R...
86,http://www.cbsnews.com/news/report-cia-lost-of...,newsbbc,Report: CIA Lost Office In WTC,['Cbsnews.Com Staff Cbsnews.Com Staff'],A secret office operated by the CIA was destro...


## 4. Obtain news from 'http://madworldnews.net'

In [50]:
df_madworldnews = generate_data(url_madworldnews) 

http://madworldnews.com
http://madworldnews.tumblr.com/post/163459150247/as-snowflakes-melt-over-trumps-transgender#notes
http://madworldnews.tumblr.com/post/163456261437/ben-jerrys-force-liberal-agenda-on-customers
http://madworldnews.tumblr.com/post/163455894382/teen-killed-dismembered-after-discovering-dads
http://madworldnews.tumblr.com/post/163458421947/what-dirty-dem-quietly-did-after-corrupt-it
http://madworldnews.tumblr.com/post/163456537032/melania-gets-revenge-on-michelle-obama-at-ohio
http://madworldnews.tumblr.com/post/163455658862/homesick-florida-woman-decides-to-check-google
http://madworldnews.tumblr.com/post/163457493467/us-senate-front-runner-declares-2-words-at-pork
http://madworldnews.tumblr.com/post/163459150247/as-snowflakes-melt-over-trumps-transgender
http://madworldnews.tumblr.com/post/163459516997/florida-thug-shoots-cop-in-the-face-gets-instant
http://madworldnews.tumblr.com/post/163455055437/ohio-pervert-raped-girlfriends-toddler-to-death#notes
http://madwor

In [134]:
path = os.path.join('data', 'indv_file','madworldnews.csv')
df_madworldnews.to_csv(path)  

# test: read data
data_madworldnews = pd.read_csv(path)
data_madworldnews.tail() 

Unnamed: 0,url,source,title,author,text
10,http://madworldnews.com/2017/07/,madworldnews,Home • Mad World News,[],Melania Trump is ruthlessly scrutinized by the...
11,http://madworldnews.tumblr.com/post/1634565370...,madworldnews,MadWorldNews.com • Melania Gets Revenge On Mic...,[],The Voice of Reason in an Insane World - Mad W...
12,http://madworldnews.tumblr.com/post/1634550554...,madworldnews,MadWorldNews.com • Ohio Pervert Raped Girlfrie...,[],The Voice of Reason in an Insane World - Mad W...
13,http://madworldnews.tumblr.com/post/1634584219...,madworldnews,MadWorldNews.com • What Dirty Dem Quietly Did ...,[],The Voice of Reason in an Insane World - Mad W...
14,http://madworldnews.tumblr.com/post/1634597849...,madworldnews,MadWorldNews.com • Liberals Freak Out As Trump...,[],The Voice of Reason in an Insane World - Mad W...


## 5. Obtain news from 'http://majorthoughts.com'

In [56]:
df_majorthoughts = generate_data(url_majorthoughts)

http://majorthoughts.com
http://www.majorthoughts.com/gata-top-demais-representando-muito-linda-twerk-brazil-twerk-shake_024c27997.html
http://www.majorthoughts.com/list-of-rappers-currently-in-jail_aefd81c8d.html
http://www.majorthoughts.com/one-thot-puts-deadly-hands-on-other-thot-ghetto-fight-kings-2k15-1_dccd92699.html
http://www.majorthoughts.com/meek-mill-tells-nicki-minaj-to-give-his-35million-or-see-smoke-pull-over_b73a0872d.html
http://www.majorthoughts.com/she-should-have-never-put-hands-on-her-baby-people-roll-up-to-two-women-having-a-cat_a9c75ea67.html
http://www.majorthoughts.com/boosie-badazz-dice-game-with-meek-mill-lucci_6be2ddb97.html
http://www.majorthoughts.com/5-moments-you-wouldn%e2%80%99t-believe-if-they-weren%e2%80%99t-recorded_aa6ced657.html
http://www.majorthoughts.com/meek-mill-when-she-give-my-30million-i-will-tell-her-who-robbed-her-house_b6f55583b.html
http://www.majorthoughts.com/eastside-ghetto-fights-i-got-it_519a869e2.html
http://www.majorthoughts.com/b

In [135]:
path = os.path.join('data', 'indv_file','majorthoughts.csv')
df_majorthoughts.to_csv(path)
# test: read data
data_majorthoughts = pd.read_csv(path)
data_majorthoughts.tail()  

Unnamed: 0,url,source,title,author,text
10,http://www.majorthoughts.com/how-to-get-a-bad-...,majorthoughts,HOW TO GET A BAD COP FIRED,[],Download your free copy of RISE OF THE WARRIOR...
11,http://www.majorthoughts.com/dbz-ghetto-fights...,majorthoughts,Dbz ghetto fights in the hood,[],"crazy shit goes on in the hood, enter if you d..."
12,http://www.majorthoughts.com/nicki-minaj-pulls...,majorthoughts,Nicki Minaj Pulls Up On Meek Mill At CVS Pharm...,[],Meek Mill in LA at CVS with Nicki Minaj\n\nSub...
13,http://www.majorthoughts.com/50-cent-clowns-bo...,majorthoughts,50 Cent Clowns Bow Wow Nails Bow Wow Challenge...,[],Thanks! Share it with your friends!\n\n×\n\nYo...
14,http://www.majorthoughts.com/uploads/thumbs/20...,majorthoughts,Major Thoughts,[],


## 6. Obtain news from 'http://nationonenews.com'

In [64]:
df_nationonenews = generate_data(url_nationonenews)

http://nationonenews.com
https://nationonenews.com/2017/07/23/breaking-san-antonio-human-smuggling-8-dead-28-serious-condition.html
https://nationonenews.com/2017/07/22/stein-recount-questions.html
https://nationonenews.com/2017/07/22/leaks-trump-going-money.html
https://nationonenews.com/2017/07/26/president-trump-just-started-war-can-win.html/feed
https://nationonenews.com/2017/07/14/trump-took-time-recognize-real-heros-france.html/feed
https://nationonenews.com/2017/03/06/porn-stars-get-offended.html
https://nationonenews.com/2016/11/18/video-high-school-takes-mannequin-challenge-new-extreme-raises-bar.html
https://nationonenews.com/2017/07/09/trump-helps-marine-everyone-missed.html
https://nationonenews.com/2017/05/16/immediately-abc-stabbed-tim-allen-back-learned-will-last-man-standing-not-abc.html
https://nationonenews.com/2017/07/10/trump-responds-report-says-james-comey-leaked-classified-information.html
https://nationonenews.com/2017/07/09/trump-helps-marine-everyone-missed.ht

In [136]:
path = os.path.join('data','indv_file', 'nationonenews.csv')
df_nationonenews.to_csv(path)
# test: read data
data_nationonenews = pd.read_csv(path)
data_nationonenews.tail() 

Unnamed: 0,url,source,title,author,text
57,https://nationonenews.com/c/news,nationonenews,Nation One News,[],We use cookies to give you the best possible e...
58,https://nationonenews.com/2017/07/22/stein-rec...,nationonenews,"Comments on: Jill Steins still has $1,361,834....",[],
59,https://nationonenews.com/2017/07/21/not-fast-...,nationonenews,"Not so fast, Scaramucci is not replacing Spice...",[],Some news organizations are clamoring to disto...
60,https://nationonenews.com/2017/07/05/watch-fox...,nationonenews,"[WATCH] Fox News Put The Washington ""Hurt"" On ...",[],The Democrats in Congress will stop at nothing...
61,https://nationonenews.com/2017/07/22/viral-vid...,nationonenews,Comments on: [Viral Video] Trump appears on Ga...,[],


## 7. infostormer

In [71]:
url_infostormer = 'http://www.infostormer.com'
df_infostormer = generate_data(url_infostormer)

http://www.infostormer.com
http://www.infostormer.com/tumor-mctumorface-aka-john-insane-mccain-returns-to-the-senate/
http://www.infostormer.com/category/news/
http://www.infostormer.com/jewess-debbie-wasserman-schultzs-paki-it-guy-detained-trying-to-flee-the-country/
http://www.infostormer.com/feds-accuse-google-of-not-paying-women-enough/
http://www.infostormer.com/category/news/radio/
http://www.infostormer.com/mixed-race-monkey-golfer-tiger-woods-arrested-for-dui/
http://www.infostormer.com/fbi-apparently-seized-smashed-computer-hard-drives-from-home-of-jewess-debbie-wasserman-schultz-paki-it-monkey/
http://www.infostormer.com/kikes-and-retarded-leftists-launch-major-protests-demanding-to-see-trumps-tax-returns/
http://www.infostormer.com/trump-delivers-monday-morning-tweet-storm/
http://www.infostormer.com/category/news/societal-degradation/
http://www.infostormer.com/trump-warns-boy-scouts-about-fake-news/
http://www.infostormer.com/16-year-old-german-girl-found-in-mosul-with-isi

In [137]:
path = os.path.join('data', 'indv_file','infostormer.csv')
df_infostormer.to_csv(path)
# test: read data
data_infostormer = pd.read_csv(path)
data_infostormer.tail() 

Unnamed: 0,url,source,title,author,text
34,http://www.infostormer.com/category/news/us-news/,infostormer,Infostormer.Com,[],This debate on healthcare has become ridiculou...
35,http://www.infostormer.com/oxford-study-claims...,infostormer,"Oxford Study Claims Rody Duterte Spent $200,00...",['Infostormer'],An Oxford study is claiming that Rody Duterte ...
36,http://www.infostormer.com/jewish-council-says...,infostormer,Jewish Council Says Anti-Semitism in Germany i...,['Infostormer'],It looks as if Jews in Germany are concerned t...
37,http://www.infostormer.com/category/news/jewis...,infostormer,Infostormer.Com,[],Even if you film a video promoting the officia...
38,http://www.infostormer.com/new-transformers-fi...,infostormer,New Transformers Film Features Spic Teen Lectu...,['Infostormer'],The entire series of Transformers films direct...


## 8. newswithviews

In [81]:
url_newswithviews = 'http://www.newswithviews.com'
df_newswithviews = generate_data(url_newswithviews)

http://www.newswithviews.com
http://southernoregon.newswithviews.com/josephine-county-is-suing-the-federal-government/
http://southernoregon.newswithviews.com/update-on-josephine-county-gmo-laws/
https://newswithviews.com/supreme-court-rules-in-favor-of-trumps-anti-terrorism-travel-law/
http://southernoregon.newswithviews.com/trump-campaign-worker-dr-diann-morrrison-will-speak-in-grants-pass-to-josephine-county-republicans/
http://nwpr.org/post/oregon-governor-issues-executive-order-meant-solidify-states-sanctuary-status
https://newswithviews.com/to-russia-with-love-part-3/
https://newswithviews.com/evangelicals-pray-for-president-in-w-h-democrats-outraged/
http://www.nbcnews.com/feature/college-game-plan/mental-health-problems-rising-among-college-students-n777286
https://newswithviews.com/unashamed-anti-zionism-and-demagoguery-of-pastor-chuck-baldwin/
https://newswithviews.com/left-montana-going-to-georgia-to-help-republican-karen-handel/
http://www.washingtontimes.com/news/2017/mar/

In [83]:
df_newswithviews.shape

(60, 4)

In [86]:
df_newswithviews = \
df_newswithviews.drop(['http://nwpr.org/post/oregon-governor-issues-executive-order-meant-solidify-states-sanctuary-status', \
                       'http://www.nbcnews.com/feature/college-game-plan/mental-health-problems-rising-among-college-students-n777286', \
                       'http://www.washingtontimes.com/news/2017/mar/22/us-heading-toward-lawlessness/',\
                       'http://www.foxnews.com/politics/2017/06/29/house-passes-kate-s-law-as-part-illegal-immigrant-crackdown.html', \
                       'http://www.oregonlive.com/health/index.ssf/2017/07/top_10_oregon_counties_for_gon.html',\
                       'http://www.lifenews.com/2017/02/16/oregon-senate-bill-494-would-allow-starving-mentally-ill-patients-to-death/',\
                       'http://theredelephants.com/breaking-trump-activist-stabbed-9-times/',\
                       'https://en.wikipedia.org/wiki/Civil_forfeiture_in_the_United_States', \
                       'https://www.yahoo.com/news/us-rolls-travel-ban-hit-162422613.html', \
                       'http://www.mailtribune.com/news/20170424/talent-voters-to-decide-on-diesel-tax', \
                       'http://www.mailtribune.com/article/20120724/news/207240315'
                      ])

In [87]:
df_newswithviews.shape

(49, 4)

In [138]:
path = os.path.join('data', 'indv_file','newswithviews.csv')
df_newswithviews.to_csv(path)
# test: read data
data_newswithviews = pd.read_csv(path)
data_newswithviews.tail() 

Unnamed: 0,url,source,title,author,text
44,https://newswithviews.com/nyt-interviews-trump...,newswithviews,NYT Interviews Trump About Sessions’ Recusal,[],NYT Interviews Trump About Sessions’ Recusal\n...
45,https://newswithviews.com/declaration-of-indep...,newswithviews,Declaration of Independence: Our Rights Come F...,[],Declaration of Independence: Our Rights Come F...
46,http://southernoregon.newswithviews.com/the-ma...,newswithviews,The March to World War III,[],"The March to World War III\n\nApril 24, 2017\n..."
47,https://newswithviews.com/author/news/,newswithviews,News With Views,['News With Views'],No Doctor or Insurance needed! Learn More
48,https://newswithviews.com/fbi-warned-to-retrie...,newswithviews,FBI Warned To Retrieve Documents Removed Unlaw...,[],FBI Warned To Retrieve Documents Removed Unlaw...


In [105]:
# check the number of articles in the website before scraping 
# some websites do not have many articles
url = 'http://politicot.com'
articles_list = newspaper.build(url,memoize_articles=False)
articles_list.size()

62

## 9. nephef

In [106]:
url_nephef = 'http://www.nephef.com'
df_nephef = generate_data(url_nephef)

http://www.nephef.com
http://www.nephef.com/key-rules-cold-weather-weight-loss/#mh-comments
http://www.nephef.com/whats-covfefe-trump-tweet-unites-bewildered-nation/
http://www.nephef.com/10-worst-foods-heart-according-cardiologist/
http://www.nephef.com/richard-gere-blasts-trump-immigration-policy-must-embrace-love/
http://www.nephef.com/whoopi-goldberg-navy-seal-widow-looking-attention/
http://www.nephef.com/fat-cutter-drink-extreme-weight-loss/#mh-comments
http://www.nephef.com/dhs-secretary-donald-trump-working-streamlined-travel-ban/
http://www.nephef.com/trump-may-affirm-special-relationship-us-uk/
http://www.nephef.com/category/news/world/
http://www.nephef.com/18-people-fired-participating-day-without-immigrants/
http://www.nephef.com/category/news/u-s-a/
http://www.nephef.com/key-rules-cold-weather-weight-loss/
http://www.nephef.com/trying-lose-weight-winter-5-tips-can-help/#mh-comments
http://www.nephef.com/fat-cutter-drink-extreme-weight-loss/
http://www.nephef.com/category/

In [139]:
path = os.path.join('data','indv_file', 'nephef.csv')
df_nephef.to_csv(path)
# test: read data
data_nephef = pd.read_csv(path)
data_nephef.tail() 

Unnamed: 0,url,source,title,author,text
26,http://www.nephef.com/trump-will-withdraw-u-s-...,nephef,Trump Will Withdraw U.S. From Paris Climate Ag...,[],Mr. Trump said that the United States will imm...
27,http://www.nephef.com/girls-daring-love-making...,nephef,"Girls, These Daring Love Making Positions Will...",[],It gets truly troublesome when you are comfort...
28,http://www.nephef.com/leader-next-womens-march...,nephef,Leader of next Women’s March is “Palestinian” ...,[],"The document was co-authored by, among others,..."
29,http://www.nephef.com/covfefe-tells-need-know-...,nephef,‘Covfefe’ tells you all you need to know about...,[],"And then, “covfefe.”\n\nWhat it should prove i..."
30,http://www.nephef.com/trying-lose-weight-winte...,nephef,Trying To Lose Weight This Winter? These 5 Tip...,[],"Come New Years Day, most people kick into high..."


## 10. local31news

In [109]:
url_local31news = 'http://local31news.com'
df_local31news = generate_data(url_local31news)

http://local31news.com
http://local31news.com/featured/how-daylight-savings-affects-us-in-the-north/
http://local31news.com/featured/how-brits-see-american-tv/
http://local31news.com/prescott-arizona/jennifer-aniston-said-this-about-prescott-arizona-residents-2/
http://local31news.com/featured/elephant-roars-like-a-dinosaur/
http://local31news.com/portland-texas/jim-carrey-moves-to-portland-texas-tells-us-why/
http://local31news.com/featured/teens-save-kidnap-victim-who-mouthed-help-me-from-car/
http://local31news.com/featured/the-scariest-man-on-the-planet-has-the-weakest-battle-scream-on-the-planet/
http://local31news.com/featured/americas-oldest-living-veteran-credits-his-long-life-to-cigars-whiskey-and-guns/
http://local31news.com/featured/pastor-fails-miserably-when-interviewed-by-court-about-how-he-lavishly-spends-the-ministrys-funds/
http://local31news.com/noblesville-indiana/tom-hanks-explains-why-hes-moving-to-noblesville-indiana/
http://local31news.com/featured/temps-in-arizo

In [140]:
path = os.path.join('data','indv_file', 'local31news.csv')
df_local31news.to_csv(path)
# test: read data
data_local31news = pd.read_csv(path)
data_local31news.tail() 

Unnamed: 0,url,source,title,author,text
16,http://local31news.com/featured/biggie-smalls-...,local31news,Biggie Smalls Calms Down Crying Baby,[],“I appreciate that he was rigorous enough in h...
17,http://local31news.com/wilmington-delaware/sam...,local31news,Samuel L. Jackson Says This About the Resident...,[],"WILMINGTON, Delaware – During a radio intervie..."
18,http://local31news.com/featured/chris-pratt-ac...,local31news,Chris Pratt Accidentally Gives Away Travel Tri...,[],Due to some shady behavior on game shows in th...
19,http://local31news.com/mesquite-nevada/matt-da...,local31news,Matt Damon Had This to Say About the Residents...,[],"MESQUITE, Nevada – During a radio interview th..."
20,http://local31news.com/featured/shark-breaches...,local31news,Shark Breaches Diving Cage with Diver Inside,[],Pretty lame to see these sharks are baited so ...


## 11. lastdeplorables

In [112]:
url_lastdeplorables = 'http://lastdeplorables.com'
df_lastdeplorables = generate_data(url_lastdeplorables)

http://lastdeplorables.com
http://lastdeplorables.com/obama-caught-sneaking-back-u-s-alaska/
http://lastdeplorables.com/muslim-cadet-refuses-remove-hijab-military-college-wipes-smug-grin-off-face/
http://lastdeplorables.com/category/news/
http://lastdeplorables.com/barack-just-broke-law-take-president-secretly-met-behind-trumps-back/
http://lastdeplorables.com/don-lemon-walks-off-cnn-set-guest-calls-fake-news/
http://lastdeplorables.com/muslim-punches-white-bus-driver-allah-forgot-teach-one-thing-video/
http://lastdeplorables.com/breaking-look-just-shot-sky-alaska-north-korea/
http://lastdeplorables.com/guilty-guess-got-caught-smuggling-muslims-united-states/
http://lastdeplorables.com/facebook-destroys-massive-pro-trump-group-deplorables-attack/
http://lastdeplorables.com/breaking-california-wants-18-million-else/
http://lastdeplorables.com/president-donald-trump-sends-plane-save-charlie-gards-life/
http://lastdeplorables.com/angry-muslim-confronts-cashier-wearing-u-s-flag-happened-ne

In [141]:
path = os.path.join('data','indv_file', 'lastdeplorables.csv')
df_lastdeplorables.to_csv(path)
# test: read data
data_lastdeplorables = pd.read_csv(path)
data_lastdeplorables.tail() 

Unnamed: 0,url,source,title,author,text
19,http://lastdeplorables.com/trumps-border-wall-...,lastdeplorables,Trump’s Border Wall Has One New Feature That E...,['Robert Amnor'],Of all the wonderful things that President Don...
20,http://lastdeplorables.com/trumps-new-air-forc...,lastdeplorables,"Trump’s New Air Force One Just Arrived, Look W...",['Robert Amnor'],Seeing how the president receives many threats...
21,http://lastdeplorables.com/whoopi-liberal-holl...,lastdeplorables,Whoopi: Liberal Hollywood Celebrities ARE THE ...,['Robert Amnor'],Whoopi Goldberg of The View is tired of all th...
22,http://lastdeplorables.com/obama-abruptly-ends...,lastdeplorables,Obama Abruptly Ends Vacay And Shows Up In D.C....,['Robert Amnor'],Obama has had an extremely difficult time reli...
23,http://lastdeplorables.com/24-hours-ohio-musli...,lastdeplorables,24 Hours After Ohio Muslim Is Charged In Terro...,['Robert Amnor'],"Time and time again, Muslims continue to prove..."


## 12. ladylibertysnews

In [115]:
url_ladylibertysnews = 'http://ladylibertysnews.com'
df_ladylibertysnews = generate_data(url_ladylibertysnews)

http://ladylibertysnews.com
http://ladylibertysnews.com/stop-operation-soros-sos-huge-movement-overthrow-george-soros-explodes-macedonia/
http://ladylibertysnews.com/australia-calling-total-ban-muslim-immigration-mosques-burkas/
http://ladylibertysnews.com/category/news/
http://ladylibertysnews.com/drug-dealer-released-early-obama-program-murders-woman-kids/
http://ladylibertysnews.com/caught-webs-obama-unmasking-probe-just-blew-wide-open/
http://ladylibertysnews.com/epic-smackdown-lou-dobbs-corrupt-history-obama-just-hit-fainting-couch/
http://ladylibertysnews.com/fire-brimstone-trump-terrifies-republican-senators-ultimate-threat/
http://ladylibertysnews.com/thousands-muslims-take-streets-nyc-protest-trump-refugee-ban/
http://ladylibertysnews.com/neil-7done-counter-strike-supreme-court-just-gave-trump-best-news-life/
http://ladylibertysnews.com/muslim-migration-planned-invasion-destroy-western-christendom-according-bachmann/
http://ladylibertysnews.com/secret-meeting-trump-mattis-chan

In [142]:
path = os.path.join('data', 'indv_file','ladylibertysnews.csv')
df_ladylibertysnews.to_csv(path)
# test: read data
data_ladylibertysnews = pd.read_csv(path)
data_ladylibertysnews.tail() 

Unnamed: 0,url,source,title,author,text
14,http://ladylibertysnews.com/rosenstein-just-de...,ladylibertysnews,ROSENSTEIN Just DESTROYED James Comey With One...,"[""Alen O'Neill""]",Former FBI Director James Comey was slow durin...
15,http://ladylibertysnews.com/muslims-offended-k...,ladylibertysnews,Radical Muslims in Australia OFFENDED That The...,['Todd Cooney'],"How intolerant can a group of people get, folk..."
16,http://ladylibertysnews.com/major-blunder-bill...,ladylibertysnews,MAJOR BLUNDER: Bill Clinton’s Former Adviser S...,"[""Alen O'Neill""]",The mainstream media cannot find legitimate ne...
17,http://ladylibertysnews.com/liberalism-looks-l...,ladylibertysnews,"LIBERALISM TODAY: Leftist Thug Has 15 Kids, 13...",['Ward Bond'],This is exactly what Trump is trying to end. F...
18,http://ladylibertysnews.com/thugs-thought-home...,ladylibertysnews,THUGS Thought Home Invasion & Rape Were A Good...,"[""Alen O'Neill""]",Protecting one’s family is inherent in most no...


## 13. krbcnews

In [118]:
url_krbcnews = 'http://krbcnews.com'
df_krbcnews = generate_data(url_krbcnews)

http://krbcnews.com
http://krbcnews.com/2016/04/14/houston-violent-anti-trump-protesters-block-tx-35-near-hobby-airport/#respond
http://krbcnews.com/2016/04/17/a-new-strain-of-super-gonorrhoea-is-ripping-through-houstons-youth/
http://krbcnews.com/2016/04/14/rockstar-announces-gta-6-set-in-fictional-city-based-on-houstongalveston-tx/#respond
http://krbcnews.com/2016/04/14/houston-violent-anti-trump-protesters-block-tx-35-near-hobby-airport/feed/
http://krbcnews.com/2016/05/01/tool-rumored-to-be-in-final-stages-of-%ce%bf%ce%b5%ce%b4%ce%b9%cf%80%cf%85%cf%83/
http://krbcnews.com/2016/04/17/a-new-strain-of-super-gonorrhoea-is-ripping-through-houstons-youth/feed/
http://krbcnews.com/2016/05/01/tool-rumored-to-be-in-final-stages-of-%ce%bf%ce%b5%ce%b4%ce%b9%cf%80%cf%85%cf%83/#respond
http://krbcnews.com/2016/04/14/rockstar-announces-gta-6-set-in-fictional-city-based-on-houstongalveston-tx/
http://krbcnews.com/2016/04/17/a-new-strain-of-super-gonorrhoea-is-ripping-through-houstons-youth/#respo

In [143]:
path = os.path.join('data', 'indv_file','krbcnews.csv')
df_krbcnews.to_csv(path)
# test: read data
data_krbcnews = pd.read_csv(path)
data_krbcnews.tail() 

Unnamed: 0,url,source,title,author,text
14,http://krbcnews.com/2016/04/14/rockstar-announ...,krbcnews,Comments on: Rockstar Announces GTA 6 set in f...,[],
15,http://krbcnews.com/2016/04/14/houston-violent...,krbcnews,HOUSTON: Violent anti-Trump protesters block T...,[],HOUSTON: 4/13 A group of anti-Trump protesters...
16,http://krbcnews.com/2016/04/24/breaking-kardas...,krbcnews,Comments on: BREAKING: Kardashian suspected in...,[],
17,http://krbcnews.com/2016/04/24/breaking-kardas...,krbcnews,BREAKING: Kardashian suspected in Princes deat...,[],Source\n\nRumors had been circling after a vid...
18,http://krbcnews.com/2016/04/30/nearly-4000-sch...,krbcnews,"Nearly 4,000 school websites hacked by ISIS. C...",[],"Reuters reports that nearly 4,000 independent ..."


## 14. now8news

In [121]:
url_now8news = 'http://now8news.com' 
df_now8news = generate_data(url_now8news)

http://now8news.com
http://now8news.com/dead-body-homeless-man-turns-legendary-elvis-presley/
http://now8news.com/several-injured-zombie-like-attack-walmart/
http://now8news.com/missing-4-month-old-baby-found-dead-700-pound-mother/
http://now8news.com/hispanic-woman-claims-donald-trump-paid-sex-cancun-love-child/
http://now8news.com/category/news/
http://now8news.com/girl-infects-586-men-hiv-purpose/
http://now8news.com/fidget-spinner-bursts-flames-killing-toddler-sleep/
http://now8news.com/18-year-old-girl-marries-father-arkansas-belong-together/
http://now8news.com/trump-raising-age-limit-tobacco-consumption-25-years-age/
http://now8news.com/caitlyn-jenner-discusses-desire-transition-black-woman/
http://now8news.com/3-year-old-dies-tickled-death/
The total number of now8news articles is  11


In [144]:
path = os.path.join('data','indv_file', 'now8news.csv')
df_now8news.to_csv(path)
# test: read data
data_now8news = pd.read_csv(path)
data_now8news.tail() 

Unnamed: 0,url,source,title,author,text
6,http://now8news.com/fidget-spinner-bursts-flam...,now8news,Fidget Spinner Bursts Into Flames Killing Todd...,[],The parents of a 3 year old girl woke up to tr...
7,http://now8news.com/18-year-old-girl-marries-f...,now8news,18 Year Old Girl Marries Her Father In Arkansa...,[],18 Year Old Girl Marries Her Father In Arkansa...
8,http://now8news.com/trump-raising-age-limit-to...,now8news,Trump Raising Age Limit For Tobacco Consumptio...,[],There is more bad news for cigarette smokers –...
9,http://now8news.com/caitlyn-jenner-discusses-d...,now8news,Caitlyn Jenner Discusses Her Desire To Transit...,[],Caitlyn Jenner or “CJ” as he refers to herself...
10,http://now8news.com/3-year-old-dies-tickled-de...,now8news,3 Year Old Girl Dies After Accidentally Being ...,[],"Charlotte, NC – It’s a warning being sent out ..."


## Merge all data

In [147]:
merge_df = pd.concat([data_interestingdailynews, data_infostormer, 
                      data_krbcnews, 
                      data_local31news, data_lastdeplorables, data_ladylibertysnews, 
                      data_madworldnews, data_majorthoughts, 
                      data_nbccomco, data_newsbbcnet, 
                      data_nationonenews, data_newswithviews, 
                      data_nephef, data_now8news,
                     ], ignore_index=True)
merge_df.tail()

Unnamed: 0,url,source,title,author,text
435,http://now8news.com/fidget-spinner-bursts-flam...,now8news,Fidget Spinner Bursts Into Flames Killing Todd...,[],The parents of a 3 year old girl woke up to tr...
436,http://now8news.com/18-year-old-girl-marries-f...,now8news,18 Year Old Girl Marries Her Father In Arkansa...,[],18 Year Old Girl Marries Her Father In Arkansa...
437,http://now8news.com/trump-raising-age-limit-to...,now8news,Trump Raising Age Limit For Tobacco Consumptio...,[],There is more bad news for cigarette smokers –...
438,http://now8news.com/caitlyn-jenner-discusses-d...,now8news,Caitlyn Jenner Discusses Her Desire To Transit...,[],Caitlyn Jenner or “CJ” as he refers to herself...
439,http://now8news.com/3-year-old-dies-tickled-de...,now8news,3 Year Old Girl Dies After Accidentally Being ...,[],"Charlotte, NC – It’s a warning being sent out ..."


In [148]:
path = os.path.join('data', 'fakenews_jz.csv')
merge_df.to_csv(path)
# test: read data
data_fakenews = pd.read_csv(path)
data_fakenews.tail()

Unnamed: 0.1,Unnamed: 0,url,source,title,author,text
435,435,http://now8news.com/fidget-spinner-bursts-flam...,now8news,Fidget Spinner Bursts Into Flames Killing Todd...,[],The parents of a 3 year old girl woke up to tr...
436,436,http://now8news.com/18-year-old-girl-marries-f...,now8news,18 Year Old Girl Marries Her Father In Arkansa...,[],18 Year Old Girl Marries Her Father In Arkansa...
437,437,http://now8news.com/trump-raising-age-limit-to...,now8news,Trump Raising Age Limit For Tobacco Consumptio...,[],There is more bad news for cigarette smokers –...
438,438,http://now8news.com/caitlyn-jenner-discusses-d...,now8news,Caitlyn Jenner Discusses Her Desire To Transit...,[],Caitlyn Jenner or “CJ” as he refers to herself...
439,439,http://now8news.com/3-year-old-dies-tickled-de...,now8news,3 Year Old Girl Dies After Accidentally Being ...,[],"Charlotte, NC – It’s a warning being sent out ..."
