In [1]:
from apikey import keys
from newsapi import NewsApiClient
from collections import defaultdict
import pandas as pd

newsapi = NewsApiClient(api_key=keys['NewsAPI']['key'])

In [2]:
all_sources = newsapi.get_sources()

In [3]:
print("NewsAPI sources")
print("{0:<30}{1}".format("Source ID", "Source Name"))
print("---------------------------------------------")
for source in all_sources['sources']:
    print("{0:<30}{1}".format(source['id'], source['name']))

NewsAPI sources
Source ID                     Source Name
---------------------------------------------
abc-news                      ABC News
abc-news-au                   ABC News (AU)
aftenposten                   Aftenposten
al-jazeera-english            Al Jazeera English
ansa                          ANSA.it
argaam                        Argaam
ars-technica                  Ars Technica
ary-news                      Ary News
associated-press              Associated Press
australian-financial-review   Australian Financial Review
axios                         Axios
bbc-news                      BBC News
bbc-sport                     BBC Sport
bild                          Bild
blasting-news-br              Blasting News (BR)
bleacher-report               Bleacher Report
bloomberg                     Bloomberg
breitbart-news                Breitbart News
business-insider              Business Insider
business-insider-uk           Business Insider (UK)
buzzfeed                      B

In [4]:
requested_news = newsapi.get_top_headlines(country="us", category="business", page_size=100, page=1)
requested_news

{'status': 'ok',
 'totalResults': 70,
 'articles': [{'source': {'id': 'cnbc', 'name': 'CNBC'},
   'author': 'Robert Ferris',
   'title': 'Tesla suspends orders on website ahead of 5 pm ET announcement - CNBC',
   'description': 'To build anticipation for its planned announcement at 5 pm ET, Tesla is redirecting users on its website to a page that says "The wait is almost over. Great things are launching at 2pm."',
   'url': 'https://www.cnbc.com/2019/02/28/tesla-suspends-online-orders-ahead-of-announcement-redirects-website.html',
   'urlToImage': 'https://fm.cnbc.com/applications/cnbc.com/resources/img/editorial/2018/08/14/105396369-1534271228552rts4hy3.1910x1000.jpg',
   'publishedAt': '2019-02-28T20:32:32Z',
   'content': 'The wait is almost over, even if no one knows what we\'re waiting for. div &gt; div.group &gt; p:first-child"&gt; Electric carmaker Tesla suspended all orders on its website and redirected users to a page teasing a mystery announcement CEO Elon Musk said is co… [+

In [5]:
all_articles = newsapi.get_everything(q='bitcoin',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_param='2019-02-01',
                                      sort_by='relevancy')

In [6]:
print("Printing first 5 articles about 'bitcoin' from 'bbc-news' and 'the-verge'")
print()
for article in all_articles['articles'][:5]:
    print(article['source']['name'] + "\n" + article['title'] + "\n" + article['description'] + "\n------")

Printing first 5 articles about 'bitcoin' from 'bbc-news' and 'the-verge'

TechCrunch
Coinbase users can now withdraw Bitcoin SV following BCH fork
If you’re a Coinbase user, you may have seen some new tokens on your account. The Bitcoin Cash chain split into two different chains back in November. It means that if you held Bitcoin Cash on November 15, you became the lucky owner of Bitcoin SV and Bitcoin …
------
The Verge
The Samsung Galaxy S10 has a cryptocurrency wallet built in
Samsung is the first major smartphone maker to include a cryptocurrency wallet in its latest flagship Galaxy S10 phones. The wallet lets users store bitcoin, Ethereum, and a beauty-related cryptocurrency called Cosmo Coin. It’s a cold storage wallet, meaning …
------
TechCrunch
Coinbase Pro is about to let you trade XRP
On Tuesday, Coinbase announced that XRP will be the latest cryptocurrency to hit its pro-level trading platform. Coinbase Pro will allow users to transfer XRP to the platform right away (“Afte

In [7]:
import json
with open('all_en_sources.json', 'r') as f:
    all_en_sources_json = json.load(f)

all_en_sources = set()
for source in all_en_sources_json['sources']:
    all_en_sources.add(source['id'])
    
all_en_sources = ','.join(all_en_sources)
print(all_en_sources)

the-guardian-uk,engadget,mtv-news-uk,bloomberg,abc-news,the-telegraph,espn-cric-info,politico,four-four-two,medical-news-today,the-next-web,rte,msnbc,the-new-york-times,google-news,news-com-au,entertainment-weekly,cnbc,next-big-future,the-hill,the-verge,national-geographic,nfl-news,google-news-in,buzzfeed,the-economist,financial-post,reuters,mashable,ars-technica,google-news-ca,hacker-news,al-jazeera-english,abc-news-au,techcrunch,metro,the-lad-bible,fox-news,axios,financial-times,mtv-news,polygon,wired,bleacher-report,new-york-magazine,the-huffington-post,new-scientist,time,breitbart-news,independent,espn,associated-press,news24,the-guardian-au,mirror,nhl-news,australian-financial-review,the-washington-post,techradar,the-hindu,national-review,business-insider,the-sport-bible,bbc-sport,fox-sports,the-irish-times,daily-mail,vice-news,nbc-news,crypto-coins-news,talksport,reddit-r-all,the-american-conservative,the-wall-street-journal,cbc-news,recode,the-times-of-india,the-washington-times

In [9]:
def to_csv(news, filename):
    all_news = []
    for article in news:
        all_news.append([article['source']['name'], article['publishedAt'], article['title'], article['description']])
    pd.DataFrame(all_news, columns=['source', 'date', 'title', 'description']).to_csv(filename)

In [10]:
categories = ['health', 'business', 'sports'] # politics is not an option
max_results = 1000  # max limit of article querying

for category in categories:
    category_news = []
    
    num_articles = 0
    requested_articles = 1
    page = 1

    while num_articles < min(requested_articles, max_results):
        requested_news = newsapi.get_top_headlines(category=category, page=page, page_size=100, country='us')

        requested_articles = requested_news['totalResults']
        num_articles += len(requested_news['articles'])
        category_news.extend(requested_news['articles'])
        page += 1
        
    to_csv(category_news, "sentiments/{}.csv".format(category))