In [2]:
import os
from datetime import timedelta, date

import pandas as pd
import requests
from newsapi import NewsApiClient
from pandas.io.json import json_normalize

## Retrieving data

NewsAPI caps the number of retrievals allowed per day for non-paying users. To be able to retrieve more than 100 articles at a time, we have to provide different API keys as input. Problems might also arise if the articles all belong to the same date.

Therefore, a loop is created to retrieve the data for each day we are interested in. Before doing so, a function (`daterange`) is defined below to generate the dates that are then passed to the retrieval loop.

In [3]:
# Accumulates the "YYYY-MM-DD" strings appended by the date loop later in this cell.
days = []

def daterange(start_date, end_date):
    """Yield consecutive days from start_date up to, but excluding, end_date.

    The number of values produced equals the whole-day difference between
    the two arguments; nothing is yielded when that difference is zero or
    negative.
    """
    total_days = int((end_date - start_date).days)
    offset = 0
    while offset < total_days:
        # timedelta's first positional argument is a number of days.
        yield start_date + timedelta(offset)
        offset += 1

# Date window to cover; daterange stops before end_date, so the last day
# collected is the one preceding it.
start_date, end_date = date(2019, 11, 14), date(2019, 11, 29)

# Store every day of the window as a "YYYY-MM-DD" string.
for single_date in daterange(start_date, end_date):
    days += [single_date.strftime("%Y-%m-%d")]

print(days)

['2019-11-14', '2019-11-15', '2019-11-16', '2019-11-17', '2019-11-18', '2019-11-19', '2019-11-20', '2019-11-21', '2019-11-22', '2019-11-23', '2019-11-24', '2019-11-25', '2019-11-26', '2019-11-27', '2019-11-28']


In [4]:
# --- Query settings for the NewsAPI "everything" endpoint ---
pages = range(1,16)  # not used in this cell; kept in case other cells rely on it
keyword = "thunberg"
language = "en"
page_size = 100
from_param = days  # one request is issued per entry of this list
sort = "relevancy"
url = "https://newsapi.org/v2/everything"

# API keys are credentials and must not be stored in the notebook.
# Provide them through the NEWSAPI_KEYS environment variable as a
# comma-separated list, e.g. NEWSAPI_KEYS="key1,key2,key3".
keys = [k.strip() for k in os.environ.get("NEWSAPI_KEYS", "").split(",") if k.strip()]
if not keys:
    raise RuntimeError(
        "No NewsAPI keys found: set the NEWSAPI_KEYS environment variable "
        "to a comma-separated list of API keys."
    )
if len(keys) < len(from_param):
    # zip() below stops at the shorter sequence, so the extra days would be
    # dropped without any message.
    print(
        f"Warning: only {len(keys)} API key(s) for {len(from_param)} day(s); "
        f"days after {from_param[len(keys) - 1]} will not be retrieved."
    )

# --- Retrieval: one request per (key, day) pair ---
articles = []
for key, param in zip(keys, from_param):
    # Let requests build and URL-encode the query string. The hand-built URL
    # interpolated the whole list of days instead of the current day and was
    # missing the '&' before 'language', so the date filter was never applied.
    # NOTE(review): 'from' only sets the oldest date allowed; add a matching
    # 'to' if each request should be limited to a single day - confirm intent.
    response = requests.get(
        url,
        params={
            "q": keyword,
            "apiKey": key,
            "from": param,
            "language": language,
            "sortBy": sort,
            "pageSize": page_size,
        },
        timeout=30,  # avoid hanging the notebook on an unresponsive server
    )
    if response.status_code == 200:
        articles += response.json()['articles']
    else:
        print(f"Error retrieving date {param} (HTTP {response.status_code}).")

news = pd.DataFrame(articles)

news.head()

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content
0,"{'id': None, 'name': 'Gizmodo.com'}","Yessenia Funes on Earther, shared by Andrew Co...","23 Questions for Greta Thunberg, 19th-Century ...",Some might say that individuals like Greta Thu...,https://earther.gizmodo.com/23-questions-for-g...,https://i.kinja-img.com/gawker-media/image/upl...,2019-11-21T21:58:00Z,Some might say that individuals like Greta Thu...
1,"{'id': 'wired', 'name': 'Wired'}",Chris D'Angelo,Greta Thunberg Blasts ‘Creative PR’ in Her Cli...,In her latest talk at a major climate change c...,https://www.wired.com/story/greta-thunberg-bla...,https://media.wired.com/photos/5df29a78a7e59c0...,2019-12-12T20:51:16Z,This story originally appeared on HuffPost and...
2,"{'id': 'mashable', 'name': 'Mashable'}",Natasha Pinon,Greta Thunberg criticizes world leaders for fi...,Shortly before Greta Thunberg was chosen as Ti...,https://mashable.com/video/greta-thunberg-un-s...,https://mondrian.mashable.com/2019%252F12%252F...,2019-12-11T16:27:31Z,Shortly before Greta Thunberg was chosen as Ti...
3,"{'id': 'cnn', 'name': 'CNN'}",,Greta Thunberg says she's taking a break,"Greta Thunberg, 16-year-old climate activist a...",https://www.cnn.com/videos/us/2019/12/14/greta...,https://cdn.cnn.com/cnnnext/dam/assets/1912140...,2019-12-14T14:47:45Z,Chat with us in Facebook Messenger. Find out w...
4,"{'id': 'mashable', 'name': 'Mashable'}",Natasha Pinon,Greta Thunberg arrives in Lisbon on her way to...,Following a three-week voyage across the Atlan...,https://mashable.com/article/greta-thunberg-po...,https://mondrian.mashable.com/2019%252F12%252F...,2019-12-03T16:36:39Z,Following a three-week voyage across the Atlan...
