## Loading credentials from another config file

In [1]:
from config import NYT_API_KEY

## Starting with New York Times API

In [2]:
import requests
from config import NYT_API_KEY

In [3]:
# %pip install --upgrade pynytimes
from pynytimes import NYTAPI

api_key = NYT_API_KEY

nyt = NYTAPI(api_key, parse_dates=True)

In [4]:
climate_keywords = ['climate change',
                    'sustainability', 
                    'pollution', 
                    'biodiversity', 
                    'natural disaster', 
                    'environmental policy']

In [32]:
#Import datetime function from datetime library which allows us to create a datetime object

from datetime import datetime, timedelta
import pandas as pd

#Set up start and end date objects

begin = datetime(2016, 12, 31)

end = datetime(2020, 2, 29)

# set range of dates per 7 days
date_range = pd.date_range(start=begin, end=end, freq='1SM')

date_ranges = [(date_range[i], date_range[i+1]-timedelta(days=1)) for i in range(len(date_range)-1)]

#Create dictionary containing dates data
date_dict = {"begin":begin, "end":end}

#Create options dictionary
options_dict = {
    #Sort from earliest to latest
    "sort": "oldest",
    # Return only articles from New York Times, filters out other sources such as AP and Reuters
    "sources": ["New York Times"],
    # Return only straight-forward news in the form of articles
    "type_of_material": ["News Analysis", "News", "Article"],    
}

In [6]:
import time

articles = []

for keyword in climate_keywords:
    for date_range in date_ranges:
        query = keyword
        dates = {"begin": date_range[0], "end": date_range[1]}
        response = nyt.article_search(query=query, dates=dates, results=50, options=options_dict)
        current_time = time.strftime('%H:%M:%S')
        
        print(f'{keyword}: {date_range[0]} - {date_range[1]} n_article = {len(response)} {current_time}')
        
        articles += response
        time.sleep(1)

RetryError: HTTPSConnectionPool(host='api.nytimes.com', port=443): Max retries exceeded with url: /svc/search/v2/articlesearch.json?api-key=GVkGbphR0Jr43uoaaF5t3bsitHvwMju2&sort=oldest&sources=New+York+Times&fq=type_of_material%3A%28%22News+Analysis%22+%22News%22+%22Article%22%29&begin_date=20180630&end_date=20180714&q=climate+change&page=0 (Caused by ResponseError('too many 429 error responses'))

In [None]:
len(articles)

In [None]:
# articles = nyt.article_search(
#     query = "environment",
#     results = 100,
#     dates =date_dict,
#     options = options_dict)

In [35]:
# create an empty list to hold dictionaries of article data
article_data = []

# iterate over articles and add relevant data to list of dictionaries
for article in articles:
    # get the publication date as a datetime object
    pub_date = article['pub_date']
    
    # convert datetime object to string in YYYY-MM-DD format
    pub_date_str = pub_date.strftime('%Y-%m-%d')
    
    # add relevant data to dictionary
    article_data.append({
        'title': article['headline']['main'],
        'snippet': article['abstract'],
        'date': pub_date_str,
        'url': article['web_url']
    })

# create dataframe from list of dictionaries
nyt_df = pd.DataFrame(article_data)

# set the order of columns in the dataframe
nyt_df = nyt_df[['title', 'snippet', 'date', 'url']]

In [36]:
nyt_df

Unnamed: 0,title,snippet,date,url
0,18 Memorable People We Met Across the Country ...,"In a chaotic year, these 18 people left indeli...",2018-12-28,https://www.nytimes.com/2018/12/28/us/18-memor...
1,Four Questions for the Year Ahead,Here are four important unsettled issues for 2...,2018-12-28,https://www.nytimes.com/2018/12/28/business/fo...
2,New E.P.A. Plan Could Free Coal Plants to Rele...,The proposal could set a precedent reaching fa...,2018-12-28,https://www.nytimes.com/2018/12/28/climate/mer...
3,‘It’s Not Going Away’: 2 Years of Documenting ...,Race/Related is a weekly newsletter focused on...,2018-12-29,https://www.nytimes.com/2018/12/29/us/hate-cri...
4,‘You Control Nothing’: House Republicans Brace...,Most Republican representatives are about to e...,2018-12-29,https://www.nytimes.com/2018/12/29/us/politics...
5,Bangladesh’s Leader Wins a Third Term but Oppo...,In a vote with widespread irregularities and i...,2018-12-31,https://www.nytimes.com/2018/12/31/world/asia/...
6,2020 Candidates Are Lining Up. Which Democrat ...,"Democrats are in a moment of transition, grapp...",2018-12-31,https://www.nytimes.com/2018/12/31/us/politics...
7,"In San Francisco, a Younger Sibling Acts Out","At Angler, sister to the three-Michelin-star S...",2018-12-28,https://www.nytimes.com/2018/12/28/travel/in-s...
8,"Along a Road of Sorrow, a Toddler Lost Her Nam...",A roadside bomb wiped out a family in Afghanis...,2018-12-28,https://www.nytimes.com/2018/12/28/world/asia/...
9,The South Asian Artists Making Their Mark on t...,"Women from India, Pakistan and beyond are fina...",2018-12-28,https://www.nytimes.com/2018/12/28/t-magazine/...


In [37]:
nyt_df.to_csv('data/nyt_20181228_20181231.csv', index=False)

In [None]:
articles[0]['abstract']

In [None]:
articles[0]['web_url']

In [None]:
articles[0]['headline']['main']

In [None]:
articles[0]['pub_date'].strftime('%Y-%m-%d')

In [None]:
articles[0]