In [10]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

# Load variables from a local .env file into the process environment, then
# read the NYT API key from it.
load_dotenv()
nyt_api_key = os.getenv("NYT_API_KEY")

# Article Search endpoint. The trailing "?" is kept so that query parameters
# can be appended directly to this string.
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter: articles whose subject is "United States Economy" and whose
# print_page is 1.
filter_query = 'subject:"United States Economy" AND print_page:1'

# Comma-separated list of the fields requested from the API for each article.
field_list = ",".join(["headline", "keywords", "pub_date"])

# Years to download, as strings so they can be joined with "MMDD" suffixes.
years = [str(y) for y in range(2021, 2025)]

# Download the articles matching `filter_query` for every year in `years` and
# write them to data/us_economy_headlines_<year>.csv (one row per article:
# publication datetime, print headline, bracketed list of subject keywords).
# Failures are appended to debug/error.txt and the affected page is skipped.

if not nyt_api_key:
    # Fail fast with a clear message instead of sending unauthenticated
    # requests for every page of every year.
    raise RuntimeError(
        "NYT_API_KEY is not set; add it to the environment or the .env file."
    )

# Make sure the output folders exist so the open() calls below cannot fail
# on a fresh checkout (including the one inside the error handler).
os.makedirs("data", exist_ok=True)
os.makedirs("debug", exist_ok=True)


def _csv_quote(text):
    """Return ``text`` wrapped in double quotes, with embedded quotes doubled.

    Doubling embedded quotes keeps a field such as ``He said "no"`` from
    terminating the quoted CSV field early.
    """
    return '"' + text.replace('"', '""') + '"'


for year in years:

    begin_date = year + "0101"
    end_date = year + "1231"

    # Query parameters shared by every page of this year. They are passed via
    # `params=` so that requests URL-encodes the spaces, quotes and colon in
    # the filter query.
    query_params = {
        "fq": filter_query,
        "fl": field_list,
        "api-key": nyt_api_key,
        "begin_date": begin_date,
        "end_date": end_date,
    }

    # Start a fresh CSV file for the year and write the column header names.
    filename = "data/us_economy_headlines_" + year + ".csv"
    with open(filename, "w", encoding="utf-8") as file:
        file.write("datetime,headline,keywords\n")

    # Iterate through each of the potentially 100 pages returned from NYT.
    for page in range(100):

        # Reset per page so the error handler never logs a stale payload
        # (or hits an unbound name when the very first request fails).
        us_econ_data = None

        try:
            # Request the next page from NYT.
            print(f"\nRequesting page {page} for year {year}")
            response = requests.get(
                url,
                params={**query_params, "page": page},
                timeout=30,  # never hang forever on a stalled connection
            )
            us_econ_data = response.json()

            # An empty result list means this year is exhausted; move on to
            # the next year (this also skips the sleep below).
            docs = us_econ_data["response"]["docs"]
            if not docs:
                break

            # Build one CSV line per article in the response.
            rows = []
            for doc in docs:
                pub_date = doc["pub_date"]
                # The print headline can be missing or null; write an empty
                # field rather than dropping the rest of the page.
                headline = doc["headline"].get("print_headline") or ""

                # Keep only the keywords of type "subject".
                keywords = [
                    keyword["value"]
                    for keyword in doc.get("keywords") or []
                    if keyword["name"] == "subject"
                ]
                keywords_string = ",".join(keywords)

                output = (
                    pub_date
                    + ","
                    + _csv_quote(headline)
                    + ","
                    + _csv_quote("[" + keywords_string + "]")
                )
                print(output)
                rows.append(output + "\n")

            # Append the whole page in one write instead of re-opening the
            # file for every row.
            with open(filename, "a", encoding="utf-8") as file:
                file.writelines(rows)

        except (requests.RequestException, ValueError, KeyError, TypeError) as error:
            # Network failure, non-JSON body, or a payload without the
            # expected structure (e.g. an API fault / rate-limit message).
            # Log the exception and whatever payload was received; the page
            # is skipped, not retried.
            with open("debug/error.txt", "a", encoding="utf-8") as file:
                file.write(f"year={year} page={page}: {error!r}\n")
                file.write(json.dumps(us_econ_data, indent=4) + "\n")

        # Pause between requests to stay under the API rate limit.
        print("Sleeping for 15 seconds...")
        time.sleep(15)



Requesting page 0 for year 2019
2019-08-17T10:27:08+0000,"Is a New Recession Imminent? Here’s How One Could Happen","[United States Economy,Recession and Depression,International Trade and World Market,Economic Conditions and Trends,United States International Relations]"
2019-01-17T00:15:01+0000,"In Small Towns, Shutdown Has Ripple Effects","[United States Politics and Government,Government Employees,Shutdowns (Institutional),Wages and Salaries,Labor and Jobs,United States Economy]"
2019-08-01T17:44:59+0000,"Trump Escalates Fight Over Trade With the Chinese","[Customs (Tariff),International Trade and World Market,United States International Relations,United States Economy,United States Politics and Government,Economic Conditions and Trends]"
2019-10-10T09:00:14+0000,"A Steady Paycheck Is Good Medicine for Communities","[Labor and Jobs,Income Inequality,Gangs,Urban Areas,United States Economy,Area Planning and Renewal,Nonprofit Organizations,Health Insurance and Managed Care]"
2019-03