In [4]:
# Reference: https://towardsdatascience.com/collecting-data-from-the-new-york-times-over-any-period-of-time-3e365504004
# https://github.com/brienna/coronavirus-news-analysis/blob/master/2020_05_01_get_data_from_NYT.ipynb
#https://github.com/casmlab/get-nytimes-articles/blob/master/getTimesArticles.py
import os.path

"""
Test key
To activate conda:
source /c/anaconda3/etc/profile.d/conda.sh
conda activate myenv
python -m ipykernel install --user --name myenv --display-name "Python (myenv)"
"""
import requests
from pprint import pprint

# TODO: test the code online
# TODO: use semantic keys for asianamericans tag
# Wait 6 seconds between queries

Test the "Asian Americans" topic tag using the Semantic API — the query returns no results.

In [7]:
# Look up a concept by type and name in the NYT Semantic API.
# NOTE: `key` (the API key) is not assigned in this cell; it must already
# exist in the kernel before this cell runs.
conceptType = "nytd_topic"
conceptName = "Asian Americans"

# Use HTTPS so the API key in the query string is not sent in plaintext.
semanticConceptQuery = f'https://api.nytimes.com/svc/semantic/v2/concept/name/{conceptType}/{conceptName}.json?fields=all&api-key={key}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(semanticConceptQuery, timeout=30)
pprint(r.json())

{'copyright': 'Copyright (c) 2015 The New York Times Company. All Rights '
              'Reserved.',
 'fields': ['all'],
 'num_results': 0,
 'results': [],
 'status': 'OK'}


Test the subject filter `subject:asianamericans` in the Article Search API — this returns 0 hits.

In [8]:
# Filter the Article Search API by subject using the `fq` parameter.
# NOTE: `key` (the API key) is not assigned in this cell; it must already
# exist in the kernel before this cell runs.
subject = "subject:asianamericans"
articleSearchQuery = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?fq={subject}&api-key={key}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(articleSearchQuery, timeout=30)
pprint(r.json())

{'copyright': 'Copyright (c) 2021 The New York Times Company. All Rights '
              'Reserved.',
 'response': {'docs': [], 'meta': {'hits': 0, 'offset': 0, 'time': 72}},
 'status': 'OK'}


Search for documents containing "asian americans": 130882 hits, i.e. a lot of articles. The response shows that the exact subject name is "Asian-Americans".

In [10]:
# Free-text search (the `q` parameter) of the Article Search API.
# NOTE: `key` (the API key) is not assigned in this cell; it must already
# exist in the kernel before this cell runs.
documentQuery = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?q=asian+americans&api-key={key}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(documentQuery, timeout=30)
pprint(r.json())

{'copyright': 'Copyright (c) 2021 The New York Times Company. All Rights '
              'Reserved.',
 'response': {'docs': [{'_id': 'nyt://article/a6f1bff4-8256-53e8-ae8d-9d98bb7e6a61',
                        'abstract': 'A string of attacks against older people '
                                    'of Asian descent has led to calls for '
                                    'more police officers, an idea rejected by '
                                    'the city’s Asian American leaders.',
                        'byline': {'organization': None,
                                   'original': 'By Thomas Fuller',
                                   'person': [{'firstname': 'Thomas',
                                               'lastname': 'Fuller',
                                               'middlename': None,
                                               'organization': '',
                                               'qualifier': None,
                                      

Test the subject filter in the Article Search API again, this time with the exact subject name found in the response above — this returns 1618 hits.

In [11]:
# Filter the Article Search API by the subject "Asian-Americans".
# NOTE: `key` (the API key) is not assigned in this cell; it must already
# exist in the kernel before this cell runs.
subject = "subject:Asian-Americans"
articleSearchQuery = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?fq={subject}&api-key={key}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(articleSearchQuery, timeout=30)
pprint(r.json())

{'copyright': 'Copyright (c) 2021 The New York Times Company. All Rights '
              'Reserved.',
 'response': {'docs': [{'_id': 'nyt://article/27188319-b6c9-5f3e-abac-ddbed4551013',
                        'abstract': 'Anthony Veasna So died before the release '
                                    'of his first book, “Afterparties,” but '
                                    'his loved ones, mentors and newfound fans '
                                    'are making it a particularly significant '
                                    'debut.',
                        'byline': {'organization': None,
                                   'original': 'By Andrew LaVallee',
                                   'person': [{'firstname': 'Andrew',
                                               'lastname': 'LaVallee',
                                               'middlename': None,
                                               'organization': '',
                                              

Fields of particular interest:

- pub_date: publication date, formatted like "2021-06-22T18:32:40+0000"
- web_url: link to the original article; might be important
- byline: person: firstname, lastname: the author(s) of the article
- document_type: may be article or multimedia
- headline: main: the title of the article
- keywords: value, name: value is the topic and name is its category (person, subject, etc.) -> can compare subject categories to find which subject appears most often

-> Next steps:

- Why does the plain "Asian Americans" search return more hits?
- Parse the JSON response into CSV; build a method that waits 6 seconds between queries and fetches 10 results per request

Try to get the last page: with 1618 hits and 10 results returned per page there are 162 pages, so the last one should be page 161 (assuming pages are numbered from 0)?

In [12]:
# Request page 161 of the "Asian-Americans" subject results.
# NOTE: `key` (the API key) is not assigned in this cell; it must already
# exist in the kernel before this cell runs.
subject = "subject:Asian-Americans"
articleSearchQuery = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?fq={subject}&page=161&api-key={key}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(articleSearchQuery, timeout=30)
pprint(r.json())

{'copyright': 'Copyright (c) 2021 The New York Times Company. All Rights '
              'Reserved.',
 'response': {'docs': [{'_id': 'nyt://article/0dca48ae-4e8a-5657-b256-b26f1602eadb',
                        'abstract': 'LEAD: To the Editor:',
                        'byline': {'organization': None,
                                   'original': None,
                                   'person': []},
                        'document_type': 'article',
                        'headline': {'content_kicker': None,
                                     'kicker': 'THE NEW PROTESTANT ETHIC IS '
                                               'THE ASIAN ETHIC',
                                     'main': 'Parents Not the Answer',
                                     'name': None,
                                     'print_headline': 'THE NEW PROTESTANT '
                                                       'ETHIC IS THE ASIAN '
                                                       'ETHI

Set up config parser

In [4]:
import os
import configparser

# Load the NYT API key from config/settings.cfg (section "nytimes",
# option "api_key") so the key is never written into the notebook itself.
configs = configparser.ConfigParser()
# Directory that holds README.md. os.path.dirname("README.md") alone is always
# the empty string, so resolve the name to an absolute path first (relative to
# the current working directory).
currentDir = os.path.dirname(os.path.abspath("README.md"))
# Path to the config file
configDir = os.path.join(currentDir, "config", "settings.cfg")
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; fail here with a clear message rather than
# with a NoSectionError on the lookup below.
if not configs.read(configDir):
    raise FileNotFoundError(f"Could not read config file: {configDir}")
apiKey = configs.get("nytimes", "api_key")

In [7]:
import requests
# Request page 161 of the "Asian-Americans" subject results, using `apiKey`.
# NOTE: `apiKey` is not assigned in this cell; it must already exist in the
# kernel before this cell runs.
subject = "subject:Asian-Americans"
articleSearchQuery = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?fq={subject}&page=161&api-key={apiKey}'
# requests has no default timeout; without one a stalled connection would
# block the cell forever.
r = requests.get(articleSearchQuery, timeout=30)

OK


In [10]:
# Show the hit count reported in the response metadata of the last request.
searchMeta = r.json()['response']['meta']
print(searchMeta['hits'])

1618
