## API exploration and limitations
- Topic: Enviroment and Climate Change

### NYT API

In [None]:
import requests
import os
url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
headers = {
    "Accept" : "application/json"
}
parameters = {
    "api-key" : os.getenv("NYT_API_KEY"),
    "q" : "climate change"
}

r = requests.get(
    url = url,
    params = parameters,
    headers = headers
)
data = r.json()


#### exploring the response

In [8]:
data.keys()

dict_keys(['status', 'copyright', 'response'])

In [12]:
data['response'].keys()

dict_keys(['docs', 'metadata'])

how many articles does it produce?

In [14]:
num_articles = len(data['response']['docs'])
print(f"the number of articles in the response is {num_articles}")

the number of articles in the response is 10


what is the information inside each of the articles

In [15]:
articles = data['response']['docs']
print(f"the type of the articles is {type(articles)}")

the type of the articles is <class 'list'>


In [19]:
sample_article = articles[0]
for k in sample_article.keys():
    print(f"'{k}' : {sample_article[k]}")

'abstract' : Extreme weather events — deadly heat waves, floods, fires and hurricanes — are the consequences of a warming planet, scientists say.
'byline' : {'original': 'By David Gelles and Austyn Gaffney'}
'document_type' : article
'headline' : {'main': '‘We’re in a New Era’: How Climate Change Is Supercharging Disasters', 'kicker': '', 'print_headline': 'Fires in Los Angeles Area Are Grim Look Into Future'}
'_id' : nyt://article/a09b3cd3-63b9-5df7-8563-a88bde75361f
'keywords' : [{'name': 'Subject', 'value': 'Wildfires', 'rank': 1}, {'name': 'Subject', 'value': 'Global Warming', 'rank': 2}, {'name': 'Subject', 'value': 'Fires and Firefighters', 'rank': 3}, {'name': 'Subject', 'value': 'Southern California Wildfires (Jan 2025)', 'rank': 4}, {'name': 'Location', 'value': 'Los Angeles (Calif)', 'rank': 5}, {'name': 'Subject', 'value': 'Hurricanes and Tropical Storms', 'rank': 6}, {'name': 'Subject', 'value': 'Floods', 'rank': 7}, {'name': 'Subject', 'value': 'Heat and Heat Waves', 'rank

relevant fields from articles
- abstract
- byline (authors)
- headline (main, print_headline)
- pub_date,
- source
- web_url
- word_count

In [21]:
data['response']['metadata']

{'hits': 10000, 'offset': 0, 'time': 12}

### refining the query

In [None]:
import requests
import os
url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
headers = {
    "Accept" : "application/json"
}
parameters = {
    "api-key" : os.getenv("NYT_API_KEY"),
    "q" : "climate change", # what are the articles about?
    "sort" : "newest", # available options: best (default), newest, oldest, relevance
    "begin_date" : "20200101", # format (YYYYMMDD)
    "end_date" : "20250401", # format (YYYYMMDD)
    "fq" : 'desk:("Climate", "Foreign") AND section.name:("Climate", "Science") AND type:("Article")',
    # the special quality about this filter is the "" on the fields and the filters available,
    # types of fields can be found in https://developer.nytimes.com/docs/articlesearch-product/1/overview
}

r = requests.get(
    url = url,
    params = parameters,
    headers = headers
)

r.json()

{'status': 'OK',
 'copyright': 'Copyright (c) 2025 The New York Times Company. All Rights Reserved.',
 'response': {'docs': [{'abstract': 'The exhibits were dedicated to the agency’s history. Mr. Zeldin said closing the collection would save $600,000 annually.',
    'byline': {'original': 'By Lisa Friedman'},
    'document_type': 'article',
    'headline': {'main': 'Lee Zeldin, E.P.A. Head, Shuts National Environmental Museum',
     'kicker': '',
     'print_headline': ''},
    '_id': 'nyt://article/a83470c0-6d26-57c7-8f60-024dc28a5522',
    'keywords': [{'name': 'Subject', 'value': 'Global Warming', 'rank': 1},
     {'name': 'Subject', 'value': 'Greenhouse Gas Emissions', 'rank': 2},
     {'name': 'Subject', 'value': 'Museums', 'rank': 3},
     {'name': 'Subject', 'value': 'Environment', 'rank': 4},
     {'name': 'Subject', 'value': 'Presidential Election of 2024', 'rank': 5},
     {'name': 'Organization',
      'value': 'Environmental Protection Agency',
      'rank': 6},
     {'name

### creating the class object

In [19]:
import os
import requests

class NYTnews:
    """
    This class consumes news, artciles, and other media from the New York Times API
    Future improvements:
        - hide the api key attribute
    """
    def __init__(
        self,
        api_key : str,
        query : str,
        sort : str = 'newest',
        begin_date : str = '20200101',
        end_date : str = '20250401'
    ):
        # query parameters
        self.api_key, self.query, self.sort, self.begin_date, self.end_date = api_key, query, sort, begin_date, end_date
        # url endpoint
        self.endpoint = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
        # headers
        self.headers = {
            "Accept" : "application/json"
        }
        # query parameters
        self.parameters = {
            "api-key" : self.api_key,
            "q" : self.query, # what are the articles about?
            "sort" : self.sort, # available options: best (default), newest, oldest, relevance
            "begin_date" : self.begin_date, # format (YYYYMMDD)
            "end_date" : self.end_date,
            "fq" : 'type:("Article")' # special parameters that allows granular filters
            # types of fields can be found in https://developer.nytimes.com/docs/articlesearch-product/1/overview
        }
        # placeholders for future attributes
        self.news_list = None
        self.news_urls = None
        self.news_authors = None
    # class methods
    def consume_endpoint(self):
        """
        Generates the list of news according to the query parameters
        """
        try:
            response = requests.get(
                url = self.endpoint,
                params = self.parameters,
                headers = self.headers
            )
            if response.status_code == 400:
                raise Exception('Invalid query parameters')
            if response.status_code == 401:
                raise Exception('Invalid API Key!')
            if response.status_code == 429:
                raise Exception('Daily limit reached')
            self.news_list = response.json()['response']['docs']
        except Exception as e:
            print(e)
    def get_total_news(self):
        """
        total number of news from the query
        """
        if self.news_list is not None:
            return len(self.news_list)
        else:
            print(f"Endpoint needs to consumed first")
    def get_news_urls(self):
        """
        creates the attribute where all the url links can be listed
        """
        try:
            if self.news_list is None:
                raise Exception("Endpoint needs to be consumed first")
            if len(self.news_list) == 0:
                raise Exception("There are no news in for this query")
            self.news_urls = [d['web_url'] for d in self.news_list]
            return self.news_urls
        except Exception as e:
            print(e)
    def get_news_authors(self):
        """
        creates the dictionary of headline and authors
        """
        try:
            if self.news_list is None:
                raise Exception("Endpoint needs to be consumed first")
            if len(self.news_list) == 0:
                raise Exception("There are no news in for this query")
            self.news_authors = {
                d['headline']['print_headline'] : d['byline']['original'] for d in self.news_list
            }
            return self.news_authors
        except Exception as e:
            print(e)
    def get_snippet_word_count(self):
        """
        return a dictionary of the news snippet and the wordcount
        """
        try:
            if self.news_list is None:
                raise Exception("Endpoint needs to be consumed first")
            if len(self.news_list) == 0:
                raise Exception("There are no news in for this query")
            self.summary_and_wordcount = {
                d['snippet'] : d['word_count'] for d in self.news_list
            }
            return self.summary_and_wordcount
        except Exception as e:
            print(e)

            
news = NYTnews(
    api_key = os.getenv('NYT_API_KEY'),
    query = 'Technology'
)
news.consume_endpoint()
for key, value in news.get_news_authors().items():
    print(f"headline: '{key}' : author: '{value}'")


headline: '' : author: 'By Benjamin Mueller'
headline: 'OpenAI Completes Deal That Values Company at $300 Billion' : author: 'By Cade Metz'
headline: 'Building a Farmhouse With a Pole-Barn Vibe' : author: 'By Tim McKeough'
headline: 'Experts See Science Cuts As a Big Risk' : author: 'By Ben Casselman'
headline: 'Pitch on Tariffs  Is That People  Can Take Pain' : author: 'By Alan Rappeport'
headline: 'Google’s A.I. Drug Design Lab, Isomorphic, Raises $600 Million' : author: 'By Michael J. de la Merced'
headline: 'A Chinese Truck Maker Wants the Green Light' : author: 'By Daisuke Wakabayashi'


next steps:
1. create the object
   1. attributes: the parameters of the query
   2. methods: get the the information from the articles
2. implement error handling with the responses

### trying out the custom classes

In [20]:
import entities.news_article 
import aggregator.api_client 
import entities.user_input
from importlib import reload
reload(entities.news_article)
reload(aggregator.api_client)
reload(entities.user_input)

<module 'entities.user_input' from '/Users/santiagocardenas/Documents/MDSI/202501/python programming/InfoAggregatorFinalProject/entities/user_input.py'>

In [21]:
from entities.news_article import NYTArticle
from aggregator.api_client import NYTNewsArticles
from entities.user_input import UserInput
from os import getenv

user_input = UserInput(
    category = 'Technology',
    source = 'The New York Times'
)

test = NYTNewsArticles(
    api_key = getenv("NYT_API_KEY"),
    base_url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'
)

articles = test.fetch_articles(
    user_input = user_input
)

for a in articles:
    print(f"title '{a.title}' : author: '{a.author}'")

title '' : author: 'By Kenneth Chang'
title 'OpenAI Completes Deal That Values Company at $300 Billion' : author: 'By Cade Metz'
title '' : author: 'By Jack Tamisiea'
title '' : author: 'By Emmett Lindner'
title 'Building a Farmhouse With a Pole-Barn Vibe' : author: 'By Tim McKeough'
title 'Experts See Science Cuts As a Big Risk' : author: 'By Ben Casselman'
title '' : author: 'By Benjamin Mueller'
title 'Pitch on Tariffs  Is That People  Can Take Pain' : author: 'By Alan Rappeport'
title 'Google’s A.I. Drug Design Lab, Isomorphic, Raises $600 Million' : author: 'By Michael J. de la Merced'
title 'A Chinese Truck Maker Wants the Green Light' : author: 'By Daisuke Wakabayashi'
