In [2]:
import requests
def make_request(endpoint, payload=None):
 """
 Make a request to a specific endpoint on the weather API
 passing headers and optional payload.

 Parameters:
 - endpoint: The endpoint of the API you want to
 make a GET request to.
 - payload: A dictionary of data to pass along
 with the request.

 Returns:
 Response object.
 """
 return requests.get(
 f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}',
 headers={
 'token': 'EsZYHMNAYEfupllWFTOyrItclNJQUwEi'
 },
 params=payload
 )

In [3]:
response = make_request('datasets', {'startdate':'2018-10-01'})
response.status_code


200

In [4]:
response.json().keys()


dict_keys(['metadata', 'results'])

The metadata of the JSON response will tell us information about the request and data we got back:


In [5]:
response.json()['metadata']

{'resultset': {'offset': 1, 'count': 11, 'limit': 25}}

Figure out what data is in the result
The results key contains the data we requested. This is a list of what would be rows in our dataframe. Each entry in the list is a dictionary, so we can look at the keys to get
the fields:

In [6]:
 response.json()['results'][0].keys()

dict_keys(['uid', 'mindate', 'maxdate', 'name', 'datacoverage', 'id'])

Take and parse the results since we only want Id and Name from data

In [8]:
#All the results
response.json()['results']

[{'uid': 'gov.noaa.ncdc:C00861',
  'mindate': '1750-02-01',
  'maxdate': '2024-03-10',
  'name': 'Daily Summaries',
  'datacoverage': 1,
  'id': 'GHCND'},
 {'uid': 'gov.noaa.ncdc:C00946',
  'mindate': '1750-02-01',
  'maxdate': '2024-02-01',
  'name': 'Global Summary of the Month',
  'datacoverage': 1,
  'id': 'GSOM'},
 {'uid': 'gov.noaa.ncdc:C00947',
  'mindate': '1763-01-01',
  'maxdate': '2024-01-01',
  'name': 'Global Summary of the Year',
  'datacoverage': 1,
  'id': 'GSOY'},
 {'uid': 'gov.noaa.ncdc:C00345',
  'mindate': '1991-06-05',
  'maxdate': '2024-03-11',
  'name': 'Weather Radar (Level II)',
  'datacoverage': 0.95,
  'id': 'NEXRAD2'},
 {'uid': 'gov.noaa.ncdc:C00708',
  'mindate': '1994-05-20',
  'maxdate': '2024-03-09',
  'name': 'Weather Radar (Level III)',
  'datacoverage': 0.95,
  'id': 'NEXRAD3'},
 {'uid': 'gov.noaa.ncdc:C00821',
  'mindate': '2010-01-01',
  'maxdate': '2010-01-01',
  'name': 'Normals Annual/Seasonal',
  'datacoverage': 1,
  'id': 'NORMAL_ANN'},
 {'uid'

In [7]:
[(data['id'], data['name']) for data in response.json()['results']]
#This is the parsed results

[('GHCND', 'Daily Summaries'),
 ('GSOM', 'Global Summary of the Month'),
 ('GSOY', 'Global Summary of the Year'),
 ('NEXRAD2', 'Weather Radar (Level II)'),
 ('NEXRAD3', 'Weather Radar (Level III)'),
 ('NORMAL_ANN', 'Normals Annual/Seasonal'),
 ('NORMAL_DLY', 'Normals Daily'),
 ('NORMAL_HLY', 'Normals Hourly'),
 ('NORMAL_MLY', 'Normals Monthly'),
 ('PRECIP_15', 'Precipitation 15 Minute'),
 ('PRECIP_HLY', 'Precipitation Hourly')]

In [21]:
#We get the data type for Id temp
response = make_request(
 'datatypes',
 payload={
 'datacategoryid' : 'TEMP',
 'limit' : 100
 }
)
response.status_code


200

In [22]:
[(datatype['id'], datatype['name']) for datatype in response.json()['results']][-5:] # So dito same lang dun sa kanina but instead we get last 5 items ng id and name

[('MNTM', 'Monthly mean temperature'),
 ('TAVG', 'Average Temperature.'),
 ('TMAX', 'Maximum temperature'),
 ('TMIN', 'Minimum temperature'),
 ('TOBS', 'Temperature at the time of observation')]

Next is location

In [32]:
#Make another request for location
response = make_request(
 'locationcategories',
 {
 'datasetid' : 'GHCND'
 }
)
response.status_code



200

In [27]:
import pprint
pprint.pprint(response.json()['results'])

[{'id': 'CITY', 'name': 'City'},
 {'id': 'CLIM_DIV', 'name': 'Climate Division'},
 {'id': 'CLIM_REG', 'name': 'Climate Region'},
 {'id': 'CNTRY', 'name': 'Country'},
 {'id': 'CNTY', 'name': 'County'},
 {'id': 'HYD_ACC', 'name': 'Hydrologic Accounting Unit'},
 {'id': 'HYD_CAT', 'name': 'Hydrologic Cataloging Unit'},
 {'id': 'HYD_REG', 'name': 'Hydrologic Region'},
 {'id': 'HYD_SUB', 'name': 'Hydrologic Subregion'},
 {'id': 'ST', 'name': 'State'},
 {'id': 'US_TERR', 'name': 'US Territory'},
 {'id': 'ZIP', 'name': 'Zip Code'}]


In [31]:
pprint.pprint(response.json()['results'][1])

{'id': 'CLIM_DIV', 'name': 'Climate Division'}


# GET NYC LOCATION USING BINARY SEARCH

In [33]:
def get_item(name, what, endpoint, start = 1, end = None):
  mid = (start + (end if end else 1))//2

  name = name.lower()

  payload = {
      'datasetid' : 'GHCND',
      'sortfield' : 'name',
      'offset' : mid,
      'limit' : 1
  }

  response = make_request(endpoint, {**payload, **what})

  if response.ok:
    end = end if end else response.json(['metadata']['resultset']['count'])

    current_name = response.json()['results'][0]['name'].lower()

    if name in current_name:
      return response.json()['results'][0]
    else:
      if start >= end:
        return {}
      elif name < current_name:
        return get_item(name,what,endpoint,start,mid-1)
      elif name > current_name:
        return get_item(name, what, endpoint, mid + 1, end)
  else:
    print(f'Response not OK, status: {response.status_code}')

def get_location(name):
  return get_item(name, {'locationcategoryid' : 'CITY'}, 'locations')

  end = end if end else response.json(['metadata']['resultset']['count'])


In [34]:
#Find NYC
nyc = get_location('New York')
nyc


TypeError: list indices must be integers or slices, not str