# Weather Stats from NOAA

In [1]:
#dependencies
import requests
import pandas as pd
import json
from datetime import datetime
import time
from concurrent.futures import ThreadPoolExecutor

#API
from config import ncdc_api
#every NOAA request below sends the API token in a "token" header
headers = dict(token=ncdc_api) #token from https://www.ncdc.noaa.gov/cdo-web/token

Original Code<br>
    Pulling data with this original code succeeds only about half the time. While it does work when it succeeds, a more efficient method of acquiring the requested data was sought.

In [None]:
#NOTE: this entire cell is wrapped in one triple-quoted string, so none of the code below executes;
#it is kept only as a reference for the first approach.
#Problems visible in the text if it were ever re-enabled:
#  - weather_data and state are used in the final append but are never defined in this cell
#  - the inner "for i in response['results']" loop reuses i, the outer loop's state record
#  - requests.get(url, "dataset", ...) passes "dataset" as the second positional argument (params)
'''#get a list of states and their ids
url="https://www.ncei.noaa.gov/cdo-web/api/v2/locations?locationcategoryid=ST&limit=52"
r = requests.get(url, "dataset", headers = headers).text
states = json.loads(r) 

#make an api call to get temperature average from Summary of the Year dataset for every station in each state
for i in states['results']:
    if i['id']!="FIPS:11": #breaks on District of Columbia(FIPS:11)? skipping it
        #print(i['name'])
        datasetid='GSOY' #Global Summary of the Year, contains a yearly resolution of meteorological elements 
        datatypeid = "TAVG" #TAVG TMAX TMIN
        locationid=i['id'] #assigning state id which returns all weather stations in its boundry
        units='standard' #standard or metric
        startdate="2021-01-01"
        enddate="2021-12-31" #code cant handle more then one year at a time
        limit="1000" #max is 1000, default is 25

        #make the api call
        url= f"https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid={datasetid}&datatypeid={datatypeid}&locationid={locationid}&units={units}&startdate={startdate}&enddate={enddate}&limit={limit}"
        r = requests.get(url, "dataset", headers = headers).text
        response = json.loads(r)
        
        #get the temp from each weather station, add them up then find average
        tavg=0
        for i in response['results']:
            #print(i['value']) #print value from each station
            tavg += i['value'] 
        tavg = tavg/len(response['results'])
        #print (tavg)
        
        time.sleep(5) #api allows up to 5 requests per second but seems to hang sometimes, limiting to 1 per second
        weather_data.append({"State": state,
                     "Average Temp (2021)": tavg})'''

More Efficient Code<br>
This code was written with the assistance of an AI language model.<br>
Building on the previous code, this version uses the "concurrent.futures" module to make multiple API requests simultaneously, as opposed to running a "for" loop through thousands of rows.

In [5]:
#define function to fetch temperature data per state
def fetch_temperature_data(state):
    """Fetch the 2021 average temperature for one state from the NOAA CDO API.

    Requests the TAVG values of the GSOY (Global Summary of the Year) dataset
    for the state's location id and returns the plain mean of every value in
    the reply (at most 1000 records, the ``limit`` sent with the request).

    Parameters
    ----------
    state : dict
        One location record from the NOAA ``locations`` endpoint. Only its
        ``'id'`` and ``'name'`` keys are read.

    Returns
    -------
    dict
        ``{"State": <state name>, "Average Temp (2021)": <mean or None>}``.
        The average is ``None`` when the request fails or times out, when the
        reply is not JSON, or when it carries no (or an empty) ``"results"``
        list. One bad state therefore never aborts the whole parallel run.
    """
    #seconds to wait for NOAA before giving up on this state instead of hanging
    request_timeout = 30

    #NOAA specific query parameters; requests handles the URL encoding
    query = {
        "datasetid": "GSOY",
        "datatypeid": "TAVG",
        "locationid": state['id'],
        "units": "standard",
        "startdate": "2021-01-01",
        "enddate": "2021-12-31",
        "limit": "1000",
    }

    results = []
    try:
        reply = requests.get(
            "https://www.ncdc.noaa.gov/cdo-web/api/v2/data",
            params=query,
            headers=headers,
            timeout=request_timeout,
        )
        reply.raise_for_status()
        payload = reply.json()
        #error replies are JSON objects without a "results" key
        if isinstance(payload, dict):
            results = payload.get("results") or []
    except (requests.RequestException, ValueError):
        #network failure, HTTP error status or a non-JSON body: report "no data"
        results = []
    finally:
        #pause 2 seconds after every request (successful or not) so each
        #worker thread throttles its own calls to the API
        time.sleep(2)

    #average of all NOAA stations' temp data for the state; None when nothing came back
    tavg = sum(result['value'] for result in results) / len(results) if results else None

    return {"State": state['name'], "Average Temp (2021)": tavg}

if __name__ == '__main__':
    # Get a list of states and their ids
    state_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/locations?locationcategoryid=ST&limit=52"
    # timeout: give up rather than hang the notebook if NOAA never answers;
    # raise_for_status: surface an HTTP error here instead of an opaque
    # KeyError / JSON error on the lines below
    state_reply = requests.get(state_url, headers=headers, timeout=30)
    state_reply.raise_for_status()
    state_response = state_reply.text
    states = json.loads(state_response)['results']

    # Use ThreadPoolExecutor to fetch temperature data for each state in parallel;
    # executor.map yields the results in the same order as `states`
    with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust max_workers as needed
        weather_data = list(executor.map(fetch_temperature_data, states))

    # At this point, weather_data holds one {"State", "Average Temp (2021)"} dict per state

In [13]:
#build a DataFrame from the per-state records and preview the first rows to confirm the pull worked
weather_data_df = pd.DataFrame(data=weather_data)
weather_data_df.head(n=5)

Unnamed: 0,State,Average Temp (2021)
0,Alabama,63.755
1,Alaska,30.352143
2,Arizona,59.975949
3,Arkansas,61.046835
4,California,59.257534


In [9]:
#save to csv for merging
#index=False: the frame's integer index is not written; labelling it "State"
#put a second "State" header in the file next to the real State column
weather_data_df.to_csv("weather.csv", index=False)