# NOAA.GOV API References

#### https://www.ndbc.noaa.gov/faq/measdes.shtml
#### https://www.ncei.noaa.gov/access/search/data-search/normals-monthly-1991-2020
#### https://www.ncdc.noaa.gov/cdo-web/webservices/v2#gettingStarted
#### https://www.ncei.noaa.gov/data/gsom/doc/GSOM_documentation.pdf

In [108]:
# Import Dependencies

import json
import requests
from pprint import pprint
import pandas as pd
import csv
from api_keys import noaa_token

## Extract NOAA.GOV API Weather Data

In [93]:
# Pull API for Stations

api_stations_url = 'https://www.ncei.noaa.gov/cdo-web/api/v2/stations'

url=f"{api_stations_url}"
headers = {'token': noaa_token}
response = requests.get(url, headers=headers)
stations=response.json()
all_stations=stations['results']
# pprint (all_stations)

station_id=[]
station_name=[]
elevation_meters=[]
latitude=[]
longitude=[]
min_date=[]
max_date=[]

for station in all_stations:
    for row in range(0,150000):
        try:
            station_id.append(station['id'])
            station_name.append(station['name'])
            elevation_meters.append(station['elevation'])
            latitude.append(station['latitude'])
            longitude.append(station['longitude'])
            min_date.append(station['mindate'])
            max_date.append(station['maxdate'])

        except IndexError:
            station_id.append('NA')
            station_name.append('NA')
            elevation_meters.append('NA')
            latitude.append('NA')
            longitude.append('NA')
            min_date.append('NA')
            max_date.append('NA')
        
        except TypeError:
            station_id.append('NA')
            station_name.append('NA')
            elevation_meters.append('NA')
            latitude.append('NA')
            longitude.append('NA')
            min_date.append('NA')
            max_date.append('NA')

station_data={'Station':station_id,"Station_name":station_name,"Elevation(meters)":elevation_meters,\
                        'Latitude':latitude,'Longitude':longitude,"min_date":min_date,'max_date':max_date}

In [94]:
station_data_df=pd.DataFrame(station_data)
station_data_df.head(200000)


Unnamed: 0,Station,Station_name,Elevation(meters),Latitude,Longitude,min_date,max_date
0,COOP:010008,"ABBEVILLE, AL US",139.0,31.57020,-85.24820,1948-01-01,2014-01-01
1,COOP:010008,"ABBEVILLE, AL US",139.0,31.57020,-85.24820,1948-01-01,2014-01-01
2,COOP:010008,"ABBEVILLE, AL US",139.0,31.57020,-85.24820,1948-01-01,2014-01-01
3,COOP:010008,"ABBEVILLE, AL US",139.0,31.57020,-85.24820,1948-01-01,2014-01-01
4,COOP:010008,"ABBEVILLE, AL US",139.0,31.57020,-85.24820,1948-01-01,2014-01-01
...,...,...,...,...,...,...,...
199995,COOP:010063,"ADDISON, AL US",239.6,34.21096,-87.17838,1938-01-01,2015-11-01
199996,COOP:010063,"ADDISON, AL US",239.6,34.21096,-87.17838,1938-01-01,2015-11-01
199997,COOP:010063,"ADDISON, AL US",239.6,34.21096,-87.17838,1938-01-01,2015-11-01
199998,COOP:010063,"ADDISON, AL US",239.6,34.21096,-87.17838,1938-01-01,2015-11-01


In [61]:
# station_data_df.to_csv('stations.csv',index=False)

In [109]:
# Pull API Data for 23 Weather Variables
station=[]
datatype=[]
date=[]
value=[]

for year in range(2010, 2022):
    x=str(year)

    for month in range(1,13):
        y = str(month).zfill(2)
        url_prcp=f"https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GSOM&stationid=GHCND:USW00094789&units=metric&startdate={x}-{y}-01&enddate={x}-{y}-01"
        headers = {'token': noaa_token}


        response = requests.get(url_prcp, headers=headers).json()
        

        for dtype in range(0,22):
           
            
            try:
                station.append(response['results'][dtype]['station'])
                datatype.append(response['results'][dtype]['datatype'])
                date.append(response['results'][dtype]['date'])
                value.append(response['results'][dtype]['value'])
        
            except json.JSONDecodeError:
                station.append("NA")
                datatype.append("NA")
                date.append("NA")
                value.append("NA")

weather_data={'Station':station,"Data_Type":datatype,"Date":date,'Value':value}


In [110]:
weather_data_df=pd.DataFrame(weather_data)
weather_data_df.tail(20)

Unnamed: 0,Station,Data_Type,Date,Value
3148,GHCND:USW00094789,ASTP,2021-12-01T00:00:00,1016.5
3149,GHCND:USW00094789,AWBT,2021-12-01T00:00:00,3.79
3150,GHCND:USW00094789,AWND,2021-12-01T00:00:00,4.8
3151,GHCND:USW00094789,CDSD,2021-12-01T00:00:00,645.6
3152,GHCND:USW00094789,CLDD,2021-12-01T00:00:00,0.0
3153,GHCND:USW00094789,DP01,2021-12-01T00:00:00,14.0
3154,GHCND:USW00094789,DP10,2021-12-01T00:00:00,5.0
3155,GHCND:USW00094789,DP1X,2021-12-01T00:00:00,0.0
3156,GHCND:USW00094789,DSND,2021-12-01T00:00:00,0.0
3157,GHCND:USW00094789,DSNW,2021-12-01T00:00:00,0.0


In [9]:
# weather_data_df.to_csv('weather_noaa2.csv',index=False)

In [95]:
# Datatypes
id=[]
maxdate=[]
mindate=[]
name=[]
    
url_prcp=f"https://www.ncei.noaa.gov/cdo-web/api/v2/datatypes"
headers = {'token': noaa_token}


resp = requests.get(url_prcp, headers=headers).json()
datasets=resp['results']

for row in range(0,25):
    
    try:
        id.append(datasets[row]['id'])
        maxdate.append(datasets[row]['maxdate'])
        mindate.append(datasets[row]['mindate'])
        name.append(datasets[row]['name'])
    
    except TypeError:
        id.append('NA')
        maxdate.append('NA')
        mindate.append('NA')
        name.append('NA')

data_type={'Datatype_id':id,"maxdate":maxdate,"mindate":mindate,'name':name}

In [96]:
data_type_df=pd.DataFrame(data_type)
data_type_df.head(30)

Unnamed: 0,Datatype_id,maxdate,mindate,name
0,ACMC,1996-05-28,1994-03-19,Average cloudiness midnight to midnight from 3...
1,ACMH,2005-12-31,1965-01-01,Average cloudiness midnight to midnight from m...
2,ACSC,1996-05-28,1994-02-01,Average cloudiness sunrise to sunset from 30-s...
3,ACSH,2005-12-31,1965-01-01,Average cloudiness sunrise to sunset from manu...
4,ALL,2023-05-31,1991-06-05,Base Data
5,ANN-CLDD-BASE45,2010-01-01,2010-01-01,Long-term averages of annual cooling degree da...
6,ANN-CLDD-BASE50,2010-01-01,2010-01-01,Long-term averages of annual cooling degree da...
7,ANN-CLDD-BASE55,2010-01-01,2010-01-01,Long-term averages of annual cooling degree da...
8,ANN-CLDD-BASE57,2010-01-01,2010-01-01,Long-term averages of annual cooling degree da...
9,ANN-CLDD-BASE60,2010-01-01,2010-01-01,Long-term averages of annual cooling degree da...


In [117]:
#Datasets
id=[]
maxdate=[]
mindate=[]
name=[]
uid=[]
    
url_prcp=f"https://www.ncei.noaa.gov/cdo-web/api/v2/datasets"
headers = {'token': noaa_token}


resp1 = requests.get(url_prcp, headers=headers).json()
datasets1=resp1['results']

for row in range(0,11):
    
    try:
        id.append(datasets1[row]['id'])
        maxdate.append(datasets1[row]['maxdate'])
        mindate.append(datasets1[row]['mindate'])
        name.append(datasets1[row]['name'])
        uid.append(datasets1[row]['uid'])
    
    except JSONDecodeError:
        id.append('NA')
        maxdate.append('NA')
        mindate.append('NA')
        name.append('NA')
        uid.append('NA')

datasets1={'Datatype_id':id,"maxdate":maxdate,"mindate":mindate,'name':name,'uid':uid}

In [118]:
dataset_df=pd.DataFrame(datasets1)
dataset_df.head(30)

Unnamed: 0,Datatype_id,maxdate,mindate,name,uid
0,GHCND,2023-06-03,1763-01-01,Daily Summaries,gov.noaa.ncdc:C00861
1,GSOM,2023-05-01,1763-01-01,Global Summary of the Month,gov.noaa.ncdc:C00946
2,GSOY,2023-01-01,1763-01-01,Global Summary of the Year,gov.noaa.ncdc:C00947
3,NEXRAD2,2023-05-31,1991-06-05,Weather Radar (Level II),gov.noaa.ncdc:C00345
4,NEXRAD3,2023-06-03,1994-05-20,Weather Radar (Level III),gov.noaa.ncdc:C00708
5,NORMAL_ANN,2010-01-01,2010-01-01,Normals Annual/Seasonal,gov.noaa.ncdc:C00821
6,NORMAL_DLY,2010-12-31,2010-01-01,Normals Daily,gov.noaa.ncdc:C00823
7,NORMAL_HLY,2010-12-31,2010-01-01,Normals Hourly,gov.noaa.ncdc:C00824
8,NORMAL_MLY,2010-12-01,2010-01-01,Normals Monthly,gov.noaa.ncdc:C00822
9,PRECIP_15,2014-01-01,1970-05-12,Precipitation 15 Minute,gov.noaa.ncdc:C00505


In [99]:
# Data Categories
id=[]
name=[]

    
url_prcp=f"https://www.ncei.noaa.gov/cdo-web/api/v2/datacategories"
headers = {'token': noaa_token}


resp2 = requests.get(url_prcp, headers=headers).json()
datasets2=resp2['results']


for row in range(0,25):
    
    try:
        id.append(datasets2[row]['id'])
        name.append(datasets2[row]['name'])

    
    except JSONDecodeError:
        id.append('NA')
        name.append('NA')

datacategory={'Datacategory_id':id,'name':name}

In [100]:
datacategory_df=pd.DataFrame(datacategory)
datacategory_df.head(30)

Unnamed: 0,Datacategory_id,name
0,ANNAGR,Annual Agricultural
1,ANNDD,Annual Degree Days
2,ANNPRCP,Annual Precipitation
3,ANNTEMP,Annual Temperature
4,AUAGR,Autumn Agricultural
5,AUDD,Autumn Degree Days
6,AUPRCP,Autumn Precipitation
7,AUTEMP,Autumn Temperature
8,COMP,Computed
9,COMPAGR,Computed Agricultural


## Transform Data (Using Pandas DataFrame)

In [None]:
# Merge DataFrames (df_stations, df_cloud, df_temp, df_prcp, df_snow) on Station ID
# Create separate ipynb files (or other preferred methods) to clean data, remove duplicate and null rows or columns
# Final DataFrame should have these columns:
# STATIONNAME, LATITUDE, LONGITUDE, ELEVATION (in meters above Sea Level), DATE (month, year),MLY-CLDD-NORMAL (Avg Monthly Clouds),MLY-PRCP-NORMAL (Avg Monthly Rain),MLY-SNOW-NORMAL (Avg Monthly Snow), MLY-TAVG-NORMAL (Avg Monthly Temperature C)


## Load Data

In [None]:

# 1. Load Clean DataFrame to CSV File
# 2. Load CSV file into Postgres SQL --> to create a database for all data used in Project
# 3. Load CSV file into AWS bucket (?) --> to be used as url site in neural prediction analysis using Spark (google Colab or Data Bricks)

