# NOAA.GOV API References

#### https://www.ndbc.noaa.gov/faq/measdes.shtml
#### https://www.ncei.noaa.gov/access/search/data-search/normals-monthly-1991-2020
#### https://www.ncdc.noaa.gov/cdo-web/webservices/v2#gettingStarted
#### https://www.ncei.noaa.gov/data/gsom/doc/GSOM_documentation.pdf

In [50]:
# Import Dependencies

import json
import requests
from pprint import pprint
import pandas as pd
import csv
from config import api_token

## Extract NOAA.GOV API Weather Data

In [2]:
# Pull API for Stations

api_stations_url = 'https://www.ncei.noaa.gov/cdo-web/api/v2/stations'

url=f"{api_stations_url}"
headers = {'token': api_token}
response = requests.get(url, headers=headers)
stations=response.json()
all_stations=stations['results']
# pprint (all_stations)

station_id=[]
station_name=[]
elevation_meters=[]
latitude=[]
longitude=[]
min_date=[]
max_date=[]

for station in all_stations:
    try:
        station_id.append(station['id'])
        station_name.append(station['name'])
        elevation_meters.append(station['elevation'])
        latitude.append(station['latitude'])
        longitude.append(station['longitude'])
        min_date.append(station['mindate'])
        max_date.append(station['maxdate'])

    except (TypeError):
        station_id.append('NA')
        station_name.append('NA')
        elevation_meters.append('NA')
        latitude.append('NA')
        longitude.append('NA')
        min_date.append('NA')
        max_date.append('NA')

station_data={'Station':station_id,"Station_name":station_name,"Elevation(meters)":elevation_meters,\
                        'Latitude':latitude,'Longitude':longitude,"min_date":min_date,'max_date':max_date}

In [3]:
station_data_df=pd.DataFrame(station_data)
station_data_df.head(20)


Unnamed: 0,Station,Station_name,Elevation(meters),Latitude,Longitude,min_date,max_date
0,COOP:010008,"ABBEVILLE, AL US",139.0,31.5702,-85.2482,1948-01-01,2014-01-01
1,COOP:010063,"ADDISON, AL US",239.6,34.21096,-87.17838,1938-01-01,2015-11-01
2,COOP:010071,"ADDISON CENTRAL TOWER, AL US",302.1,34.41667,-87.31667,1940-05-01,1962-03-01
3,COOP:010116,"ALABASTER SHELBY CO AIRPORT ASOS, AL US",172.5,33.17835,-86.78178,1995-04-01,2015-11-01
4,COOP:010117,"BELLE MINA 2 N, AL US",183.8,34.6891,-86.8819,1949-01-01,1949-12-01
5,COOP:010125,"ALAGA, AL US",34.1,31.13333,-85.06667,1935-05-01,1936-11-01
6,COOP:010140,"ALBERTA, AL US",53.3,32.2322,-87.4104,1940-11-01,2014-12-01
7,COOP:010148,"ALBERTVILLE, AL US",348.1,34.23333,-86.16667,1931-01-01,1977-06-01
8,COOP:010160,"ALEXANDER CITY, AL US",201.2,32.935,-85.95556,1969-10-01,2015-11-01
9,COOP:010163,"ALEXANDER CITY 6 NE, AL US",200.9,32.98333,-85.86667,1942-11-01,1969-10-01


In [61]:
# station_data_df.to_csv('stations.csv',index=False)

In [53]:
# Pull API Data for 23 Weather Variables
station=[]
datatype=[]
date=[]
value=[]

for year in range(2012, 2023):
    x=str(year)

    for month in range(1,13):
        y = str(month).zfill(2)
        url_prcp=f"https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GSOM&stationid=GHCND:USW00094789&units=standard&startdate={x}-{y}-01&enddate={x}-{y}-01"
        headers = {'token': api_token}


        response = requests.get(url_prcp, headers=headers).json()
        

        for dtype in range(0,22):
           
            
            try:
                station.append(response['results'][dtype]['station'])
                datatype.append(response['results'][dtype]['datatype'])
                date.append(response['results'][dtype]['date'])
                value.append(response['results'][dtype]['value'])
        
            except json.JSONDecodeError:
                station.append("NA")
                datatype.append("NA")
                date.append("NA")
                value.append("NA")

weather_data={'Station':station,"Data_Type":datatype,"Date":date,'Value':value}


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [46]:
weather_data_df=pd.DataFrame(weather_data)
weather_data_df.tail(20)

Unnamed: 0,Station,Data_Type,Date,Value
2884,GHCND:USW00094789,CLDD,2022-12-01T00:00:00,0.0
2885,GHCND:USW00094789,DP01,2022-12-01T00:00:00,10.0
2886,GHCND:USW00094789,DP10,2022-12-01T00:00:00,9.0
2887,GHCND:USW00094789,DP1X,2022-12-01T00:00:00,0.0
2888,GHCND:USW00094789,DSND,2022-12-01T00:00:00,0.0
2889,GHCND:USW00094789,DSNW,2022-12-01T00:00:00,0.0
2890,GHCND:USW00094789,DT00,2022-12-01T00:00:00,0.0
2891,GHCND:USW00094789,DT32,2022-12-01T00:00:00,18.0
2892,GHCND:USW00094789,DX32,2022-12-01T00:00:00,3.0
2893,GHCND:USW00094789,DX70,2022-12-01T00:00:00,0.0


In [49]:
weather_data_df.to_csv('weather_noaa.csv',index=False)

## Transform Data (Using Pandas DataFrame)

In [None]:
# Merge DataFrames (df_stations, df_cloud, df_temp, df_prcp, df_snow) on Station ID
# Create separate ipynb files (or other preferred methods) to clean data, remove duplicate and null rows or columns
# Final DataFrame should have these columns:
# STATIONNAME, LATITUDE, LONGITUDE, ELEVATION (in meters above Sea Level), DATE (month, year),MLY-CLDD-NORMAL (Avg Monthly Clouds),MLY-PRCP-NORMAL (Avg Monthly Rain),MLY-SNOW-NORMAL (Avg Monthly Snow), MLY-TAVG-NORMAL (Avg Monthly Temperature C)


## Load Data

In [None]:

# 1. Load Clean DataFrame to CSV File
# 2. Load CSV file into Postgres SQL --> to create a database for all data used in Project
# 3. Load CSV file into AWS bucket (?) --> to be used as url site in neural prediction analysis using Spark (google Colab or Data Bricks)

