## Data Retrieval Notebook

In this notebook, we use code provided by the NREL to request and handle the necessary data and metadata from the NOW-23 Great Lakes dataset. The code provided by the NREL has been modified to make the automatic querying and concatenation of data possible.

In [32]:
import requests
import pandas as pd
import numpy as np
import urllib.parse
import time
import os
from dotenv import load_dotenv, dotenv_values

load_dotenv()
API_KEY = os.getenv("API_KEY")
EMAIL = os.getenv("EMAIL")

In [33]:
# The function used to generate the 100 random site ids: 
# np.random.randint(0, 388080, (100))

site_ids = pd.read_csv("site ids", index_col=0)
site_ids = [str(site_ids.iloc[x, 0]) for x in range(100)]

In [None]:
BASE_URL = "https://developer.nrel.gov/api/wind-toolkit/v2/wind/offshore-great-lakes-download.csv?"
POINTS = site_ids
YEARS = ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']

# This code is provided by NREL to request and handle data
# Some modifications have been made so that csv requests can be made and appended automatically

def main():
    input_data = {
        'attributes': 'temperature_80m,turbulent_kinetic_energy_80m,winddirection_80m,windspeed_80m',
        'interval': '60',

        'api_key': API_KEY,
        'email': EMAIL,
    }
    
    for id, location_ids in enumerate(POINTS):

        print(f'Making request for point group {id + 1} of {len(POINTS)}...')
        for name in YEARS:
            
            input_data['names'] = name
            input_data['location_ids'] = location_ids

            if '.csv' in BASE_URL:
                url = BASE_URL + urllib.parse.urlencode(input_data, True)
                data = pd.read_csv(url, on_bad_lines='skip', skiprows=2)
                
                print(f'Response data for year {name} and site {location_ids}')

                file_loc = r"Data\NOW-23 Great Lakes [2000-2020] 60min/" + str(location_ids + ".csv")
                
                if name == '2000':
                    data.to_csv(file_loc)
                else:
                    pd.concat([pd.read_csv(file_loc, index_col=0), data]).to_csv(file_loc)
            else:
                headers = {
                    'x-api-key': API_KEY
                }
                data = get_response_json_and_handle_errors(requests.post(BASE_URL, input_data, headers=headers))
                download_url = data['outputs']['downloadUrl']
                # You can do with what you will the download url
                print(data['outputs']['message'])
                print(f"Data can be downloaded from this url when ready: {download_url}")

                data = pd.read_csv(download_url)

                # Delay for 1 second to prevent rate limiting
                time.sleep(1)
            print(f'Processed')


def get_response_json_and_handle_errors(response: requests.Response) -> dict:
    """Takes the given response and handles any errors, along with providing
    the resulting json

    Parameters
    ----------
    response : requests.Response
        The response object

    Returns
    -------
    dict
        The resulting json
    """
    if response.status_code != 200:
        print(f"An error has occurred with the server or the request. The request response code/status: {response.status_code} {response.reason}")
        print(f"The response body: {response.text}")
        exit(1)

    try:
        response_json = response.json()
    except:
        print(f"The response couldn't be parsed as JSON, likely an issue with the server, here is the text: {response.text}")
        exit(1)

    if len(response_json['errors']) > 0:
        errors = '\n'.join(response_json['errors'])
        print(f"The request errored out, here are the errors: {errors}")
        exit(1)
    return response_json

if __name__ == "__main__":
    main()

In [None]:
# This code is modified from the above cell to retrieve metadata for all randomly selected points and concatenate it

BASE_URL = "https://developer.nrel.gov/api/wind-toolkit/v2/wind/offshore-great-lakes-download.csv?"
POINTS = site_ids

input_data = {
    'attributes': 'temperature_80m,turbulent_kinetic_energy_80m,winddirection_80m,windspeed_80m',
    'interval': '60',

    'api_key': API_KEY,
    'email': EMAIL,
}

for id, location_ids in enumerate(POINTS):

    print(f'Making request for point group {id + 1} of {len(POINTS)}...')
    for name in ['2000']:

        input_data['names'] = name
        input_data['location_ids'] = location_ids

        if '.csv' in BASE_URL:
            url = BASE_URL + urllib.parse.urlencode(input_data, True)
            data = pd.read_csv(url, on_bad_lines='skip')

            print(f'Response metadata for year {name} and site {location_ids}')
            
            file_loc = r"Data\NOW-23 Great Lakes [2000-2020] metadata.csv"
            if id == 0:
                data.to_csv(file_loc)
            else:
                pd.concat([pd.read_csv(file_loc, index_col=0), data]).to_csv(file_loc)

In [38]:
import requests
import pandas as pd
import urllib.parse
import time

BASE_URL = "https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-led-conus-download.csv?"
POINTS = site_ids

def main():
    input_data = {
        'attributes': 'temperature_80m,winddirection_80m,windspeed_80m,vertical_windspeed_80m,virtual_potential_temperature_80m',
        'interval': '60',

        'api_key': API_KEY,
        'email': EMAIL,
    }
    for name in ['2018','2019','2020']:
        print(f"Processing name: {name}")
        for id, location_ids in enumerate(POINTS):
            input_data['names'] = [name]
            input_data['location_ids'] = location_ids
            print(f'Making request for point group {id + 1} of {len(POINTS)}...')

            if '.csv' in BASE_URL:
                url = BASE_URL + urllib.parse.urlencode(input_data, True)
                data = pd.read_csv(url, on_bad_lines='skip', skiprows=1)

                print(f'Response data for year {name} and site {location_ids}')

                file_loc = r"Data\WTK_LED CONUS [2018-2020] 60min/" + str(location_ids + ".csv")

                if name == '2018':
                    data.to_csv(file_loc)
                else:
                    pd.concat([pd.read_csv(file_loc, index_col=0), data]).to_csv(file_loc)
            else:
                headers = {
                    'x-api-key': API_KEY
                }
                data = get_response_json_and_handle_errors(requests.post(BASE_URL, input_data, headers=headers))
                download_url = data['outputs']['downloadUrl']
                # You can do with what you will the download url
                print(data['outputs']['message'])
                print(f"Data can be downloaded from this url when ready: {download_url}")

                # Delay for 1 second to prevent rate limiting
                time.sleep(1)
            print(f'Processed')


def get_response_json_and_handle_errors(response: requests.Response) -> dict:
    """Takes the given response and handles any errors, along with providing
    the resulting json

    Parameters
    ----------
    response : requests.Response
        The response object

    Returns
    -------
    dict
        The resulting json
    """
    if response.status_code != 200:
        print(f"An error has occurred with the server or the request. The request response code/status: {response.status_code} {response.reason}")
        print(f"The response body: {response.text}")
        exit(1)

    try:
        response_json = response.json()
    except:
        print(f"The response couldn't be parsed as JSON, likely an issue with the server, here is the text: {response.text}")
        exit(1)

    if len(response_json['errors']) > 0:
        errors = '\n'.join(response_json['errors'])
        print(f"The request errored out, here are the errors: {errors}")
        exit(1)
    return response_json

if __name__ == "__main__":
    main()

Processing name: 2018
Making request for point group 1 of 100...
Response data for year 2018 and site 244138
Processed
Making request for point group 2 of 100...
Response data for year 2018 and site 30891
Processed
Making request for point group 3 of 100...
Response data for year 2018 and site 56031
Processed
Making request for point group 4 of 100...
Response data for year 2018 and site 58085
Processed
Making request for point group 5 of 100...
Response data for year 2018 and site 282816
Processed
Making request for point group 6 of 100...
Response data for year 2018 and site 118158
Processed
Making request for point group 7 of 100...
Response data for year 2018 and site 88246
Processed
Making request for point group 8 of 100...
Response data for year 2018 and site 178499
Processed
Making request for point group 9 of 100...
Response data for year 2018 and site 326920
Processed
Making request for point group 10 of 100...
Response data for year 2018 and site 164677
Processed
Making requ

In [ ]:
for filename in os.listdir("Data\WTK_LED CONUS [2018-2020] 60min"):
    # turbulent kinetic energy at 80m (m2/s2)
    wtk_data = pd.read_csv("Data\WTK_LED CONUS [2018-2020] 60min/" + filename, index_col=0).reset_index(drop=True)
    now_data = pd.read_csv(r"Data\NOW-23 Great Lakes [2000-2020] 60min/" + filename, index_col=0)
    wtk_data['turbulent kinetic energy at 80m (m2/s2)'] = list(now_data[now_data['Year'] >= 2018]['turbulent kinetic energy at 80m (m2/s2)'])
    wtk_data.to_csv("Data\WTK_LED CONUS [2018-2020] 60min/" + filename)

In [2]:
def distance(pos1, pos2):
    return np.sqrt((pos1[0] - pos2[0])**2 + (pos1[1] - pos2[1])**2)

df = pd.DataFrame()
df['Name'] = list()
df['Longitude'] = list()
df['Latitude'] = list()
metadata = pd.read_csv("Data/NOW-23 Great Lakes [2000-2020] metadata.csv", index_col=0)
columns = ['Station_ID', 'Station_name', 'DATE', 'Latitude', 'Longitude', 'Elevation', 'temperature', 'dew_point_temperature', 'station_level_pressure', 'sea_level_pressure', 'relative_humidity', 'wet_bulb_temperature', 'visibility', 'altimeter', 'pressure_3hr_change', 'sky_cover_1']


for filename in os.listdir(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709"):
    data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
    df.loc[len(df)] = [filename[:-4], data['Longitude'].iloc[0], data['Latitude'].iloc[0]]
display(df)

  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v1.0.0_d2015_c20240709/" + filename, delimiter='|')[columns]
  data = pd.read_csv(r"Data\NOAA\ghcn-hourly_v

Unnamed: 0,Name,Longitude,Latitude
0,GHCNh_ACL000BARA9_2015,-61.8210,17.5910
1,GHCNh_ACW00011647_2015,-61.7833,17.1333
2,GHCNh_AEI0000OMAA_2015,54.6511,24.4330
3,GHCNh_AEI0000OMAD_2015,54.4581,24.4283
4,GHCNh_AEI0000OMAH_2015,52.4636,24.0740
...,...,...,...
12114,GHCNh_ZIM00067899_2015,31.5670,-18.6170
12115,GHCNh_ZIM00067961_2015,28.4500,-20.9170
12116,GHCNh_ZIM00067963_2015,28.5000,-20.3830
12117,GHCNh_ZIM00067976_2015,31.0830,-20.5500


In [5]:
name_list = list()
for i in range(len(metadata)):
    print(f"{i+1} of 100")
    long, lat = metadata['Longitude'].iloc[i], metadata['Latitude'].iloc[i]

    name = ""
    min_dist = np.inf
    for j in range(len(df)):
        dist = distance([long, lat], [df['Longitude'].iloc[j], df['Latitude'].iloc[j]])
        if dist < min_dist:
            min_dist = dist
            name = df['Name'].iloc[j]
    name_list.append(name[:-5])
print(name_list)

1 of 100
2 of 100
3 of 100
4 of 100
5 of 100
6 of 100
7 of 100
8 of 100
9 of 100
10 of 100
11 of 100
12 of 100
13 of 100
14 of 100
15 of 100
16 of 100
17 of 100
18 of 100
19 of 100
20 of 100
21 of 100
22 of 100
23 of 100
24 of 100
25 of 100
26 of 100
27 of 100
28 of 100
29 of 100
30 of 100
31 of 100
32 of 100
33 of 100
34 of 100
35 of 100
36 of 100
37 of 100
38 of 100
39 of 100
40 of 100
41 of 100
42 of 100
43 of 100
44 of 100
45 of 100
46 of 100
47 of 100
48 of 100
49 of 100
50 of 100
51 of 100
52 of 100
53 of 100
54 of 100
55 of 100
56 of 100
57 of 100
58 of 100
59 of 100
60 of 100
61 of 100
62 of 100
63 of 100
64 of 100
65 of 100
66 of 100
67 of 100
68 of 100
69 of 100
70 of 100
71 of 100
72 of 100
73 of 100
74 of 100
75 of 100
76 of 100
77 of 100
78 of 100
79 of 100
80 of 100
81 of 100
82 of 100
83 of 100
84 of 100
85 of 100
86 of 100
87 of 100
88 of 100
89 of 100
90 of 100
91 of 100
92 of 100
93 of 100
94 of 100
95 of 100
96 of 100
97 of 100
98 of 100
99 of 100
100 of 100
['GHCNh_

In [31]:
params = {
    "bbox":"-94,40,-74,51",
    "sdate":"2015-01-01",
    "edate":"2020-12-31",
    "meta":"name,state,ll,uid",
    "elems":'[{"vX":33},{"vX":26}]'
}
response = requests.get("https://data.rcc-acis.org/StnMeta?" + urllib.parse.urlencode(params, True))
pd.DataFrame(response.json()['meta'])

JSONDecodeError: Expecting value: line 1 column 1 (char 0)