In [33]:
import json
from urllib.parse import quote

import numpy as np
import pandas as pd
from sdig.erddap.info import Info

In [58]:
# Build the per-variable site catalog and write it to sites.json.
#
# For every variable in datasets.json, each dataset URL is asked for its
# distinct sites. Every site is then annotated with the dataset title, the
# name and units of the vertical-axis variable, the first/last time stamp
# available for that site, and the units/long name of the variable itself.
datasets = 'datasets.json'
with open(datasets) as discovery_stream:
    datasets_json = json.load(discovery_stream)

sites = {}
for variable in datasets_json:
    # Units and long name are always looked up under the first short name.
    short_name = datasets_json[variable]['short_names'][0]
    sites_variable = {}
    for url in datasets_json[variable]['datasets']:
        info = Info(url)
        title = info.get_title()
        variables_list, long_names, units, standard_names, d_types = info.get_variables()

        # The vertical axis belongs to the dataset, not to a single site, so
        # resolve it once per URL. Fail with a readable message instead of the
        # opaque KeyError('depth_name') the per-site lookup used to produce.
        if 'PRES' in variables_list:
            depth_name = 'PRES'
        elif 'depth' in variables_list:
            depth_name = 'depth'
        else:
            raise ValueError(
                "Dataset {} has neither a 'PRES' nor a 'depth' variable".format(url)
            )

        locations = url + '.csv?site_code,wmo_platform_code,latitude,longitude&distinct()'
        # skiprows=[1] drops the line right after the CSV header
        # (presumably ERDDAP's units row -- confirm against the server output).
        df = pd.read_csv(locations, skiprows=[1])
        df['url'] = url
        df = df.set_index('site_code')
        url_sites = df.to_dict('index')

        for site in url_sites:
            # Percent-encode the quoted site code (and the quotes around "time")
            # so codes containing spaces or reserved characters still form a
            # valid URL; str() keeps numeric site codes from raising TypeError.
            site_constraint = quote('"' + str(site) + '"', safe='')
            time_url = (
                url
                + '.csv?site_code,time'
                + '&orderByMinMax(%22time%22)'
                + '&site_code=' + site_constraint
            )
            time_df = pd.read_csv(time_url, skiprows=[1])

            # Keys are added in the same order as before so the column order
            # of frames built from sites.json is unchanged.
            url_sites[site].update({
                'depth_name': depth_name,
                'start_time': time_df['time'].min(),
                'end_time': time_df['time'].max(),
                'title': title,
                'units': units[short_name],
                'depth_units': units[depth_name],
                'long_name': long_names[short_name],
            })
            # A site code seen in several datasets keeps the entry from the last URL.
            sites_variable[site] = url_sites[site]
    sites[variable] = sites_variable

# Attach the collected sites to the discovery document and persist it.
for variable in sites:
    datasets_json[variable]['sites'] = sites[variable]
with open('sites.json', 'w') as fp:
    json.dump(datasets_json, fp, indent=4)

In [59]:
# Tabulate the temperature sites: one row per site, with the site code
# moved out of the index into a regular 'site_code' column.
temperature_sites = datasets_json['temperature']['sites']
df = pd.DataFrame.from_dict(temperature_sites, orient='index')
df = df.reset_index()
df = df.rename(columns={'index': 'site_code'})
df

Unnamed: 0,site_code,wmo_platform_code,latitude,longitude,url,depth_name,start_time,end_time,title,units,depth_units,long_name
0,KEO,28401,32.3,144.6,https://data.pmel.noaa.gov/generic/erddap/tabl...,PRES,2004-06-16T12:00:00Z,2022-05-22T00:00:00Z,OceanSITES KEO in-situ data: Hourly High Resol...,degree_Celsius,decibar,Temperature
1,Papa,48400,50.1,-144.9,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2007-06-07T23:00:00Z,2022-05-22T00:00:00Z,OceanSITES Papa hourly TEMP in-situ data,degree_Celsius,m,Temperature
2,0n23w,31007,0.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1999-03-06T14:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
3,0n35w,31001,0.0,-35.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-01-22T00:00:00Z,2020-05-01T00:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
4,10s10w,15001,-10.0,-10.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-11-04T10:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
5,12n23w,13001,12.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1999-03-06T14:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
6,15n38w,13008,15.0,-38.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-01-27T19:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
7,20n38w,41139,20.0,-38.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2007-05-22T04:00:00Z,2022-05-22T08:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
8,21n23w,13002,21.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2006-06-11T11:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature
9,4n23w,31006,4.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2006-06-11T11:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly TEMP in-situ data,degree_Celsius,m,Temperature


In [53]:
# Tabulate the salinity sites: one row per site, with the site code
# moved out of the index into a regular 'site_code' column.
# NOTE(review): the output stored with this cell has no units, depth_units or
# long_name columns, so it looks older than the catalog build -- re-run to refresh.
salinity_sites = datasets_json['salinity']['sites']
df = pd.DataFrame.from_dict(salinity_sites, orient='index')
df = df.reset_index()
df = df.rename(columns={'index': 'site_code'})
df

Unnamed: 0,site_code,wmo_platform_code,latitude,longitude,url,depth_name,start_time,end_time,title
0,KEO,28401,32.3,144.6,https://data.pmel.noaa.gov/generic/erddap/tabl...,PRES,2004-06-16T12:00:00Z,2022-05-22T00:00:00Z,OceanSITES KEO in-situ data: Hourly High Resol...
1,Papa,48400,50.1,-144.9,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2007-06-07T23:00:00Z,2022-05-22T00:00:00Z,OceanSITES Papa hourly PSAL in-situ data (Ocea...
2,0n23w,31007,0.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1999-03-06T14:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
3,0n35w,31001,0.0,-35.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-01-22T00:00:00Z,2020-05-01T00:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
4,10s10w,15001,-10.0,-10.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-11-04T10:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
5,12n23w,13001,12.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1999-03-06T14:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
6,15n38w,13008,15.0,-38.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,1998-01-27T19:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
7,20n38w,41139,20.0,-38.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2007-05-22T04:00:00Z,2022-05-22T08:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
8,21n23w,13002,21.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2006-06-11T11:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
9,4n23w,31006,4.0,-23.0,https://data.pmel.noaa.gov/generic/erddap/tabl...,depth,2006-06-11T11:00:00Z,2022-05-22T06:00:00Z,OceanSITES PIRATA hourly PSAL in-situ data
