## Erddap to Grid for Prawler

Using ERDDAP as the data source, obtain each profile, filter out the calibration profiles, and provide an interpolated/gridded dataset.

Gridding parameters:
Pressure - 1m
Time - 1hr

__pyversion__==3.6   
__author__==S.Bell

In [2]:
import datetime

# Record when the notebook was last executed.
run_stamp = datetime.datetime.now()
print("Last run {0}".format(run_stamp))

Last run 2018-10-10 15:24:43.877150


### connecting and basic information

In [3]:
import warnings
# Silence the "numpy.dtype size changed" warnings. The filter is installed
# here, before the pandas/numpy imports below.
warnings.filterwarnings(action='ignore', message="numpy.dtype size changed,")

from erddapy import ERDDAP
import pandas as pd
import numpy as np

# Base URL of the ERDDAP server queried throughout this notebook.
server_url = 'http://downdraft.pmel.noaa.gov:8080/erddap'

# Client bound to the server only (no protocol/response set); used for the
# dataset search and the variable lookups.
e = ERDDAP(server=server_url)

In [4]:
# Search the server for every dataset matching "PRAWLER" and load the
# CSV-formatted search result into a DataFrame.
search_url = e.get_search_url(search_for='PRAWLER', response='csv')
df = pd.read_csv(search_url)

In [5]:
# Count the distinct, non-missing endpoint URLs of each service type.
'We have {} tabledap, {} griddap, and {} wms endpoints.'.format(
    *(df[column].nunique() for column in ('tabledap', 'griddap', 'wms'))
)

'We have 8 tabledap, 0 griddap, and 0 wms endpoints.'

In [6]:
# Dataset IDs returned by the search, as a NumPy array.
prawlers = df.loc[:, 'Dataset ID'].values
print(prawlers)

['erddap_4fd3_fb30_b89f' 'erddap_17bsitae_prawler'
 'erddap_17ckitaem2a_prawler' 'erddap_18bsitaepr2a_prawler'
 'erddap_18mtitaepr1a_prawler' 'erddap_17ckitaem2a_met'
 'erddap_18bsitaepr2a_met' 'erddap_18mtitaepr1a_met']


In [7]:
# For every dataset, list the variables whose standard_name attribute is set.
variables = list(map(
    lambda dataset: e.get_var_by_attr(
        dataset_id=dataset,
        standard_name=lambda v: v is not None,
    ),
    prawlers,
))
print(variables)

[['time', 'Chlorophyll', 'latitude', 'depth', 'Salinity', 'longitude'], ['time', 'Chlorophyll', 'latitude', 'depth', 'Salinity', 'longitude'], ['time', 'Oxy_Conc', 'Oxy_Temperature', 'Chlorophyll', 'SigmaT', 'latitude', 'depth', 'Temperature', 'Oxy_Sat', 'Salinity', 'longitude', 'Turbidity'], ['time', 'Chlorophyll', 'latitude', 'depth', 'Salinity', 'longitude'], ['time', 'Chlorophyll', 'latitude', 'depth', 'Salinity', 'longitude'], ['time', 'wind_speed', 'relative_humidity', 'northward_wind', 'wind_from_direction', 'latitude', 'air_temperature', 'air_pressure', 'longitude', 'eastward_wind'], ['time', 'wind_speed', 'relative_humidity', 'northward_wind', 'wind_from_direction', 'latitude', 'air_temperature', 'air_pressure', 'longitude', 'eastward_wind'], ['time', 'wind_speed', 'relative_humidity', 'northward_wind', 'wind_from_direction', 'latitude', 'air_temperature', 'air_pressure', 'longitude', 'eastward_wind']]


### retrieving and plotting data

In [8]:
# Second client, configured for tabular (tabledap) requests returned as CSV.
d = ERDDAP(server=server_url, protocol='tabledap', response='csv')

# Dataset the profiles are pulled from.
d.dataset_id = 'erddap_18bsitaepr2a_prawler'

# Columns to request, in the order they appear in the request URL.
d.variables = [
    'profile_id', 'Temperature', 'Salinity', 'Chlorophyll', 'Turbidity',
    'latitude', 'longitude', 'depth', 'time', 'Oxy_Conc', 'Oxy_Sat',
]

# Server-side filters: a time window, a latitude/longitude box, and one
# excluded profile_id value.
d.constraints = dict([
    ('time>=', '2018-01-01T00:00:00Z'),
    ('time<=', '2018-10-10T00:00:00Z'),
    ('latitude>=', 45),
    ('latitude<=', 90),
    ('longitude>=', 180),
    ('longitude<=', 210),
    ('profile_id!=', '18bsitaepr2a_met'),
])

In [9]:
# Display the full tabledap request URL built from the dataset id,
# variables, and constraints configured on `d`.
d.get_download_url()

'http://downdraft.pmel.noaa.gov:8080/erddap/tabledap/erddap_18bsitaepr2a_prawler.csv?profile_id,Temperature,Salinity,Chlorophyll,Turbidity,latitude,longitude,depth,time,Oxy_Conc,Oxy_Sat&time>=1514764800.0&time<=1539129600.0&latitude>=45&latitude<=90&longitude>=180&longitude<=210&profile_id!="18bsitaepr2a_met"'

In [10]:
# Download the table with `time` as a parsed datetime index. Row 1 of the CSV
# carries the units information and is skipped.
df = d.to_pandas(index_col='time', parse_dates=True, skiprows=(1,))

# Keep only the rows that have no missing value in any column.
df = df.dropna()

df.head()

Unnamed: 0_level_0,profile_id,Temperature,Salinity,Chlorophyll,Turbidity,latitude,longitude,depth,Oxy_Conc,Oxy_Sat
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-05-01 20:53:53,18bsitaepr2a_p0003,3.696,32.06,0.339,0.629,56.863998,195.947006,5.7,312.0346,98.69056
2018-05-01 20:54:01,18bsitaepr2a_p0003,3.693,32.063,0.315,0.557,56.863998,195.947006,5.6,311.99353,98.6728
2018-05-01 20:54:09,18bsitaepr2a_p0003,3.694,32.061,0.375,0.605,56.863998,195.947006,6.0,312.04153,98.68891
2018-05-01 20:54:17,18bsitaepr2a_p0003,3.686,32.062,0.339,0.605,56.863998,195.947006,5.9,312.03372,98.66802
2018-05-01 20:54:25,18bsitaepr2a_p0003,3.699,32.06,0.327,0.92,56.863998,195.947006,5.8,312.02933,98.69614


In [11]:
# Display the last rows of the downloaded table.
df.tail()

Unnamed: 0_level_0,profile_id,Temperature,Salinity,Chlorophyll,Turbidity,latitude,longitude,depth,Oxy_Conc,Oxy_Sat
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-10-01 20:07:09,18bsitaepr2a_p5193,5.227,32.155,0.799,98.736,56.863998,195.947006,40.8,265.70862,87.25975
2018-10-01 20:07:17,18bsitaepr2a_p5193,5.199,32.14,0.23,1.573,56.863998,195.947006,42.6,255.78642,83.93611
2018-10-01 20:07:25,18bsitaepr2a_p5193,5.172,32.082,0.23,13.286,56.863998,195.947006,43.7,248.74664,81.53549
2018-10-01 20:07:33,18bsitaepr2a_p5193,5.164,32.055,0.194,3.073,56.863998,195.947006,45.0,241.9317,79.26952
2018-10-01 20:07:41,18bsitaepr2a_p5193,5.173,32.034,0.218,1.186,56.863998,195.947006,45.4,236.68439,77.55346


### Interpolating and Gridding

#### Gridding Parameters

Set pressure interval to 1m and build a grid from 0-50m

In [13]:
# Vertically grid the data onto evenly spaced grid points.
# The deployment depth has a maximum value; 50 is used as a generic limit.
interval = 1  # m
press_grid = np.arange(start=0, stop=50, step=interval)  # 1 m spacing

#### Temperature

We first need to isolate the calibration park-and-holds and remove them from the gridded analysis for Temperature/Salinity/Chlorophyll/Turbidity.
We may still wish to use them for the oxygen analysis.

In [14]:
# Split the table into one group per profile_id so that each cast can be
# handled on its own.
dfsg = df.groupby(by='profile_id')

In [39]:
# Profile (cast) to inspect.
cast = '18bsitaepr2a_p5193'

# Look the cast's depth samples up once; the group does not change while
# the pressure grid is scanned.
cast_depth = dfsg.get_group(cast)['depth']

# A cast whose depth standard deviation is <= 1.0 is flagged as a
# calibration profile (a park-and-hold rather than a real profile).
cal_profile = bool(np.std(cast_depth) <= 1.0)

for pg in press_grid:
    if cal_profile:
        # Calibration casts are left out of the depth binning.
        continue
    # Positional indices of the samples that fall in the bin
    # (pg, pg + interval].
    # NOTE: only the indices are printed here; taking the median when several
    # samples share a bin is the stated intent but is not implemented yet.
    ireg_ind = np.where((cast_depth > pg) & (cast_depth <= pg + interval))
    print(ireg_ind)


(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([1, 2]),)
(array([0, 3, 4]),)
(array([5]),)
(array([6]),)
(array([7]),)
(array([8]),)
(array([9]),)
(array([10]),)
(array([11]),)
(array([12]),)
(array([13]),)
(array([14]),)
(array([15]),)
(array([], dtype=int64),)
(array([16]),)
(array([17]),)
(array([18]),)
(array([19]),)
(array([20]),)
(array([], dtype=int64),)
(array([21]),)
(array([22]),)
(array([23]),)
(array([], dtype=int64),)
(array([24]),)
(array([25]),)
(array([], dtype=int64),)
(array([26]),)
(array([27]),)
(array([], dtype=int64),)
(array([28]),)
(array([29]),)
(array([], dtype=int64),)
(array([30]),)
(array([31]),)
(array([], dtype=int64),)
(array([32]),)
(array([33]),)
(array([], dtype=int64),)
(array([34]),)
(array([35]),)
(array([36]),)
(array([37]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
