Skip to content

Commit

Permalink
last changes
Browse files Browse the repository at this point in the history
  • Loading branch information
BaptisteVandecrux committed Jun 18, 2024
1 parent f3785eb commit a19c406
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 70 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"pypromice.qc.percentiles": ["thresholds.csv"],
"pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
},
install_requires=['numpy>=1.23.0', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
install_requires=['numpy>=1.23.0', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.14.1'],
# extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
entry_points={
'console_scripts': [
Expand Down
13 changes: 5 additions & 8 deletions src/pypromice/process/L2toL3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
AWS Level 2 (L2) to Level 3 (L3) data processing
"""
import pandas as pd
import numpy as np
import xarray as xr
from statsmodels.nonparametric.smoothers_lowess import lowess
Expand Down Expand Up @@ -73,8 +74,6 @@ def toL3(L2, T_0=273.15):
# Smoothing and inter/extrapolation of GPS coordinates

for var in ['gps_lat', 'gps_lon', 'gps_alt']:
logger.info('Postprocessing '+var)

    # saving the static value and dropping 'lat', 'lon' or 'alt' as they are
# being reassigned as timeseries
var_out = var.replace('gps_','')
Expand All @@ -83,13 +82,12 @@ def toL3(L2, T_0=273.15):
if 'altitude' in list(ds.attrs.keys()):
static_value = float(ds.attrs['altitude'])
else:
print('no standard altitude for', ds.station_id)
print('no standard altitude for', ds.station_id.item())
static_value = np.nan
elif var_out == 'lat':
static_value = float(ds.attrs['latitude'])
elif var_out == 'lon':
static_value = float(ds.attrs['longitude'])
ds=ds.drop_vars(var_out)

    # if there are no gps observations, then we use the static value repeated
# for each time stamp
Expand Down Expand Up @@ -117,10 +115,9 @@ def toL3(L2, T_0=273.15):
# smoothing and inter/extrapolation of the coordinate
ds[var_out] = \
('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks))

ds['lat_avg'] = ds['lat'].mean()
ds['lon_avg'] = ds['lon'].mean()
ds['alt_avg'] = ds['alt'].mean()

for v in ['lat','lon','alt']:
ds[v+'_avg'] = ds[v].mean(dim='time')
return ds


Expand Down
2 changes: 0 additions & 2 deletions src/pypromice/process/join_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ def join_l2():

# Resample to hourly, daily and monthly datasets and write to file
prepare_and_write(all_ds, args.outpath, args.variables, args.metadata, resample = False)

logger.info(f'Files saved to {os.path.join(args.outpath, name)}...')

if __name__ == "__main__":
join_l2()
31 changes: 19 additions & 12 deletions src/pypromice/process/join_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def loadArr(infile):
except:
name = infile.split('/')[-1].split('.')[0].split('_hour')[0].split('_10min')[0]

print(f'{name} array loaded from {infile}')
logger.info(f'{name} array loaded from {infile}')
return ds, name


Expand Down Expand Up @@ -192,12 +192,14 @@ def join_l3():
stream=sys.stdout,
)

    # getting the list of stations associated with the given site args.site
station_dict = build_station_dict(args.config_folder)

l3m = xr.Dataset()
for stid in station_dict[args.site]:
logger.info(stid)

list_stations = station_dict[args.site]

# reading the datasets and storing them into a list along with their latest
# timestamp.
list_station_data = []
for stid in list_stations:
is_promice = False
is_gcnet = False
filepath = os.path.join(args.folder_l3, stid, stid+'_hour.nc')
Expand All @@ -212,14 +214,19 @@ def join_l3():
continue

l3, _ = loadArr(filepath)

if is_gcnet:
l3 = gcnet_postprocessing(l3)

# lat, lon and alt should be just variables, not coordinates
if 'lat' in l3.keys():
l3 = l3.reset_coords(['lat', 'lon', 'alt'])


list_station_data.append((l3, l3.time.max().values, stid))

    # we then sort that list in reverse chronological order so that we start with the latest
# data and iteratively append data backward in time
sorted_list_station_data = sorted(list_station_data, key=lambda x: x[1], reverse=True)
sorted_stids = [stid for _, _, stid in sorted_list_station_data]
logger.info('joining %s'%' '.join(sorted_stids))
l3m = xr.Dataset()
for l3, _, stid in sorted_list_station_data:
if len(l3m)==0:
# saving attributes of station under an attribute called $stid
l3m.attrs[stid] = l3.attrs.copy()
Expand Down
102 changes: 69 additions & 33 deletions src/pypromice/process/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""
Utilities module for data formatting, populating and metadata handling
"""
import datetime, uuid
import datetime, uuid, logging
logger = logging.getLogger(__name__)
from importlib import metadata
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -47,10 +48,12 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']):
id = dataset.attrs['site_id']

if id not in exempt:
if 'gps_lon' not in dataset.keys():
print("?????????", id, "missing gps_lon")
return dataset
dataset['gps_lon'] = dataset['gps_lon'] * -1
for v in ['gps_lon','lon','lon_avg']:
if v in dataset.keys():
dataset[v] = np.abs(dataset[v]) * -1
else:
logger.info('%s does not have %s'%(id, v))

return dataset

def popCols(ds, names):
Expand Down Expand Up @@ -136,7 +139,15 @@ def addVars(ds, variables):
ds[k].attrs['long_name'] = variables.loc[k]['long_name']
ds[k].attrs['units'] = variables.loc[k]['units']
ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type']
ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']
if isinstance(variables.loc[k]['coordinates'], float):
if np.isnan(variables.loc[k]['coordinates']):
variables.loc[k, 'coordinates'] = ''
else:
print(k, variables.loc[k]['coordinates'])
if 'station_id' in ds.attrs.keys():
ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']+' station_id'
elif 'site_id' in ds.attrs.keys():
ds[k].attrs['coordinates'] = variables.loc[k]['coordinates']+' site_id'
return ds

def addMeta(ds, meta):
Expand All @@ -154,20 +165,6 @@ def addMeta(ds, meta):
ds : xarray.Dataset
Dataset with metadata
'''
if 'gps_lon' in ds.keys():
ds['lon'] = ds['gps_lon'].mean()
ds['lon'].attrs = ds['gps_lon'].attrs

ds['lat'] = ds['gps_lat'].mean()
ds['lat'].attrs = ds['gps_lat'].attrs

ds['alt'] = ds['gps_alt'].mean()
ds['alt'].attrs = ds['gps_alt'].attrs

# for k in ds.keys(): # for each var
# if 'units' in ds[k].attrs:
# if ds[k].attrs['units'] == 'C':
# ds[k].attrs['units'] = 'degrees_C'

# https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds
if 'station_id' in ds.attrs.keys():
Expand All @@ -179,20 +176,59 @@ def addMeta(ds, meta):
ds.attrs['date_modified'] = ds.attrs['date_created']
ds.attrs['date_issued'] = ds.attrs['date_created']
ds.attrs['date_metadata_modified'] = ds.attrs['date_created']

if 'lat' in ds.keys():
lat_min = ds['lat'].min().values
lat_max = ds['lat'].max().values
elif 'gps_lat' in ds.keys():
lat_min = ds['gps_lat'].min().values
lat_max = ds['gps_lat'].max().values
elif 'latitude' in ds.attrs.keys():
lat_min = ds.attrs['latitude']
lat_max = ds.attrs['latitude']
else:
lat_min =np.nan
lat_max = np.nan


if 'lon' in ds.keys():
lon_min = ds['lon'].min().values
lon_max = ds['lon'].max().values
elif 'gps_lon' in ds.keys():
lon_min = ds['gps_lon'].min().values
lon_max = ds['gps_lon'].max().values
elif 'longitude' in ds.attrs.keys():
lon_min = ds.attrs['longitude']
lon_max = ds.attrs['longitude']
else:
lon_min =np.nan
lon_max = np.nan

if 'alt' in ds.keys():
alt_min = ds['alt'].min().values
alt_max = ds['alt'].max().values
elif 'gps_alt' in ds.keys():
alt_min = ds['gps_alt'].min().values
alt_max = ds['gps_alt'].max().values
elif 'altitude' in ds.attrs.keys():
alt_min = ds.attrs['altitude']
alt_max = ds.attrs['altitude']
else:
alt_min =np.nan
alt_max = np.nan

ds.attrs['geospatial_bounds'] = "POLYGON((" + \
f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].min().values}))"

ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
f"{lat_min} {lon_min}, " + \
f"{lat_min} {lon_max}, " + \
f"{lat_max} {lon_max}, " + \
f"{lat_max} {lon_min}, " + \
f"{lat_min} {lon_min}))"

ds.attrs['geospatial_lat_min'] = str(lat_min)
ds.attrs['geospatial_lat_max'] = str(lat_max)
ds.attrs['geospatial_lon_min'] = str(lon_min)
ds.attrs['geospatial_lon_max'] = str(lon_max)
ds.attrs['geospatial_vertical_min'] = str(alt_min)
ds.attrs['geospatial_vertical_max'] = str(alt_max)
ds.attrs['geospatial_vertical_positive'] = 'up'
ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
Expand Down
12 changes: 5 additions & 7 deletions src/pypromice/process/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
# Resample dataset
if resample:
d2 = resample_dataset(dataset, time)
logger.info('Resampling to '+str(time))
else:
logger.info('No resampling')
d2 = dataset.copy()

# Reformat time
Expand All @@ -40,14 +40,13 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
# finding station/site name
if 'station_id' in d2.attrs.keys():
name = d2.attrs['station_id']
d2['station_id'] = d2.attrs['station_id']
else:
name = d2.attrs['site_id']
d2['site_id'] = d2.attrs['site_id']

# Reformat longitude (to negative values)
if 'gps_lon' in d2.keys():
d2 = utilities.reformat_lon(d2)
else:
logger.info('%s does not have gpd_lon'%name)
d2 = utilities.reformat_lon(d2)

# Add variable attributes and metadata
if vars_df is None:
Expand Down Expand Up @@ -95,11 +94,10 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
if not os.path.isdir(outdir):
os.mkdir(outdir)
# Write to csv file
logger.info('Writing to files...')
writeCSV(out_csv, d2, col_names)

# Write to netcdf file
col_names = col_names + ['lat', 'lon', 'alt']
col_names = col_names
writeNC(out_nc, d2, col_names)
logger.info(f'Written to {out_csv}')
logger.info(f'Written to {out_nc}')
Expand Down
6 changes: 3 additions & 3 deletions src/pypromice/ressources/variable_aliases_GC-Net.csv
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ t_i_11,
tilt_x,
tilt_y,
rot,
gps_lat,latitude
gps_lon,longitude
gps_alt,elevation
lat,latitude
lon,longitude
elev,elevation
gps_time,
gps_geounit,
gps_hdop,
Expand Down
10 changes: 6 additions & 4 deletions src/pypromice/ressources/variables.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
field,standard_name,long_name,units,coverage_content_type,coordinates,instantaneous_hourly,where_to_find,lo,hi,OOL,station_type,L0,L2,L3,max_decimals
time,time,Time,yyyy-mm-dd HH:MM:SS,physicalMeasurement,time,,,,,,all,1,1,1,
station_id,station_name,station_name,,referenceInformation,,False,L0 to L3_stations,,,,all,1,1,1,
site_id,site_name,site_name,,referenceInformation,,False,L3_site only,,,,all,0,0,1,
rec,record,Record,-,referenceInformation,time,,L0 or L2,,,,all,1,1,0,0
p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time,FALSE,,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,1,1,1,4
p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,650,1100,dshf_l dlhf_l qh_l,two-boom,1,1,1,4
Expand Down Expand Up @@ -72,10 +74,10 @@ gps_numsat,gps_numsat,GPS number of satellites,-,qualityInformation,time,TRUE,L0
gps_q,gps_q,Quality,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0,
lat,latitude_postprocessed,smoothed and interpolated latitude of station (best estimate),degrees_N,modelResult,time,TRUE,L3,,,,all,0,0,1,6
lon,longitude_postprocessed,smoothed and interpolated longitude of station (best estimate),degrees_E,modelResult,time,TRUE,L3,,,,all,0,0,1,6
elev,elevation_postprocessed,smoothed and interpolated elevation of station (best estimate),m a.s.l. (WGS84),modelResult,time,TRUE,L3,,,,all,0,0,1,2
lat_avg,mean_latitude,mean latitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,6
lon_avg,mean_longitude,mean longitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,6
alt_avg,mean_altitude,mean altitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,2
alt,altitude_postprocessed,smoothed and interpolated altitude of station (best estimate),m,modelResult,time,TRUE,L3,,,,all,0,0,1,2
lat_avg,mean_latitude,mean latitude (from all time-series),degrees_N,modelResult,,False,,,,,all,0,0,1,6
lon_avg,mean_longitude,mean longitude (from all time-series),degrees_E,modelResult,,False,,,,,all,0,0,1,6
alt_avg,mean_altitude,mean altitude (from all time-series),m,modelResult,,False,,,,,all,0,0,1,2
batt_v,battery_voltage,Battery voltage,V,physicalMeasurement,time,TRUE,,0,30,,all,1,1,1,2
batt_v_ini,,,-,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2
batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2
Expand Down

0 comments on commit a19c406

Please sign in to comment.