Check out the CROW dataset

-> Sarah Zimmermann : Sarah.Zimmermann@dfo-mpo.gc.ca
-> Bill Williams: Bill.Williams@dfo-mpo.gc.ca

Wintertime CTD, Chl-a and oxygen, Sea Ice parameters

In [11]:
%matplotlib inline
import proplot as pplt # plotting
import xarray as xr # netCDF Handling
import xesmf as xe # might be useful
#import BG.readDat as readDat # might be useful 
import numpy as np # might be useful
import pandas as pd # for reading the csv files
import datetime as dt # for date handling
from matplotlib import pyplot as plt # for plotting
# for every row in the dataframe, find the grid cell index in the dataset
from gridcellfinder import gridcellfinder as gridcellfinder

In [12]:
# set name lists etc
# Location of the CROW observation file that is processed in this notebook.
IMPORT_PATH = 'DATA/OBS/CROW/data_from_All_CROW.nc'
# NOTE(review): `depth` is not referenced anywhere else in the visible notebook.
depth = True # if using this, rewrite the dataset with a new dimension (or similar).
# read in a piece of data with nav_lon nav_lat
# `xrds` is handed to gridcellfinder() further down together with `ds`.
xrds = xr.open_dataset('DATA/G510_10_EXP13/NAA_730h_20020101_20021231_diad_T.nc')
# Bare attribute access: it is not the last expression of the cell, so nothing
# is displayed; it only fails if the file has no `nav_lat` variable.
xrds.nav_lat
 
# The observation dataset that all following cells modify in place.
ds = xr.open_dataset(IMPORT_PATH)


In [13]:
# Give the generic variable names descriptive ones. The mapping is taken
# from the text file attached to the dataset.
# (For reference, a similar rename for model output would be
#  {'nav_lon': 'longitude', 'nav_lat': 'latitude', 'time_counter': 'time'}.)
name_map = {
    'var1': 'Pressure',
    'var2': 'Fluorescence',
    'var3': 'Conductivity',
    'var4': 'Depth',
    'var5': 'Oxygen',
    'var6': 'Salinity',
    'var7': 'Temperature',
    'var8': 'Timeline_Serial_Days',
    'var9': 'Oxygen_Volts',
    'var10': 'Oxygen_Saturation',
    'metavar1': 'Cruise',
    'metavar2': 'Station',
    'metavar3': 'Type',
    'longitude': 'Longitude',
    'latitude': 'Latitude',
}
ds = ds.rename(name_map)

# Drop the var1_qc ... var10_qc companions (presumably quality-control
# flags); they are not used further down.
qc_names = [f'var{k}_qc' for k in range(1, 11)]
ds = ds.drop_vars(qc_names)
ds


In [14]:
# The three metadata variables hold byte strings (b'...'). Decode every
# entry to a regular str and write the result back on the station dimension.
cr = ds.Cruise.values
st = ds.Station.values
ty = ds.Type.values

cruise = [raw.decode() for raw in cr]
station = [raw.decode() for raw in st]
typs = [raw.decode() for raw in ty]

ds['Cruise'] = ('N_STATIONS', cruise)
ds['Station'] = ('N_STATIONS', station)
ds['Type'] = ('N_STATIONS', typs)


In [15]:
# Sort out Metadata
# Build a station identifier of the form
#   <YYYY>-<MM>_CROW_CTD_<Station>_<DD>
# from the cast time and the station name.

# Plain list of '<station>_' strings, one per station; it is concatenated
# element-wise with the date strings below.
Station_Name = [str(name) + '_' for name in ds['Station'].values]

cast_time = ds['date_time']
ds['Stat_id'] = (cast_time.dt.strftime('%Y') + '-' + cast_time.dt.strftime('%m')
                 + '_CROW_CTD_' + Station_Name + cast_time.dt.strftime('%d'))

# Assign a few more variables to the dataset

# Longitude is given in the 0..360 convention; convert to -180..180.
# Only values above 180 are shifted, so values that are already in the
# -180..180 range (e.g. when this cell is run a second time) stay untouched.
lon = ds['Longitude'].astype(float)
ds['Longitude'] = xr.where(lon > 180, lon - 360, lon)

ds['Latitude'] = ds['Latitude'].astype(float)

# Station position rounded to two decimals, plus a +/- 0.1 degree search box
# around it.
ds['lons'] = ds['Longitude'].round(2)
ds['lats'] = ds['Latitude'].round(2)
# 0.1 degrees is approx. 10 km; the same choice was reportedly made in a
# 2022 paper (Roberts? - reference to be confirmed).
# NOTE(review): 0.1 degrees of longitude covers less distance than 0.1 degrees
# of latitude away from the equator, so the box is not square in km.
ds['lonmin'] = ds['lons'] - 0.1
ds['lonmax'] = ds['lons'] + 0.1
ds['latmin'] = ds['lats'] - 0.1
ds['latmax'] = ds['lats'] + 0.1


In [16]:
## Create a dataframe containing Station number, Lat, Lon, but no variables
df = pd.DataFrame({'Stat_id': ds['Stat_id'], 'Latitude': ds['Latitude'], 'Longitude': ds['Longitude'],
                   'date_time': ds['date_time']})

# For every station, look up the matching grid cell indices in `xrds`.
# The index lists are stored as their string representation together with
# their lengths. Rows are collected first and the frame is built once, with
# all four columns declared (a frame declared with only 'X' and 'Y' cannot
# take four-value rows and has no 'lenx'/'leny' columns to read back).
rows = []
for i in range(len(df)):
    x, y = gridcellfinder(ds, xrds, i)
    X = x.tolist()
    Y = y.tolist()
    rows.append([str(X), str(Y), len(X), len(Y)])
gcl = pd.DataFrame(rows, columns=['X', 'Y', 'lenx', 'leny'])

# Attach the lookup results to the dataset, one value per station.
ds['X_NAA'] = ('N_STATIONS', gcl['X'])
ds['Y_NAA'] = ('N_STATIONS', gcl['Y'])
ds['lenx'] = ('N_STATIONS', gcl['lenx'])
ds['leny'] = ('N_STATIONS', gcl['leny'])


In [17]:
# Bare expression; it is not the last statement of the cell, so nothing is shown.
ds
# Copy of the dataset without the N_SAMPLES dimension and the variables on it.
blurb = ds.drop_dims('N_SAMPLES')
# Names of the variables that remain (iterating a Dataset yields its
# data-variable names). Passed to binner() as `varlist` further down.
v_list = list(blurb)

In [18]:
ncds = xr.open_dataset('DATA/NAA_Area_Volume.nc') # this is the updated file with corrected depth cut-offs 
# Bin edges used by binner(): 0 followed by the cut-off values from the file.
z = [0]
zc  = list(ncds.z_cuts.values)
zcu = z + zc
# `deptht` values from the same file; binner() uses them as the bin labels.
d = ncds.deptht.values

# Cleaned up work-around for the problem with 1 Dimensional variables (per slice)
def binner(ds = ds, i = 0, varlist = None):
    """Average one station's samples into the depth bins given by ``zcu``.

    The station is split into the variables named in ``varlist`` (kept as
    they are) and all remaining variables, which are grouped by the station's
    ``Depth`` into the module-level bins ``zcu`` (labelled with ``d``) and
    averaged per bin. Both parts are merged again afterwards.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with an ``N_STATIONS`` dimension and a ``Depth`` variable.
        The default is the module-level ``ds`` as it exists when this
        function is defined.
    i : int
        Position of the station along ``N_STATIONS``.
    varlist : list of str, optional
        Names of the variables that are not binned. It has to contain
        ``'Stat_id'``, because that variable is read from this subset.
        ``None`` is treated as an empty list.

    Returns
    -------
    xarray.Dataset
        The binned station with ``Stat_id`` assigned as a coordinate and the
        bin dimension renamed from ``Depth_bins`` to ``NAA_Deptht``.
    """
    if varlist is None:
        varlist = []

    # Depth Bins
    slce = ds.isel(N_STATIONS = i)
    oneD = slce[varlist]
    twoD = slce.drop_vars(varlist)
    # NOTE(review): with the default groupby_bins settings a sample whose
    # Depth equals the first edge (0) may not fall into any bin - confirm
    # whether that matters for this data.
    new_ds = twoD.groupby_bins(slce.Depth, bins = zcu, labels = d).mean()
    fu = xr.merge([new_ds, oneD])
    fu = fu.assign_coords(Stat_id= (oneD['Stat_id'].data))
    fu = fu.rename({'Depth_bins': 'NAA_Deptht'})
    return fu


In [19]:
# Bin every station and stack the results along 'Stat_id'.
# The per-station datasets are collected first and concatenated in a single
# call; concatenating inside the loop copies the already accumulated data
# again for every station.
station_bins = [binner(ds = ds, i = i, varlist = v_list)
                for i in range(len(ds.N_STATIONS))]
ndu = xr.concat(station_bins, dim = 'Stat_id')
ndu

In [20]:
# Test that the Station ID stays the same every time
# (optional spot check: prints id, station and time for a few entries of `ndu`)
#for f in range(0,100,15): 
#  print(ndu.Stat_id[f].values)
#  print(ndu.Station[f].values)
#  print(ndu.date_time[f].values)
#  print('--------------')

# Save the new Dataset
# The station table `df` can be exported as well by enabling the next line.
#df.to_csv('OUTPUT/CROW_meta.csv', index=False)
# Writes the depth-binned dataset; the OUTPUT directory is assumed to exist.
ndu.to_netcdf('OUTPUT/CROW_NAA_deptht.nc')
