In [1]:
import numpy as np
import netCDF4 as nc
import csv 
import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.dates as pltd
import matplotlib.colors as mcolors
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import datetime as dt
from matplotlib.collections import PolyCollection
from matplotlib.tri import Triangulation, TriAnalyzer, UniformTriRefiner
import pandas
import xarray as xr
import xesmf
import copy
import time
import warnings
from scipy import stats
warnings.filterwarnings("ignore")

In [2]:
import os, psutil, numpy as np # psutil may need to be installed
def usage():
    """Return the current process's memory use in MiB (bytes / 2**20).

    Reads the resident set size that psutil reports for this Python
    process (looked up through its PID).
    """
    this_process = psutil.Process(os.getpid())
    rss_bytes = this_process.memory_info().rss
    return rss_bytes / float(2 ** 20)

In [11]:
# OBJECTIVE: Plot Surf Temp, O2, PH time-series

# Input files: the CalCOFI bottle CSV and the station-order table.
calcofi_data = './194903-202010_Bottle.csv'
station_fil = './CalCOFIStationOrder.csv'

# Load the station table and show it as the cell output.
df_locations = pandas.read_csv(
    station_fil,
    encoding='unicode_escape',
)
df_locations

Unnamed: 0,Order Occ,Line,Station,Station Dlatitude,Dlongitude,Est Depth,Unnamed: 6
0,1,93.3,26.7,32.95637,117.30538,63.0,
1,2,93.4,26.4,32.94905,117.27357,20.0,
2,3,91.7,26.4,33.24350,117.46542,20.0,
3,4,93.3,28.0,32.91304,117.39438,609.0,
4,5,93.3,30.0,32.84637,117.53122,846.0,
...,...,...,...,...,...,...,...
108,109,60.0,60.0,37.61410,123.60825,3298.0,
109,110,60.0,70.0,37.28077,124.33237,3999.0,
110,111,60.0,80.0,36.94743,125.05327,,
111,112,60.0,90.0,36.61410,125.77099,,


In [18]:
# Read the complete bottle CSV. low_memory=False asks pandas to parse the
# file in a single pass rather than in chunks.
df2 = pandas.read_csv(
    calcofi_data,
    encoding='unicode_escape',
    low_memory=False,
)

In [19]:
# Keep only the columns needed for the oxygen comparison. The explicit
# .copy() makes df_tmp2 an independent frame, so the column assignments
# below are not chained assignments on a selection of df2.
df_tmp2 = df2[['Sta_ID','Depth_ID','Depthm','Oxy_µmol/Kg']].copy()

# Sta_ID is split on a single space: first token -> Line, second -> Station.
sta_id_parts = df_tmp2['Sta_ID'].str.split(' ')
df_tmp2['Line'] = sta_id_parts.str[0].astype(float)
df_tmp2['Station'] = sta_id_parts.str[1].astype(float)

# The first five characters of Depth_ID, minus the dash, give the 4-digit
# year. YEAR is stored as a string, exactly as before.
df_tmp2['YEAR'] = df_tmp2['Depth_ID'].map(lambda x: x[:5].replace('-',''))

# Restrict to 1993-2019 inclusive in one pass (one mask, one index reset).
year_as_int = df_tmp2['YEAR'].astype(int)
df_tmp2 = df_tmp2[(year_as_int > 1992) & (year_as_int < 2020)].reset_index(drop=True)

# Split each Depth_ID once and reuse the pieces:
#   parts[0] + parts[1][:2] -> 4-digit year
#   parts[3]                -> day of year (1-based)
#   parts[4]                -> time of day, kept as an 'hhmm' string
depth_id_parts = df_tmp2['Depth_ID'].str.split('-')
df_tmp2['Date'] = [
    (dt.datetime(int(parts[0] + parts[1][:2]), 1, 1)
     + dt.timedelta(days=int(parts[3]) - 1)).strftime("%Y-%m-%d")
    for parts in depth_id_parts
]
df_tmp2['UTC Time (hhmm)'] = depth_id_parts.str[4]

# Drop the raw identifier columns. The resulting column order
# (Depthm, Oxy_µmol/Kg, Line, Station, YEAR, Date, UTC Time (hhmm))
# is unchanged; later cells rely on it.
df_tmp2 = df_tmp2.drop(columns=['Sta_ID', 'Depth_ID'])

# Release the full bottle table. Re-running this cell requires re-running
# the read_csv cell first.
del df2
df_tmp2


Unnamed: 0,Depthm,Oxy_µmol/Kg,Line,Station,YEAR,Date,UTC Time (hhmm)
0,0,251.0801,93.3,26.7,1993,1993-01-12,2043
1,10,250.2061,93.3,26.7,1993,1993-01-12,2043
2,20,248.4601,93.3,26.7,1993,1993-01-12,2043
3,30,242.3478,93.3,26.7,1993,1993-01-12,2043
4,41,243.2127,93.3,26.7,1993,1993-01-12,2043
...,...,...,...,...,...,...,...
219088,0,265.1000,93.4,26.4,2019,2019-11-04,2233
219089,2,264.6200,93.4,26.4,2019,2019-11-04,2233
219090,6,264.1100,93.4,26.4,2019,2019-11-04,2233
219091,10,260.9200,93.4,26.4,2019,2019-11-04,2233


In [20]:
# preparing regridder object
lon_ca = np.zeros(len(df_locations)) 
lat_ca = np.zeros(len(df_locations))
line_ca = np.zeros(len(df_locations))
sta_ca = np.zeros(len(df_locations))

for index, row in df_locations.iterrows():
    lon_ca[row['Order Occ']-1] = row['Lon (dec)'] + 360 # convert longitude to NEP convention
    lat_ca[row['Order Occ']-1] = row['Lat (dec)']
    sta_ca[row['Order Occ']-1] = row['Sta']
    line_ca[row['Order Occ']-1] = row['Line']

# station array for regridder
cstat_locs = xr.Dataset()
cstat_locs['lon'] = xr.DataArray(data=lon_ca, dims=('stations'))
cstat_locs['lat'] = xr.DataArray(data=lat_ca, dims=('stations'))

# calcofi grid file
fid = nc.Dataset('./nep_calcofi_grid.nc')
nep_cal_lat = fid.variables['geolat'][:]
nep_cal_lon = fid.variables['geolon'][:]

# nep grid array for regridder
nep_locs = xr.Dataset()
nep_locs['lon'] = xr.DataArray(data=nep_cal_lon, dims=('yh','xh'))
nep_locs['lat'] = xr.DataArray(data=nep_cal_lat, dims=('yh','xh'))

# regridder for stations
regridder = xesmf.Regridder(nep_locs, cstat_locs, 'bilinear', locstream_out=True)

In [21]:
# input calcofi region data sets from nep 
nep_o2_fil = './nep_calcofi_o2_1993-2019_ts.nc'
nep_o2_ds = xr.open_dataset(nep_o2_fil)

In [22]:
# Regrid the NEP dataset onto the CalCOFI station points, attach the line and
# station numbers as coordinates, and rename the vertical axis z_l -> depth.
# NOTE(review): line_ca / sta_ca are passed as bare arrays; this appears to
# create new 'line' / 'station' dimension coordinates rather than labelling
# the station axis -- confirm this is intended.
nep_cal_o2 = (
    regridder(nep_o2_ds)
    .assign_coords(line=line_ca)
    .assign_coords(station=sta_ca)
    .rename(z_l='depth')
)

In [23]:
# Work on an independent copy of the CalCOFI bottle table; the interpolated
# NEP oxygen values are collected in a plain array and attached as the
# 'nep_o2' column once the loop finishes. (Writing element-by-element with
# df['nep_o2'][i] = ... is chained assignment, which pandas does not
# guarantee to propagate to the frame.)
df_tmp3 = df_tmp2.copy()
nep_o2_values = np.full(len(df_tmp3), np.nan)

# (line, station) -> position along the station axis of nep_cal_o2.
# setdefault keeps the first occurrence of a duplicated pair.
station_lookup = {}
for station_pos, line_station in enumerate(zip(line_ca.tolist(), sta_ca.tolist())):
    station_lookup.setdefault(line_station, station_pos)

start_time = time.time()
rows_skipped = 0

# Read the needed columns by name rather than by tuple position.
bottle_rows = zip(
    df_tmp3['Line'],
    df_tmp3['Station'],
    df_tmp3['Depthm'],
    df_tmp3['Date'],
    df_tmp3['UTC Time (hhmm)'],
)

# run interpolation step on the regridded (station-located) NEP field
for index, (line, station, depthm, date_str, hhmm) in enumerate(bottle_rows):
    station_index = station_lookup.get((line, station))
    if station_index is None:
        # bottle sample taken at a line/station that is not in the station table
        rows_skipped += 1
        continue

    if index%10000 == 0:
        print(index,"--- %s seconds ---" % (time.time() - start_time)) # Tracking progress

    # 'YYYY-MM-DD' + 'hhmm' -> datetime of the sample
    time_str = date_str + ' ' + hhmm[:2] + ':' + hhmm[-2:]
    sample_time = dt.datetime.strptime(time_str, '%Y-%m-%d %H:%M')

    # select the station's (time, depth) slice, then interpolate in depth and time
    station_profile = nep_cal_o2.o2[:,:,station_index]
    nep_o2_values[index] = float(station_profile.interp(depth=depthm).interp(time=sample_time))

df_tmp3['nep_o2'] = nep_o2_values
print("rows skipped (no matching station): %d" % rows_skipped)

# NOTE(review): the output name has no '.csv' extension; kept unchanged so
# anything that already reads this file still finds it.
new_csv = 'nep_calcofi_values'
df_tmp3.to_csv(new_csv)

0 --- 0.0015387535095214844 seconds ---
10000 --- 38.57476043701172 seconds ---
20000 --- 76.8038682937622 seconds ---
30000 --- 115.2847695350647 seconds ---
40000 --- 152.86257910728455 seconds ---
50000 --- 191.47012066841125 seconds ---
60000 --- 230.47573709487915 seconds ---
70000 --- 269.14416885375977 seconds ---
80000 --- 307.6197555065155 seconds ---
90000 --- 345.7242946624756 seconds ---
100000 --- 384.1991903781891 seconds ---
110000 --- 422.7982633113861 seconds ---
120000 --- 461.1408095359802 seconds ---
130000 --- 500.6310775279999 seconds ---
140000 --- 538.2263057231903 seconds ---
150000 --- 576.9078090190887 seconds ---
160000 --- 615.5693511962891 seconds ---
170000 --- 654.2668855190277 seconds ---
180000 --- 692.9116952419281 seconds ---
190000 --- 733.2143106460571 seconds ---
200000 --- 773.6826522350311 seconds ---
210000 --- 812.5942828655243 seconds ---
