In [None]:
#Figure 3 - October 2023
#Preprocessing of Chl-a inputs into monthly median
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
import pandas as pd
import glob
import dask

from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels
import statsmodels.api as sm

import cmocean
import colorcet as cc
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
cartopy.config["data_dir"] = './cartopy_shapefiles'

In [None]:
# VIIRS: build the shared 2-D boolean mask on ds_input, then compute the
# cos(latitude)-weighted spatial median of chlorophyll for every timestep.
ds_input = xr.open_mfdataset('/home/datawork-lops-oh/biogeo/AI/CNN_CHLORO/INPUT/Input_obs9_*.nc')
ds_input = ds_input.transpose('time', 'latitude', 'longitude')
# True where the input mask equals 6 (meaning of class 6 is not documented here
# -- TODO confirm). A direct comparison gives the same result as the former
# where() -> isnan() -> invert chain, since NaN never compares equal to 6.
# Assumes the mask variable is 2-D (latitude, longitude) -- TODO confirm.
mask = ds_input.mask == 6
ds_input = ds_input.assign(variables={"mask": (('latitude','longitude'), mask.data)})

ds_out = xr.open_mfdataset('/home2/datawork/epauthen/Globcolour_coarse/VIR*.nc')
ds_out = ds_out.rename({'CHL1_coarse':'chloro'})
ds_out = ds_out.sel(latitude = slice(-50,50))
# The mask is attached by position (raw array), not by coordinate alignment,
# so ds_input and ds_out must share the same latitude/longitude grid.
ds_out = ds_out.assign(variables={"mask": (('latitude','longitude'), ds_input.mask.data)})
# Cells outside the mask become NaN; .load() pulls the result into memory.
y_test = ds_out.where(ds_out.mask == 1).load()

# Compute weighted median by timestep
weights = np.cos(np.deg2rad(y_test.latitude))
weights.name = "weights"
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(dim = ("longitude", "latitude"),q=0.5)
# Store the median series next to the masked field, indexed by time only.
VIR = y_test.assign(variables={"chloro_qua": (('time'), chloro_qua.data)})
VIR

In [None]:
# CCI: masked chlorophyll field and its cos(latitude)-weighted median per timestep.
# Requires ds_input (with its boolean mask) to already exist in the kernel.
ds_out = (
    xr.open_mfdataset('/home2/datawork/epauthen/Ocean-Colour-CCI/OC_CCI_Coarse/OC_CCI_chloro_a_*.nc')
    .rename({'chlor_a_coarse': 'chloro'})
    .sel(latitude=slice(-50, 50))
)
# Mask is attached by position, so both grids must match cell for cell.
ds_out = ds_out.assign(mask=(('latitude', 'longitude'), ds_input.mask.data))
# NaN-out everything outside the mask and bring the result into memory.
y_test = ds_out.where(ds_out.mask == 1).load()

# Weighted median over the two spatial dimensions, one value per timestep.
weights = np.cos(np.deg2rad(y_test.latitude)).rename("weights")
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(0.5, dim=("longitude", "latitude"))
CCI = y_test.assign(chloro_qua=('time', chloro_qua.data))

In [None]:
# GSM: merge the per-sensor-combination files into a single record, mask it,
# and compute the cos(latitude)-weighted median per timestep.
# Requires ds_input (with its boolean mask) to already exist in the kernel.
data_input = '/home2/datawork/epauthen/Globcolour_coarse/'
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    MODVIR = xr.open_mfdataset(data_input + "MODVIR_*.nc")
    MERMOD = xr.open_mfdataset(data_input + "MERMOD_*.nc")
    MERMODVIR = xr.open_mfdataset(data_input + "MERMODVIR_*.nc")
    MERMODSWF = xr.open_mfdataset(data_input + "MERMODSWF_*.nc")
    # Only the 1997-09 .. 2002-06 part of the SWF-only files is used.
    SWF = xr.open_mfdataset(data_input + "SWF_*.nc").sel(time=slice('1997-09', '2002-06'))
    # Merge order is kept: MODVIR, MERMOD, MERMODSWF, SWF, then MERMODVIR.
    ds_gsm = MODVIR.merge(MERMOD).merge(MERMODSWF).merge(SWF)
    ds_out = (
        ds_gsm.merge(MERMODVIR)
        .rename({'CHL1_coarse': 'chloro'})
        .sel(latitude=slice(-50, 50))
    )
    # Mask is attached by position, so both grids must match cell for cell.
    ds_out = ds_out.assign(mask=(('latitude', 'longitude'), ds_input.mask.data))
    y_test = ds_out.where(ds_out.mask == 1).load()

# Weighted median over the two spatial dimensions, one value per timestep.
weights = np.cos(np.deg2rad(y_test.latitude)).rename("weights")
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(0.5, dim=("longitude", "latitude"))
GSM = y_test.assign(chloro_qua=('time', chloro_qua.data))


In [None]:
# Globcolour CMEMS: masked chlorophyll field and its cos(latitude)-weighted
# median per timestep. Requires ds_input (with its boolean mask) in the kernel.
data_output = "/home/datawork-lops-oh/biogeo/AI/CNN_CHLORO/Globcolour_cmems_coarse/"
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out = xr.open_mfdataset(data_output + "Globcolour_CMEMS_chl_*.nc")
ds_out = ds_out.rename({'chl': 'chloro'}).sel(latitude=slice(-50, 50))
# Mask is attached by position, so both grids must match cell for cell.
ds_out = ds_out.assign(mask=(('latitude', 'longitude'), ds_input.mask.data))
# No .load() here: the masked field is left as returned by where().
y_test = ds_out.where(ds_out.mask == 1)

# Weighted median over the two spatial dimensions, one value per timestep.
weights = np.cos(np.deg2rad(y_test.latitude)).rename("weights")
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(0.5, dim=("longitude", "latitude"))
GCMEMS = y_test.assign(chloro_qua=('time', chloro_qua.data))


In [None]:
# YU: masked chlorophyll field and its cos(latitude)-weighted median per
# timestep. Requires ds_input (with its boolean mask) in the kernel.
data_output = "/home/datawork-lops-oh/biogeo/AI/CNN_CHLORO/Chloro_Yu_2023_coarse/"
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out = xr.open_mfdataset(data_output + "Yu_chloro_*.nc")
ds_out = ds_out.rename({'chl': 'chloro'}).sel(latitude=slice(-50, 50))
# Mask is attached by position, so both grids must match cell for cell.
ds_out = ds_out.assign(mask=(('latitude', 'longitude'), ds_input.mask.data))
# No .load() here: the masked field is left as returned by where().
y_test = ds_out.where(ds_out.mask == 1)

# Weighted median over the two spatial dimensions, one value per timestep.
weights = np.cos(np.deg2rad(y_test.latitude)).rename("weights")
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(0.5, dim=("longitude", "latitude"))
YU = y_test.assign(chloro_qua=('time', chloro_qua.data))

In [None]:
# Multiobs: monthly-mean surface chlorophyll, restricted to cells whose
# interpolated ETOPO1 `z` is below -200, then the cos(latitude)-weighted
# median per timestep. Unlike the other products this cell does not use the
# ds_input mask; the bathymetry criterion plays that role.
data_output = '/home/datawork-lops-oh/biogeo/AI/CNN_CHLORO/MULTIOBS_GLO_BIO_BGC_3D_REP_015_010/Surface/'
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out = xr.open_mfdataset(data_output + "CMEMS_chl_*.nc")
# Average onto monthly bins.
# NOTE(review): recent pandas releases deprecate the "M" offset alias in favour
# of "ME"; left unchanged so the cell keeps working on the current environment.
ds_out = ds_out.resample(time="1M").mean()
ds_out = ds_out.rename({'chl':'chloro'})
ds_out = ds_out.sel(latitude = slice(-50,50))
# Linearly interpolate the ETOPO1 grid (x, y) onto the chlorophyll grid.
ds_bathy = xr.open_dataset('/home2/datawork/epauthen/ETOPO1_Ice_g_gmt4.grd', engine='netcdf4')
res = ds_bathy.z.interp(x=ds_out.longitude, y=ds_out.latitude,method = 'linear')
# The raw array is attached by position, so force its axis order to match the
# (latitude, longitude) labels instead of relying on how `z` is stored on disk.
bathy_on_grid = res.transpose('latitude', 'longitude')
ds_out = ds_out.assign(variables={"bathymetry": (('latitude','longitude'), bathy_on_grid.data)})
# Keep only cells with z < -200 (presumably deeper than 200 m -- confirm the
# sign convention of the ETOPO1 file); everything else becomes NaN.
ds_out = ds_out.where(ds_out.bathymetry < -200)
# drop_vars replaces the deprecated Dataset.drop for removing variables.
y_test = ds_out.drop_vars(['bathymetry'])

# Compute weighted median by timestep
weights = np.cos(np.deg2rad(y_test.latitude))
weights.name = "weights"
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(dim = ("longitude", "latitude"),q=0.5)
# Store the median series next to the masked field, indexed by time only.
MCMEMS = y_test.assign(variables={"chloro_qua": (('time'), chloro_qua.data)})

In [None]:
# AVW: merge the per-sensor-combination files into a single record, mask it,
# and compute the cos(latitude)-weighted median per timestep.
# Requires ds_input (with its boolean mask) to already exist in the kernel.
data_input = '/home2/datawork/epauthen/Globcolour_AVW_coarse/'
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    MODVIR = xr.open_mfdataset(data_input + "MODVIR_*.nc")
    MERMOD = xr.open_mfdataset(data_input + "MERMOD_*.nc")
    MERMODVIR = xr.open_mfdataset(data_input + "MERMODVIR_*.nc")
    MERMODSWF = xr.open_mfdataset(data_input + "MERMODSWF_*.nc")
    # Only the 1997-09 .. 2002-06 part of the SWF-only files is used.
    SWF = xr.open_mfdataset(data_input + "SWF_*.nc").sel(time=slice('1997-09', '2002-06'))
    # Merge order is kept: MODVIR, MERMOD, MERMODSWF, SWF, then MERMODVIR.
    ds_gsm = MODVIR.merge(MERMOD).merge(MERMODSWF).merge(SWF)
    ds_out = (
        ds_gsm.merge(MERMODVIR)
        .rename({'CHL1_coarse': 'chloro'})
        .sel(latitude=slice(-50, 50))
    )
    # Mask is attached by position, so both grids must match cell for cell.
    ds_out = ds_out.assign(mask=(('latitude', 'longitude'), ds_input.mask.data))
    y_test = ds_out.where(ds_out.mask == 1).load()

# Weighted median over the two spatial dimensions, one value per timestep.
weights = np.cos(np.deg2rad(y_test.latitude)).rename("weights")
y_testw = y_test.chloro.weighted(weights)
chloro_qua = y_testw.quantile(0.5, dim=("longitude", "latitude"))
AVW = y_test.assign(chloro_qua=('time', chloro_qua.data))


In [None]:
# Write each product's median time series (chloro_qua) to its own NetCDF file.
# All seven product datasets must already exist in the kernel.
path_out = '/home/datawork-lops-oh/biogeo/AI/CNN_CHLORO/Preproc_GRL/'
# (output file name, product dataset), listed in the order they are written.
_median_outputs = [
    ('VIIRS_median.nc', VIR),
    ('GCMEMS_median.nc', GCMEMS),
    ('GSM_median.nc', GSM),
    ('CCI_median.nc', CCI),
    ('YU_median.nc', YU),
    ('MCMEMS_median.nc', MCMEMS),
    ('AVW_median.nc', AVW),
]
for _file_name, _product in _median_outputs:
    _product.chloro_qua.to_netcdf(path_out + _file_name)
