In [None]:
"""
Created on Wed Mar 24 16:18 2020

This script cuts out the temperature (T) and salinity (S) fields and averages them over domains extending 10, 25, 50 and 100 km in front of the ice shelf, as well as over an offshore domain.

@author: Clara Burgard
"""

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import assess_param_funcs.useful_functions as uf
import assess_param_funcs.T_S_profile_functions as tspf
import assess_param_funcs.melt_functions as meltf
import assess_param_funcs.box_functions as bf

from scipy.spatial import cKDTree


import itertools

import distributed
import glob

READ IN THE DATA

In [None]:
# NEMO run to process
nemo_run = 'OPM006'

# Start and end year associated with the selected run
if nemo_run in ('OPM006', 'OPM021'):
    yy_start, yy_end = 1989, 2018
elif nemo_run in ('OPM016', 'OPM018'):
    yy_start, yy_end = 1980, 2008

In [None]:
# Input directories; each path contains the name of the selected NEMO run
inputpath_data = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/NEMO_eORCA025.L121_{nemo_run}_ANT_STEREO/'
inputpath_profiles = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/T_S_PROF/nemo_5km_{nemo_run}/'
inputpath_isf = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/ANTARCTICA_IS_MASKS/nemo_5km_{nemo_run}/'

# Limits used to make the domain a little smaller so the computation is more efficient
# (the isf file was already made smaller when it was created)
map_lim = [-3_000_000, 3_000_000]

PREPARE MASK AROUND FRONT (TO RUN WITHOUT DASK!)

In [None]:
# Reference T/S file. NOTE: despite the `_1980` in the variable name, the file opened here is the one of year 2000.
T_S_ocean_1980 = xr.open_dataset(inputpath_profiles+'T_S_theta_ocean_corrected_2000.nc')
# Ice-shelf masks and related info
file_isf_orig = xr.open_dataset(inputpath_isf+'nemo_5km_isf_masks_and_info_and_distance_new.nc')
# Keep only the ice shelves (Nisf) whose 'front_bot_depth_max' is finite
nonnan_Nisf = file_isf_orig['Nisf'].where(np.isfinite(file_isf_orig['front_bot_depth_max']), drop=True).astype(int)
file_isf = file_isf_orig.sel(Nisf=nonnan_Nisf)

In [None]:
# Mask variables; the project helper is called with map_lim for both horizontal directions
# (presumably cutting the domain to these limits - see uf.cut_domain_stereo), then the
# singleton dimensions are squeezed out and the 'time' coordinate is dropped.
file_mask_orig = xr.open_dataset(inputpath_data+'mask_variables_of_interest_Ant_stereo.nc')
file_mask = uf.cut_domain_stereo(file_mask_orig, map_lim, map_lim).squeeze().drop('time')

In [None]:
# Longitude and latitude fields stored in the ice-shelf file
lon, lat = file_isf['longitude'], file_isf['latitude']

In [None]:
# Ocean points: finite theta at the first time step and first depth level
first_level_theta = T_S_ocean_1980['theta_ocean'].isel(time=0, depth=0)
ocean = np.isfinite(first_level_theta).drop('time').drop('depth')

bathymetry = file_mask['bathy_metry']
# offshore: only points where the bathymetry is deeper than 1500 m
offshore = bathymetry > 1500
# continental shelf: only points where the bathymetry is 1500 m or shallower
contshelf = bathymetry <= 1500

In [None]:
# Stack the two domain masks (continental shelf and offshore) along a new 'profile_domain'
# dimension and load the result into memory.
#mask_domains = (ocean & contshelf).load() #<= checked if it does what it should and it does! :)
#mask_domains = (ocean).load()
# NB: 5.0 x 1.75 is the effective resolution at 70S for a model of 1 degree resolution in longitude (assuming 5 delta X and a Mercator grid)
# NOTE(review): the boxes below use 10.0 x 3.5, not 5.0 x 1.75 as stated in the NB above - confirm which is intended.
# NOTE(review): a dict is passed as `dims`; xarray only documents dimension *names* for `dims`
# (coordinate values belong in `coords`), so the labels and x/y values given here are presumably
# not attached to mask_domains - confirm before relying on them.
mask_domains = xr.DataArray([(ocean & contshelf), (ocean & offshore)],
                            dims={'profile_domain': ['close_cont_shelf','offshore'], 'y': contshelf.y, 'x': contshelf.x}).load()

# Longitude / latitude box sizes, one value per profile domain (passed to the mask helper further below)
lon_box = xr.DataArray(np.array([10.0, 10.0]), coords=[('profile_domain', ['close_cont_shelf','offshore'])])
lat_box = xr.DataArray(np.array([3.5, 3.5]), coords=[('profile_domain', ['close_cont_shelf','offshore'])])  

In [None]:
# Mask of the region around the ice front, built by the project helper from the lon/lat fields,
# the domain masks, the min/max lon/lat of each ice front and the box sizes.
# The result is later indexed by 'profile_domain' and 'Nisf'.
close_region_around_isf_mask = tspf.mask_boxes_around_IF_new(lon, lat, mask_domains, 
                                    file_isf['front_min_lon'], file_isf['front_max_lon'], 
                                    file_isf['front_min_lat'], file_isf['front_max_lat'],  
                                    lon_box, lat_box, 
                                    file_isf['isf_name'])

In [None]:
# For every ice shelf, compute the distance from its ice front for the points of its
# continental-shelf region, then gather all ice shelves along 'Nisf'.
front_mask = file_isf['IF_mask']
contshelf_boxes = close_region_around_isf_mask.sel(profile_domain='close_cont_shelf')

dist_list = []
for kisf in tqdm(file_isf['Nisf']):
    is_front_of_kisf = front_mask == kisf
    # skip ice shelves without any ice-front point in the mask
    if not is_front_of_kisf.any():
        continue

    box_kisf = contshelf_boxes.sel(Nisf=kisf)
    region_to_cut_out = box_kisf.where(box_kisf > 0, drop=True)
    IF_region = front_mask.where(is_front_of_kisf, drop=True)

    dist_from_front = tspf.distance_isf_points_from_line_small_domain(region_to_cut_out, IF_region)
    dist_list.append(dist_from_front)

dist_all = xr.concat(dist_list, dim='Nisf').reindex_like(file_isf)

In [None]:
# Write the distances to disk as variable 'dist_from_front'; this file is read back in the dask part below
dist_all.to_dataset(name='dist_from_front').to_netcdf(inputpath_profiles+'dist_to_ice_front_only_contshelf.nc')
#dist_all.to_dataset(name='dist_from_front').to_netcdf(inputpath_profiles+'dist_to_ice_front_whole_domain.nc')

In [None]:
# Write the 'offshore' part of the region mask to disk as variable 'mask'; read back in the offshore section below
close_region_around_isf_mask.sel(profile_domain='offshore').to_dataset(name='mask').to_netcdf(inputpath_profiles+'mask_offshore.nc')

COMPUTING THE MEAN PROFILES (TO RUN WITH DASK)

In [None]:
# Start a dask.distributed client: 24 workers, dashboard on port 8795, worker files in /tmp, memory_limit of '6GB'
client = distributed.Client(n_workers=24, dashboard_address=':8795', local_directory='/tmp', memory_limit='6GB')

In [None]:
# Show the client summary as the cell output
client

CONTINENTAL SHELF

In [None]:
# Distance thresholds along 'dist_from_front'; each coordinate label is its threshold divided by 1000
front_distance_labels = [10, 25, 50, 100]
bbox_da = xr.DataArray(
    np.array(front_distance_labels) * 1000.,
    dims=['dist_from_front'],
    coords={'dist_from_front': front_distance_labels},
)

If the workers don't die, everything can be computed in one go (this took approx. 1 hour with 12 cores); if the workers die, divide the work by years.

In [None]:
# True: compute everything in one go (only works if the dask workers don't die).
# False: use larger horizontal chunks; the sums are then written in pieces split along 'time' further below.
all_in_one = False

# Horizontal chunk size: 50 when computing everything at once, 100 otherwise
xy_chunk = 50 if all_in_one else 100
chunks_xy = {'x': xy_chunk, 'y': xy_chunk}
chunks_xyz = {'x': xy_chunk, 'y': xy_chunk, 'depth': 50}

dist_to_front_file = xr.open_mfdataset(inputpath_profiles+'dist_to_ice_front_only_contshelf.nc', chunks=chunks_xy)
# combine='nested' must be given together with concat_dim: recent xarray versions default to
# combine='by_coords', which raises a ValueError when concat_dim is passed.
# The files are concatenated along 'time' in the (sorted) order of their file names.
T_S_ocean_files = xr.open_mfdataset(inputpath_profiles+'T_S_theta_ocean_corrected_*.nc',
                                    combine='nested', concat_dim='time',
                                    chunks=chunks_xyz, parallel=True)
# Single reference file, used below to build the depth mask.
# NOTE: despite the `_1980` in the variable name, the file opened here is the one of year 2000.
T_S_ocean_1980 = xr.open_mfdataset(inputpath_profiles+'T_S_theta_ocean_corrected_2000.nc', chunks=chunks_xyz)
dist_to_front = dist_to_front_file['dist_from_front']

Prepare sum

In [None]:
# True where the distance from the front does not exceed a threshold; comparing against bbox_da
# broadcasts the result over 'dist_from_front' (one mask per threshold)
mask_km = dist_to_front <= bbox_da

In [None]:
# Sum over x and y of the fields multiplied by the boolean mask (points outside the mask are multiplied by False)
ds_sum = (T_S_ocean_files * mask_km).sum(['x','y'])

In [None]:
if not all_in_one:
    # Split the sum along 'time' and write one file per piece in a single call
    yearly_datasets = list(tspf.split_by_chunks(ds_sum.unify_chunks(), 'time'))
    paths = [
        tspf.create_filepath(yearly_ds, 'ds_sum_for_mean_contshelf', inputpath_profiles, yearly_ds.time[0].values)
        for yearly_ds in yearly_datasets
    ]
    xr.save_mfdataset(datasets=yearly_datasets, paths=paths)
else:
    # Compute the whole sum in memory and write it to one file
    ds_sum = ds_sum.load()
    ds_sum.to_netcdf(inputpath_profiles+'ds_sum_for_mean_contshelf.nc')

Prepare number of points by which you divide

In [None]:
# Read the sums back from disk (one file, or the pieces written when all_in_one is False)
if all_in_one:
    ds_sum = xr.open_mfdataset(inputpath_profiles+'ds_sum_for_mean_contshelf.nc')
else:
    # combine='nested' must be given together with concat_dim: recent xarray versions default to
    # combine='by_coords', which raises a ValueError when concat_dim is passed.
    # The pieces are concatenated along 'time' in the (sorted) order of their file names -
    # assumes the names produced by tspf.create_filepath sort chronologically, TODO confirm.
    ds_sum = xr.open_mfdataset(inputpath_profiles+'ds_sum_for_mean_contshelf_*.nc',
                               combine='nested', concat_dim='time', parallel=True).drop('profile_domain')

In [None]:
# Points with salinity > 0 in the reference file (presumably the valid ocean points at each depth - confirm)
mask_depth = T_S_ocean_1980['salinity_ocean'].squeeze().drop('time') >0
# Points that are both within the distance thresholds and have salinity > 0
mask_all = mask_km & mask_depth

In [None]:
# Number of selected points (sum of the boolean mask over x and y); used as the divisor of the mean
mask_sum = mask_all.sum(['x','y'])

In [None]:
# Compute the point counts and keep them in memory
mask_sum = mask_sum.load()

Make the mean

In [None]:
# Mean = sum of the fields divided by the number of selected points
ds_mean = ds_sum/mask_sum

In [None]:
# Display only: the result is not assigned, so ds_mean itself is unchanged by this cell
ds_mean.drop('profile_domain')

In [None]:
# Replace the existing 'profile_domain' coordinate by the distance thresholds:
# 'dist_from_front' is renamed to 'profile_domain' after the old coordinate is dropped.
# NOTE: this cell is not idempotent - running it twice fails because 'dist_from_front' no longer exists.
ds_mean = ds_mean.drop('profile_domain').rename({'dist_from_front': 'profile_domain'})

In [None]:
# Write the continental-shelf mean profiles.
# NOTE(review): the file name hard-codes '1980-2018' regardless of yy_start / yy_end of the selected run - confirm this is intended.
ds_mean.to_netcdf(inputpath_profiles+'T_S_mean_prof_corrected_km_contshelf_1980-2018.nc')

OFFSHORE PROFILES

In [None]:
# Re-open the T/S files for the offshore computation.
# combine='nested' must be given together with concat_dim: recent xarray versions default to
# combine='by_coords', which raises a ValueError when concat_dim is passed.
# The files are concatenated along 'time' in the (sorted) order of their file names.
offshore_chunks = {'x': 50, 'y': 50, 'depth': 50}
T_S_ocean_files = xr.open_mfdataset(inputpath_profiles+'T_S_theta_ocean_corrected_*.nc',
                                    combine='nested', concat_dim='time',
                                    chunks=offshore_chunks, parallel=True)
# Single reference file, used below to build the depth mask.
# NOTE: despite the `_1980` in the variable name, the file opened here is the one of year 2000.
T_S_ocean_1980 = xr.open_mfdataset(inputpath_profiles+'T_S_theta_ocean_corrected_2000.nc', chunks=offshore_chunks)

In [None]:
# Offshore mask written to disk in the first part of the notebook
mask_offshore_file = xr.open_mfdataset(inputpath_profiles+'mask_offshore.nc')
mask_offshore = mask_offshore_file['mask'].drop('profile_domain')
# Points with salinity > 0 in the reference file (presumably the valid ocean points at each depth - confirm)
mask_depth = T_S_ocean_1980['salinity_ocean'].squeeze().drop('time') >0
# Points that are both in the offshore mask and have salinity > 0
mask_all_offshore = mask_offshore & mask_depth

In [None]:
# Sum over x and y of the fields multiplied by the offshore mask.
# NOTE(review): the sum uses mask_offshore while the divisor further below uses mask_all_offshore
# (mask_offshore & mask_depth); this assumes points with salinity <= 0 add nothing to the sum - confirm.
ds_sum_offshore = (T_S_ocean_files * mask_offshore).sum(['x','y'])
# Store the single value 1000 under 'profile_domain' (presumably the label of the offshore domain
# when concatenating with the continental-shelf profiles - confirm)
ds_sum_offshore['profile_domain'] = np.array([1000])

In [None]:
# Compute the offshore sums, keep them in memory and write them to disk
ds_sum_offshore = ds_sum_offshore.load()
ds_sum_offshore.to_netcdf(inputpath_profiles+'ds_sum_for_mean_offshore.nc')

In [None]:
# Number of selected offshore points (sum of the boolean mask over x and y); used as the divisor of the mean
mask_sum_offshore = mask_all_offshore.sum(['x','y'])

In [None]:
# Compute the point counts and keep them in memory
mask_sum_offshore = mask_sum_offshore.load()

In [None]:
# Offshore mean = sum of the fields divided by the number of selected points
ds_mean_offshore = ds_sum_offshore/mask_sum_offshore

In [None]:
# Write the offshore mean profiles.
# NOTE(review): the file name hard-codes '1980-2018' regardless of yy_start / yy_end of the selected run - confirm this is intended.
ds_mean_offshore.to_netcdf(inputpath_profiles+'T_S_mean_prof_corrected_km_offshore_1980-2018.nc')

COMBINE BOTH

In [None]:
# Read both sets of mean profiles back from the files written above, so that this section can be run on its own
ds_mean_offshore = xr.open_dataset(inputpath_profiles+'T_S_mean_prof_corrected_km_offshore_1980-2018.nc')
ds_mean = xr.open_dataset(inputpath_profiles+'T_S_mean_prof_corrected_km_contshelf_1980-2018.nc')#.drop('profile_domain').rename({'dist_from_front':'profile_domain'})

In [None]:
# Concatenate the continental-shelf and offshore profiles along 'profile_domain' and write the combined file
ds_mean_both = xr.concat([ds_mean, ds_mean_offshore], dim='profile_domain')
ds_mean_both.to_netcdf(inputpath_profiles+'T_S_mean_prof_corrected_km_contshelf_and_offshore_1980-2018_oneFRIS.nc')