In [None]:
"""
Created on Tue Jul 05 14:17 2022

Prepare csv with whole vertical profiles in chunks

Author: @claraburgard
"""

FOR EACH POINT:
- T and S profiles at each point
- Distance to front
- Distance to the grounding line
- ice draft zonal and meridional slope in x- and y-direction
- bedrock zonal and meridional slope in x- and y-direction
- Ice draft depth
- Bathymetry
- utide
- Ice draft concentration
- Max bathymetry 
- Target: melt rate (m of ice per year)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from tqdm.notebook import trange, tqdm

import basal_melt_neural_networks.data_formatting as dfmt

In [None]:
# Select the Qt5 matplotlib backend so figures open in separate interactive windows
# (requires a Qt5 binding in the kernel environment).
%matplotlib qt5

READ IN DATA

In [None]:
# Identifier of the NEMO run to process, possibly carrying a sub-run suffix.
nemo_run0 = 'OPM016'

# The two OPM031 sub-runs share one set of input directories, so the suffix is
# stripped for everything that is keyed on the run name.
nemo_run = 'OPM031' if nemo_run0 in ('OPM031-1', 'OPM031-2') else nemo_run0
    

In [None]:
# Run-specific directories are built from a common root and the run name;
# the last three paths do not depend on the selected run.
interim_root = '/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/'
run_dir = f'nemo_5km_{nemo_run}/'

inputpath_data = f'{interim_root}NEMO_eORCA025.L121_{nemo_run}_ANT_STEREO/'
inputpath_mask = f'{interim_root}ANTARCTICA_IS_MASKS/{run_dir}'
inputpath_profiles = f'{interim_root}T_S_PROF/{run_dir}'
inputpath_plumes = f'{interim_root}PLUMES/{run_dir}'
inputpath_boxes = f'{interim_root}BOXES/{run_dir}'
outputpath_melt = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/processed/MELT_RATE/{run_dir}'
outputpath_nn = '/bettik/burgardc/DATA/NN_PARAM/interim/INPUT_DATA/WHOLE_PROF_CHUNKS/'
inputpath_info = '/bettik/burgardc/DATA/NN_PARAM/interim/INPUT_DATA/EXTRAPOLATED_ISFDRAFT_CHUNKS/'
inputpath_tides = '/bettik/burgardc/DATA/BASAL_MELT_PARAM/interim/TIDES/'

Input variables

In [None]:
# Bounds used to crop the stereographic domain, which keeps the computation cheaper.
# The ice-shelf file was already cropped when it was created.
map_lim = [-3_000_000, 3_000_000]

chunk_size = 300

In [None]:
# Ice-shelf masks and metadata for the selected run.
file_isf_orig = xr.open_dataset(inputpath_mask + 'nemo_5km_isf_masks_and_info_and_distance_new_oneFRIS.nc')

# Step 1: keep only ice shelves whose 'front_bot_depth_max' is finite.
has_front_depth = np.isfinite(file_isf_orig['front_bot_depth_max'])
nonnan_Nisf = file_isf_orig['Nisf'].where(has_front_depth, drop=True).astype(int)
file_isf_nonnan = file_isf_orig.sel(Nisf=nonnan_Nisf)

# Step 2: of those, keep only ice shelves with 'isf_area_here' >= 2500
# (units of the threshold are not visible here - TODO confirm).
is_large_enough = file_isf_nonnan['isf_area_here'] >= 2500
large_isf = file_isf_nonnan['Nisf'].where(is_large_enough, drop=True)
file_isf = file_isf_nonnan.sel(Nisf=large_isf)

In [None]:
# Mean T/S profiles, restricted to the retained ice shelves and to the
# profile_domain == 50 entry; the now-scalar coordinate is dropped afterwards.
ts_profiles_path = inputpath_profiles + 'T_S_mean_prof_corrected_km_contshelf_and_offshore_1980-2018_oneFRIS.nc'
file_TS_orig = xr.open_dataset(ts_profiles_path)
file_TS = file_TS_orig.sel(Nisf=large_isf)
file_TS_dom = (
    file_TS
    .sel(profile_domain=50)
    .drop('profile_domain')
)

In [None]:
# Remove depth levels that contain no data at all.
file_TS_cut_bot = file_TS_dom.dropna('depth', how='all')

# Keep the first depth level plus every level whose depth coordinate is >= 100,
# then stitch the two pieces back together along 'depth'.
first_level = file_TS_cut_bot.isel(depth=0)
deeper_levels = file_TS_cut_bot.where(file_TS_dom.depth >= 100, drop=True)
file_TS_cut_top_bot = xr.concat([first_level, deeper_levels], dim='depth')

In [None]:
# Quick visual check of the retained depth levels, plotted against their index.
# The index range is derived from the data (instead of a hard-coded 68) so the
# plot keeps working when the number of retained levels changes.
n_levels = file_TS_cut_top_bot.sizes['depth']
plt.scatter(range(n_levels), file_TS_cut_top_bot.depth)

In [None]:
# Spacing between consecutive retained depth levels (the first entry has no predecessor).
depth_levels = file_TS_cut_top_bot.depth
depth_levels - depth_levels.shift(depth=1)

In [None]:
map_lim = [-3000000, 3000000]

# Open the three gridded input files first ...
# (dask chunking via chunks={'x': chunk_size, 'y': chunk_size} is currently not used)
file_mask_orig = xr.open_dataset(inputpath_data + 'other_mask_vars_Ant_stereo.nc')
file_other = xr.open_dataset(inputpath_data + 'corrected_draft_bathy_isf.nc')
file_conc = xr.open_dataset(inputpath_data + 'isfdraft_conc_Ant_stereo.nc')

# ... then crop each of them to the same domain in both horizontal directions.
file_mask_orig_cut, file_other_cut, file_conc_cut = (
    dfmt.cut_domain_stereo(ds, map_lim, map_lim)
    for ds in (file_mask_orig, file_other, file_conc)
)

In [None]:
# Pull the individual 2D fields out of the cropped datasets:
# ice draft, original and corrected bathymetry, ice-draft concentration.
file_draft = file_other_cut['corrected_isfdraft']
file_bed_orig = file_mask_orig_cut['bathy_metry']
file_bed_corr = file_other_cut['corrected_isf_bathy']
file_isf_conc = file_conc_cut['isfdraft_conc']

# Use the original bathymetry wherever draft < original bathymetry,
# and fall back to the corrected bathymetry everywhere else.
draft_lt_bed = file_draft < file_bed_orig
file_bed_goodGL = file_bed_orig.where(draft_lt_bed, file_bed_corr)

In [None]:
# Slope information for bedrock and ice draft (stored alongside the ice-shelf masks).
slope_path = inputpath_mask + 'nemo_5km_slope_info_bedrock_draft_latlon_oneFRIS.nc'
file_slope = xr.open_dataset(slope_path)

In [None]:
# Tidal velocity: the variable 'ttv' is renamed to 'u_tide' and cropped to the working domain.
utide_raw = xr.open_dataset(inputpath_tides + 'tidal_velocity_nemo_Ant_stereo.nc')
utide_file = utide_raw.rename({'ttv': 'u_tide'})
u_tide = dfmt.cut_domain_stereo(utide_file['u_tide'], map_lim, map_lim)

Target

In [None]:
# Target variable: 2D melt rates of the selected run.
# The sub-run selection must test nemo_run0: nemo_run has already had the
# '-1' / '-2' suffix stripped, so comparing it against 'OPM031-1' / 'OPM031-2'
# could never match and the time subsetting was silently skipped.
melt_rates_all = xr.open_mfdataset(outputpath_melt + 'melt_rates_2D_NEMO.nc')

if nemo_run0 == 'OPM031-2':
    # second sub-run: time indices 30..69
    NEMO_melt_rates_2D = melt_rates_all.isel(time=range(30, 70))
elif nemo_run0 == 'OPM031-1':
    # first sub-run: time indices 0..29
    NEMO_melt_rates_2D = melt_rates_all.isel(time=range(30))
else:
    NEMO_melt_rates_2D = melt_rates_all

melt_rate = NEMO_melt_rates_2D['melt_m_ice_per_y']

PREPARE ONE DATASET WITH EVERYTHING (EXCEPT T AND S)

In [None]:
# Start from the distances to grounding line / ice front and merge in, one after
# the other: ice draft, bathymetry, slopes, ice-draft concentration, tidal velocity.
geometry_2D = file_isf[['dGL', 'dIF']]
for extra_field in (file_draft, file_bed_goodGL, file_slope, file_isf_conc, u_tide):
    geometry_2D = geometry_2D.merge(extra_field)

# Replace non-finite dIF values (e.g. inf) with NaN.
dIF_is_finite = np.isfinite(geometry_2D['dIF'])
geometry_2D['dIF'] = geometry_2D['dIF'].where(dIF_is_finite, np.nan)

PREPARE CLEAN DATAFRAME WITH ALL DATA FOR EACH ICE SHELF (T AND S INCLUDED)

In [None]:
# Chunk table: first column is the index (chunk number), the remaining columns
# are given readable names (run name, start year, end year).
chunk_column_names = {1: 'RUN', 2: 'STARTYY', 3: 'ENDYY'}
chunk_info = (
    pd.read_csv(inputpath_info + 'info_chunks.txt', header=None, index_col=0)
    .rename(columns=chunk_column_names)
)

In [None]:
# Chunk numbers (index of the chunk table) whose RUN entry matches the current run.
# NOTE(review): this compares against nemo_run (sub-run suffix stripped), not nemo_run0 -
# confirm that info_chunks.txt lists 'OPM031' rather than 'OPM031-1' / 'OPM031-2'.
run_idx = chunk_info.where(chunk_info['RUN'] == nemo_run).dropna().index.tolist()

# For every retained ice shelf: combine geometry, T/S profiles and melt rate on a
# common (y, x, time) grid, then write one CSV per time chunk of the current run.
for kisf in tqdm(file_isf.Nisf):
    # Zero-padded ice-shelf ID used in file names; int() keeps the tag free of a
    # trailing '.0' should the Nisf coordinate ever be float-typed.
    isf_tag = str(int(kisf)).zfill(3)
    # Grid points belonging to this ice shelf (computed once, used for geometry and melt).
    isf_mask = file_isf['ISF_mask'] == kisf

    geometry_2D_isf = geometry_2D.where(isf_mask, drop=True).drop('longitude').drop('latitude')
    melt_rate_isf = melt_rate.where(isf_mask, drop=True)
    T_S_2D_isf = (
        xr.open_dataset(inputpath_profiles + 'flattened_T_S_profiles_isf' + isf_tag + '.nc')
        .squeeze()
        .transpose('time', 'y', 'x')
        .drop('longitude')
        .drop('latitude')
    )

    # Time-dependent inputs (profiles + target), then broadcast against the static geometry.
    time_dpdt_in = T_S_2D_isf.merge(melt_rate_isf).drop('longitude').drop('latitude')
    geometry_2D_br_isf, time_dpdt_in_br = xr.broadcast(geometry_2D_isf, time_dpdt_in)
    final_input_xr_isf = (
        xr.merge([geometry_2D_br_isf, time_dpdt_in_br])
        .transpose('y', 'x', 'time')
        .drop('Nisf')
    )

    for chunk_nb_new in run_idx:
        start_yy = chunk_info['STARTYY'].loc[chunk_nb_new]
        end_yy = chunk_info['ENDYY'].loc[chunk_nb_new]
        # Time labels of this chunk (inclusive end year). Deliberately not called
        # `trange`, which would shadow the function imported from tqdm.notebook.
        chunk_years = final_input_xr_isf.time.sel(time=range(start_yy, end_yy + 1))

        df_kisf = final_input_xr_isf.sel(time=chunk_years).to_dataframe()
        # Drop incomplete rows, then rows where S_003 is exactly 0
        # (presumably fill values - TODO confirm).
        clean_df_kisf = df_kisf.dropna()
        clean_df_kisf = clean_df_kisf.where(clean_df_kisf['S_003'] != 0).dropna()
        clean_df_kisf.to_csv(
            outputpath_nn + 'dataframe_input_isf' + isf_tag + '_' + str(chunk_nb_new).zfill(3) + '.csv'
        )
    

In [None]:
# Scratch check: load the flattened T/S profiles of a single ice shelf (Nisf = 66).
kisf = file_isf.Nisf.sel(Nisf=66)
single_isf_path = inputpath_profiles + 'flattened_T_S_profiles_isf' + str(kisf.values).zfill(3) + '.nc'
T_S_2D_isf = (
    xr.open_dataset(single_isf_path)
    .squeeze()
    .transpose('time', 'y', 'x')
    .drop('longitude')
    .drop('latitude')
)

In [None]:
# Display the T/S profile dataset currently held in T_S_2D_isf.
T_S_2D_isf

####################

In [None]:
# Display the most recently built cleaned dataframe (as left behind by the export loop).
clean_df_kisf

In [None]:
# Remove the MultiIndex created by to_dataframe() (time, x, y).
# The original cell referenced `clean_df_yy`, which is never defined in this notebook
# and raised NameError on a fresh kernel; the dataframe produced by the export loop is
# `clean_df_kisf`. The reset is done on a copy so re-running the cell is harmless.
clean_df_kisf_noindex = clean_df_kisf.reset_index(drop=True)

In [None]:
# Display the IDs of the ice shelves that passed the filtering above.
file_isf.Nisf