In [None]:
"""
Created on Mon Jul 04 14:17 2022

Prepare csv for chunks in time 

Author: @claraburgard
"""

FOR EACH POINT:
- T and S extrapolated to ice draft depth
- Distance to front
- Distance to the grounding line
- ice draft zonal and meridional slope in x- and y-direction
- bedrock zonal and meridional slope in x- and y-direction
- Ice draft depth
- Bathymetry
- Tidal velocity (u_tide)
- Ice draft concentration
- Max bathymetry 
- Target: melt rate in m ice per yr

In [None]:
import os.path

import numpy as np
import pandas as pd
import xarray as xr
from tqdm.notebook import trange, tqdm

import basal_melt_neural_networks.data_formatting as dfmt

READ IN DATA

In [None]:
# Run identifier as chosen by the user.
nemo_run0 = 'OPM021'

# 'OPM031-1' and 'OPM031-2' both map onto the run name 'OPM031';
# any other identifier is used unchanged.
nemo_run = 'OPM031' if nemo_run0 in ('OPM031-1', 'OPM031-2') else nemo_run0

In [None]:
# Input directories that depend on the selected run
inputpath_data = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/NEMO_eORCA025.L121_{nemo_run}_ANT_STEREO/'
inputpath_mask = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/ANTARCTICA_IS_MASKS/nemo_5km_{nemo_run}/'
inputpath_profiles = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/T_S_PROF/nemo_5km_{nemo_run}/'
inputpath_plumes = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/PLUMES/nemo_5km_{nemo_run}/'
inputpath_boxes = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/interim/BOXES/nemo_5km_{nemo_run}/'

# Input directory that does not depend on the run
inputpath_tides = '/bettik/burgardc/DATA/BASAL_MELT_PARAM/interim/TIDES/'

# Output directories
outputpath_melt = f'/bettik/burgardc/SCRIPTS/basal_melt_param/data/processed/MELT_RATE/nemo_5km_{nemo_run}/'
outputpath_nn = '/bettik/burgardc/DATA/NN_PARAM/interim/INPUT_DATA/EXTRAPOLATED_ISFDRAFT_CHUNKS/'

Input variables

In [None]:
# T and S extrapolated to ice draft depth
# The file is the same for every run; only the time subset differs.
T_S_2D_all_times = xr.open_mfdataset(inputpath_profiles+'T_S_2D_fields_isf_draft_oneFRIS.nc').sel(profile_domain=50)

# The sub-run must be read from nemo_run0: nemo_run has already been
# collapsed to 'OPM031', so testing it would never select a time subset.
if nemo_run0 == 'OPM031-2':
    T_S_2D_isfdraft = T_S_2D_all_times.isel(time=range(30,70))
elif nemo_run0 == 'OPM031-1':
    T_S_2D_isfdraft = T_S_2D_all_times.isel(time=range(30))
else:
    T_S_2D_isfdraft = T_S_2D_all_times

In [None]:
# dIF, dGL
file_isf_orig = xr.open_dataset(inputpath_mask+'nemo_5km_isf_masks_and_info_and_distance_new_oneFRIS.nc')

# First filter: keep only the ice shelves whose 'front_bot_depth_max' is finite
has_finite_front_depth = np.isfinite(file_isf_orig['front_bot_depth_max'])
nonnan_Nisf = file_isf_orig['Nisf'].where(has_finite_front_depth, drop=True).astype(int)
file_isf_nonnan = file_isf_orig.sel(Nisf=nonnan_Nisf)

# Second filter: among those, keep only ice shelves with 'isf_area_here' >= 2500
is_large_enough = file_isf_nonnan['isf_area_here'] >= 2500
large_isf = file_isf_nonnan['Nisf'].where(is_large_enough, drop=True)
file_isf = file_isf_nonnan.sel(Nisf=large_isf)

In [None]:
# Limits handed to cut_domain_stereo for both of its range arguments
map_lim = [-3000000,3000000]

# Open the three gridded input files ...
file_mask_orig = xr.open_dataset(inputpath_data+'other_mask_vars_Ant_stereo.nc')
file_other = xr.open_dataset(inputpath_data+'corrected_draft_bathy_isf.nc')
file_conc = xr.open_dataset(inputpath_data+'isfdraft_conc_Ant_stereo.nc')

# ... and cut each of them to the same domain
file_mask_orig_cut, file_other_cut, file_conc_cut = (
    dfmt.cut_domain_stereo(opened_ds, map_lim, map_lim)
    for opened_ds in (file_mask_orig, file_other, file_conc)
)

In [None]:
# bathymetry, ice draft, concentration
file_draft = file_other_cut['corrected_isfdraft']
file_bed_orig = file_mask_orig_cut['bathy_metry']
file_bed_corr = file_other_cut['corrected_isf_bathy']
file_isf_conc = file_conc_cut['isfdraft_conc']

# Keep the original bathymetry where the draft is smaller than it,
# and fall back to the corrected bathymetry everywhere else.
draft_below_bed_value = file_draft < file_bed_orig
file_bed_goodGL = file_bed_orig.where(draft_below_bed_value, file_bed_corr)

In [None]:
# Slope information file (merged into the geometry dataset further down)
slope_file_path = inputpath_mask+'nemo_5km_slope_info_bedrock_draft_latlon_oneFRIS.nc'
file_slope = xr.open_dataset(slope_file_path)

In [None]:
# Tidal velocity file: the variable 'ttv' is renamed to 'u_tide'
# before being cut to the same domain as the other fields.
utide_file = xr.open_dataset(inputpath_tides + 'tidal_velocity_nemo_Ant_stereo.nc')
utide_file = utide_file.rename({'ttv':'u_tide'})
u_tide = dfmt.cut_domain_stereo(utide_file['u_tide'], map_lim, map_lim)

Target

In [None]:
# The melt-rate file is the same for every run; only the time subset differs.
NEMO_melt_rates_all_times = xr.open_mfdataset(outputpath_melt+'melt_rates_2D_NEMO.nc')

# The sub-run must be read from nemo_run0: nemo_run has already been
# collapsed to 'OPM031', so testing it would never select a time subset.
if nemo_run0 == 'OPM031-2':
    NEMO_melt_rates_2D = NEMO_melt_rates_all_times.isel(time=range(30,70))
elif nemo_run0 == 'OPM031-1':
    NEMO_melt_rates_2D = NEMO_melt_rates_all_times.isel(time=range(30))
else:
    NEMO_melt_rates_2D = NEMO_melt_rates_all_times

# Target variable
melt_rate = NEMO_melt_rates_2D['melt_m_ice_per_y']

PREPARE ONE DATASET WITH EVERYTHING

In [None]:
# Merge the distance, draft, bathymetry, slope, concentration and tide fields
geometry_2D = (
    file_isf[['dGL', 'dIF']]
    .merge(file_draft)
    .merge(file_bed_goodGL)
    .merge(file_slope)
    .merge(file_isf_conc)
    .merge(u_tide)
)

# Replace every non-finite dIF value by NaN
dIF_unfiltered = geometry_2D['dIF']
geometry_2D['dIF'] = dIF_unfiltered.where(np.isfinite(dIF_unfiltered), np.nan)

# Temperature, salinity and the melt-rate target in one dataset
time_dpdt_in = T_S_2D_isfdraft[['theta_in','salinity_in']].merge(melt_rate)

In [None]:
# Broadcast the two datasets against each other
broadcast_pair = xr.broadcast(geometry_2D, time_dpdt_in)
geometry_2D_br = broadcast_pair[0]
time_dpdt_in_br = broadcast_pair[1]

In [None]:
# Merge everything, order the dimensions as (y, x, time), remove the
# 'profile_domain' and 'Nisf' variables and load the result into memory.
# drop_vars replaces the deprecated Dataset.drop.
final_input_xr = (
    xr.merge([geometry_2D_br, time_dpdt_in_br])
    .transpose('y','x','time')
    .drop_vars(['profile_domain', 'Nisf'])
    .load()
)

In [None]:
# Display the merged dataset as a quick visual check
final_input_xr

PREPARE CLEAN DATAFRAME WITH ALL DATA TO SAVE AND FEED TO THE NN

In [None]:
# CHECK WHERE PREVIOUS CHUNKS STOPPED
# end_chunks is the chunk number found on the last non-empty line of
# info_chunks.txt, or 0 when the file is missing or holds no entry yet.
info_chunks_path = outputpath_nn+'info_chunks.txt'
end_chunks = 0
if os.path.exists(info_chunks_path):
    with open(info_chunks_path) as f:
        # Skip blank lines so an empty file or a trailing newline cannot crash the parsing
        lines = [line for line in f if line.strip()]
    if lines:
        last_line = lines[-1]
        end_chunks = int(last_line.split(',')[0])

In [None]:
# Write one csv per ice shelf and per chunk of time steps, and record each
# chunk (number, run, first and last time value) in info_chunks.txt.

# Number of time steps per chunk (the last chunk may be shorter)
chunk_size = 10
n_time_steps = len(final_input_xr.time)
chunk_all = int(np.ceil(n_time_steps/chunk_size))

for chunk_nb in range(chunk_all):

    start_step = chunk_nb*chunk_size
    end_step = min(start_step+chunk_size, n_time_steps)

    # Named time_chunk (not trange) so the trange imported from tqdm is not shadowed
    time_chunk = final_input_xr.time.isel(time=range(start_step,end_step))
    print(time_chunk.values)
    start_yy = time_chunk[0].values
    end_yy = time_chunk.max().values

    # Continue the numbering after the chunks already listed in info_chunks.txt
    chunk_nb_new = chunk_nb+end_chunks+1
    with open(outputpath_nn+'info_chunks.txt', 'a') as info_file:
        info_file.write(str(chunk_nb_new).zfill(3)+','+nemo_run+','+str(start_yy)+','+str(end_yy)+'\n')

    # The time selection is the same for every ice shelf, so do it once per chunk
    chunk_ds = final_input_xr.sel(time=time_chunk)

    for kisf in tqdm(file_isf.Nisf):
        # Keep only the points of the current ice shelf
        ds_kisf = chunk_ds.where(file_isf['ISF_mask'] == kisf, drop=True).drop_vars('Nisf')

        df_kisf = ds_kisf.drop_vars(['longitude', 'latitude']).to_dataframe()
        # remove rows where there are nans
        clean_df_kisf = df_kisf.dropna()
        # remove rows where salinity_in is exactly 0
        clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in']!=0).dropna()
        clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+str(chunk_nb_new).zfill(3)+'.csv')

In [None]:
# Inspection cell: display the time coordinate at positions 30 to 38
# NOTE(review): looks like a leftover check; needs at least 39 time steps - confirm it is still wanted
final_input_xr.time.isel(time=range(30,39))

#########################

PREPARE LAT AND LON FOR A CHECK

In [None]:
# Take latitude and longitude from the ice-shelf file and turn them
# from coordinates into data variables.
latlon_subset = file_isf[['latitude', 'longitude']]
latlon = latlon_subset.reset_coords(names=['longitude','latitude'])

In [None]:
# Salinity is carried along with latitude/longitude so the same NaN and
# zero-salinity filtering can be applied to the rows later on.
# drop_vars replaces the deprecated DataArray.drop / Dataset.drop.
salinity_without_latlon = T_S_2D_isfdraft['salinity_in'].drop_vars(['longitude','latitude'])
latlon_br, salinity_for_nans = xr.broadcast(latlon, salinity_without_latlon)
latlon_input_xr = (
    xr.merge([latlon_br, salinity_for_nans])
    .transpose('y','x','time')
    .drop_vars('profile_domain')
    .load()
)

In [None]:
# Read the chunk bookkeeping file written alongside the input csv files.
# It has no header; the columns are 0 = chunk number, 1 = run name,
# 2 = first time value, 3 = last time value of the chunk.
file_info = pd.read_csv(outputpath_nn+'info_chunks.txt', delimiter=',', header=None)
# Index the rows by chunk number while keeping column 0 as a regular column
file_info = file_info.set_index(0, drop=False)

In [None]:
# Write one latitude/longitude csv per ice shelf for every chunk of the
# current run listed in the bookkeeping table.

# Rows of the bookkeeping table belonging to the current run (filtered once)
run_chunks = file_info[file_info[1]==nemo_run]

for chunk_nb in run_chunks[0].values:

    start_yy = run_chunks[2].loc[chunk_nb]
    end_yy = run_chunks[3].loc[chunk_nb]
    print(chunk_nb,start_yy,end_yy)

    # The time selection is the same for every ice shelf, so do it once per chunk
    chunk_latlon = latlon_input_xr.sel(time=range(start_yy,end_yy+1))

    for kisf in tqdm(file_isf.Nisf):
        # Keep only the points of the current ice shelf
        ds_kisf = chunk_latlon.where(file_isf['ISF_mask'] == kisf, drop=True).drop_vars('Nisf')

        df_kisf = ds_kisf.to_dataframe()
        # remove rows where there are nans
        clean_df_kisf = df_kisf.dropna()
        # remove rows where salinity_in is exactly 0
        clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in']!=0).dropna()
        clean_df_kisf.to_csv(outputpath_nn + 'dataframe_latlon_isf'+str(kisf.values).zfill(3)+'_'+str(chunk_nb).zfill(3)+'.csv')