In [None]:
"""
Created on Wed Apr 13 14:17 2022

Prepare csv for individual runs 

Author: @claraburgard
"""

FOR EACH POINT:
- T and S extrapolated to ice draft depth
- T and S mean and standard deviation
- Distance to front
- Distance to the grounding line
- ice draft zonal and meridional slope in x- and y-direction
- bedrock zonal and meridional slope in x- and y-direction
- Ice draft depth
- Bathymetry
- utide
- Ice draft concentration
- Max bathymetry 
- Target: melt m ice per yr

In [None]:
import numpy as np
import xarray as xr
from tqdm.notebook import trange, tqdm
import basal_melt_neural_networks.data_formatting as dfmt

READ IN DATA

In [None]:
# NEMO run to process. Supported identifiers: 'EPM026', 'EPM031', 'EPM034'.
nemo_run = 'EPM034'

# First and last year (both inclusive) processed for the selected run.
if nemo_run in ('EPM026', 'EPM031'):
    yy_start = 2049
    yy_end = 2058
elif nemo_run == 'EPM034':
    yy_start = 2119
    yy_end = 2128
else:
    # Fail here rather than with a NameError on yy_start/yy_end further down.
    raise ValueError(
        "Unknown nemo_run '" + str(nemo_run) + "': expected 'EPM026', 'EPM031' or 'EPM034'"
    )

In [None]:
# Input directories (absolute paths). All but the tides directory depend on the selected run.
inputpath_data = f'/bettik/burgardc/DATA/NN_PARAM/interim/NEMO_eORCA025.L121_{nemo_run}_ANT_STEREO/'
inputpath_mask = f'/bettik/burgardc/DATA/NN_PARAM/interim/ANTARCTICA_IS_MASKS/nemo_5km_{nemo_run}/'
inputpath_profiles = f'/bettik/burgardc/DATA/NN_PARAM/interim/T_S_PROF/nemo_5km_{nemo_run}/'
inputpath_plumes = f'/bettik/burgardc/DATA/NN_PARAM/interim/PLUMES/nemo_5km_{nemo_run}/'
inputpath_boxes = f'/bettik/burgardc/DATA/NN_PARAM/interim/BOXES/nemo_5km_{nemo_run}/'
inputpath_tides = '/bettik/burgardc/DATA/BASAL_MELT_PARAM/interim/TIDES/'

# Output directories. Despite its name, outputpath_melt is read from below (melt-rate target files);
# outputpath_nn receives the CSV files.
outputpath_melt = f'/bettik/burgardc/DATA/NN_PARAM/processed/MELT_RATE/nemo_5km_{nemo_run}/'
outputpath_nn = f'/bettik/burgardc/DATA/NN_PARAM/interim/INPUT_DATA/PIERRE_{nemo_run}_EXTRAPDRAFT_CHUNKS/'

# Generic interim directory.
outputpath = '/bettik/burgardc/DATA/NN_PARAM/interim/'

Input variables

In [None]:
# Lower/upper bound of the domain, used for both x and y when cutting fields
# (presumably metres on the stereographic grid — TODO confirm).
map_lim = [-3_000_000, 3_000_000]

# Tidal velocity: open the file, rename 'ttv' to 'u_tide', then cut to the domain.
utide_raw = xr.open_dataset(inputpath_tides + 'tidal_velocity_nemo_Ant_stereo.nc')
utide_file = utide_raw.rename({'ttv': 'u_tide'})
u_tide = dfmt.cut_domain_stereo(utide_file['u_tide'], map_lim, map_lim)

In [None]:
# For every year of the selected run: collect all predictors and the melt target
# into one dataset, then write one CSV per ice shelf and year.
for tt in tqdm(range(yy_start,yy_end+1)):
    year_str = str(tt)

    # Mask, corrected geometry and ice-draft concentration, each cut to the map_lim domain
    file_mask_orig = xr.open_dataset(inputpath_data+'other_mask_vars_Ant_stereo_'+year_str+'.nc')
    file_mask_orig_cut = dfmt.cut_domain_stereo(file_mask_orig, map_lim, map_lim)

    file_other = xr.open_dataset(inputpath_data+'corrected_draft_bathy_isf_'+year_str+'.nc')
    file_other_cut = dfmt.cut_domain_stereo(file_other, map_lim, map_lim)

    file_conc = xr.open_dataset(inputpath_data+'isfdraft_conc_Ant_stereo_'+year_str+'.nc')
    file_conc_cut = dfmt.cut_domain_stereo(file_conc, map_lim, map_lim)

    # T and S extrapolated to ice draft depth
    # (drop_vars replaces the deprecated Dataset.drop for removing a variable/coordinate)
    T_S_2D_isfdraft = xr.open_dataset(inputpath_profiles+'T_S_2D_fields_isf_draft_oneFRIS_'+year_str+'.nc').squeeze().drop_vars('time')

    # T and S mean and std
    T_S_2D_meanstd = xr.open_dataset(inputpath_profiles + 'T_S_2D_meanstd_isf_draft_oneFRIS_'+year_str+'.nc')

    # dIF, dGL
    file_isf_orig = xr.open_dataset(inputpath_mask+'nemo_5km_isf_masks_and_info_and_distance_oneFRIS_'+year_str+'.nc')
    # keep only ice shelves with a finite 'front_bot_depth_max'
    nonnan_Nisf = file_isf_orig['Nisf'].where(np.isfinite(file_isf_orig['front_bot_depth_max']), drop=True).astype(int)
    file_isf_nonnan = file_isf_orig.sel(Nisf=nonnan_Nisf)
    # keep only ice shelves whose 'isf_area_here' is at least 2500 (units not visible here — TODO confirm)
    large_isf = file_isf_nonnan['Nisf'].where(file_isf_nonnan['isf_area_here'] >= 2500, drop=True)
    file_isf = file_isf_nonnan.sel(Nisf=large_isf)

    # bathymetry, ice draft, concentration
    file_bed_orig = file_mask_orig_cut['bathy_metry']
    file_bed_corr = file_other_cut['corrected_isf_bathy']
    file_draft = file_other_cut['corrected_isfdraft']
    # use the original bathymetry where draft < bathymetry, the corrected one elsewhere
    file_bed_goodGL = file_bed_orig.where(file_draft < file_bed_orig,file_bed_corr)
    file_isf_conc = file_conc_cut['isfdraft_conc']

    # slope information
    file_slope = xr.open_dataset(inputpath_mask+'nemo_5km_slope_info_bedrock_draft_latlon_oneFRIS_'+year_str+'.nc')

    # Target
    NEMO_melt_rates_2D = xr.open_dataset(outputpath_melt+'melt_rates_2D_NEMO_'+year_str+'.nc')
    melt_rate = NEMO_melt_rates_2D['melt_m_ice_per_y']

    # Merge all predictors and the target into a single dataset
    time_dpdt_in = (
        file_isf[['dGL', 'dIF']]
        .merge(file_draft)
        .merge(file_bed_goodGL)
        .merge(file_slope)
        .merge(file_isf_conc)
        .merge(T_S_2D_isfdraft[['theta_in','salinity_in']])
        .merge(T_S_2D_meanstd)
        .merge(melt_rate)
        .merge(u_tide)
    )
    # turn non-finite dIF values (e.g. inf) into NaN so they are removed by dropna below
    time_dpdt_in['dIF'] = time_dpdt_in['dIF'].where(np.isfinite(time_dpdt_in['dIF']), np.nan)

    for kisf in file_isf.Nisf:
        # points belonging to ice shelf kisf
        ds_kisf = time_dpdt_in.where(file_isf['ISF_mask'] == kisf, drop=True)

        df_kisf = ds_kisf.drop_vars(['longitude','latitude']).to_dataframe()
        # remove rows where there are nans
        clean_df_kisf = df_kisf.dropna()
        # remove rows where salinity is exactly zero
        clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in']!=0).dropna()
        clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+year_str+'_'+nemo_run+'.csv')

In [None]:
# Display the year-loop variable as left by the loop above (the last year processed).
tt

In [None]:
# Display the part of the merged dataset that belongs to ice shelf `kisf`.
# `time_dpdt_in`, `file_isf` and `kisf` are whatever the loops above left behind.
time_dpdt_in.where(file_isf['ISF_mask'] == kisf, drop=True)

PREPARE ONE DATASET WITH EVERYTHING

In [None]:
# Display the merged dataset with the tidal velocity added (the result is not assigned).
# NOTE(review): `u_tide` is already merged into `time_dpdt_in` inside the year loop,
# so this cell looks redundant — confirm.
time_dpdt_in.merge(u_tide)

In [None]:
# Broadcast the geometry dataset and the merged inputs against each other.
# NOTE(review): `geometry_2D` is not defined in any cell visible in this notebook;
# this cell presumably relies on stale kernel state — confirm or remove.
geometry_2D_br, time_dpdt_in_br = xr.broadcast(geometry_2D,time_dpdt_in)

In [None]:
# Merge the broadcast geometry and inputs, order dimensions as (y, x, time),
# remove 'profile_domain' and load everything into memory.
# drop_vars replaces the deprecated Dataset.drop for removing a variable/coordinate.
final_input_xr = (
    xr.merge([geometry_2D_br, time_dpdt_in_br])
    .transpose('y','x','time')
    .drop_vars('profile_domain')
    .load()
)

PREPARE CLEAN DATAFRAME WITH ALL DATA TO SAVE AND FEED TO THE NN

In [None]:
# Write one CSV per ice shelf from the combined dataset `final_input_xr`.
for kisf in tqdm(file_isf.Nisf):
    # points belonging to ice shelf kisf, without the 'Nisf' coordinate
    # (drop_vars replaces the deprecated Dataset.drop)
    ds_kisf = final_input_xr.where(file_isf['ISF_mask'] == kisf, drop=True).drop_vars('Nisf')

    df_kisf = ds_kisf.drop_vars(['longitude','latitude']).to_dataframe()
    # remove rows where there are nans
    clean_df_kisf = df_kisf.dropna()
    # remove rows where salinity is exactly zero
    clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in']!=0).dropna()
    # The run identifier is `nemo_run`; the previously used `nemo_run0` is never defined
    # in this notebook and raised a NameError.
    clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+nemo_run+'.csv')

In [None]:
# Display the column dtypes of the most recently built per-ice-shelf dataframe.
clean_df_kisf.dtypes

#########################

SAVE TO CSV

In [None]:
# Write the dataframe `clean_df_yy` to a single CSV named after the run.
# NOTE(review): `clean_df_yy` is not defined in any cell visible in this notebook;
# this cell presumably relies on stale kernel state — confirm or remove.
clean_df_yy.to_csv(outputpath_nn + 'dataframe_input_'+nemo_run+'.csv')

In [None]:
# remove index (time, x, y)
# This mutates `clean_df_yy` in place and runs after the CSV cell above,
# so the file written there still contains the index.
clean_df_yy.reset_index(drop=True, inplace=True)

PREPARE LAT AND LON FOR A CHECK

In [None]:
# Select latitude/longitude from the ice-shelf file, then turn both from
# coordinates into ordinary data variables.
latlon_coords = file_isf[['latitude', 'longitude']]
latlon = latlon_coords.reset_coords(names=['longitude', 'latitude'])

In [None]:
# Broadcast lat/lon against the salinity field (its own longitude/latitude removed first),
# so the salinity can later be used to discard points.
# drop_vars replaces the deprecated DataArray.drop / Dataset.drop for removing variables.
salinity_no_latlon = T_S_2D_isfdraft['salinity_in'].drop_vars(['longitude','latitude'])
latlon_br, salinity_for_nans = xr.broadcast(latlon, salinity_no_latlon)

# Merge both, order dimensions as (y, x, time), remove 'profile_domain' and load into memory.
latlon_input_xr = (
    xr.merge([latlon_br, salinity_for_nans])
    .transpose('y','x','time')
    .drop_vars('profile_domain')
    .load()
)

In [None]:
# Build, for each ice shelf, the cleaned lat/lon dataframe for a check.
# Nothing is written to disk: the to_csv call at the end is commented out.
for kisf in tqdm(file_isf.Nisf):
    # points belonging to ice shelf kisf, without the 'Nisf' coordinate
    # (drop_vars replaces the deprecated Dataset.drop)
    ds_kisf = latlon_input_xr.where(file_isf['ISF_mask'] == kisf, drop=True).drop_vars('Nisf')
    df_kisf = ds_kisf.to_dataframe()
    # remove rows where there are nans
    clean_df_kisf = df_kisf.dropna()
    # remove rows where salinity is exactly zero
    clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in']!=0).dropna()
    #clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+nemo_run0+'.csv')