In [None]:
"""
Created on Wed Apr 13 14:17 2022

Prepare csv for individual runs 

Author: @claraburgard
"""

FOR EACH POINT:
- T and S extrapolated to ice draft depth
- T and S mean
- Distance to front
- Distance to the grounding line
- ice draft zonal and meridional slope in x- and y-direction
- bedrock zonal and meridional slope in x- and y-direction
- Ice draft depth
- Bathymetry
- utide
- Ice draft concentration
- Max bathymetry 
- Target: melt (m ice per yr)

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
from tqdm.notebook import trange, tqdm

import summer_paper.data_formatting_NN as dfmt

READ IN DATA

In [None]:
# Run selection: model name and forcing scenario to process.
mod = 'CNRM-CM6-1'  # other run names listed by the author: 'EPM026', 'EPM031', 'EPM034'
scenario = 'historical'
to2300 = False  # only consulted when the scenario is not 'historical'

# First and last year of the period; the yearly loops iterate up to and including yyend.
if scenario == 'historical':
    yystart, yyend = 1980, 2014  # the author noted 1850 as an alternative start year
else:
    yystart = 2015
    yyend = 2300 if to2300 else 2100

In [None]:


# Input directories.
inputpath_data = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/'
inputpath_mask = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/ANTARCTICA_IS_MASKS/BedMachine_4km/'
inputpath_boxes = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/BOXES/BedMachine_4km/'
inputpath_plumes = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/PLUMES/BedMachine_4km/'
# Profile inputs are stored in one sub-directory per model (`mod`).
inputpath_profiles = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/T_S_PROF/CMIP/' + mod + '/'

# Output directories; the CSV files go to a per-model sub-directory.
outputpath_nn = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/INPUT_DATA/CMIP/' + mod + '/'
outputpath = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/'

Input variables

In [None]:
# Lower and upper bound handed to dfmt.cut_domain_stereo for both axes.
map_lim = [-3_000_000, 3_000_000]

In [None]:
# --- Ice-shelf masks, info and distances (dIF, dGL) ---
inputpath_isf = '/bettik/burgardc/DATA/SUMMER_PAPER/interim/ANTARCTICA_IS_MASKS/BedMachine_4km/'
file_isf_orig = xr.open_dataset(
    inputpath_isf + 'BedMachinev2_4km_isf_masks_and_info_and_distance_oneFRIS.nc'
)

# First selection: keep the Nisf entries whose 'front_bot_depth_max' is finite.
has_front_depth = np.isfinite(file_isf_orig['front_bot_depth_max'])
nonnan_Nisf = file_isf_orig['Nisf'].where(has_front_depth, drop=True).astype(int)
file_isf_nonnan = file_isf_orig.sel(Nisf=nonnan_Nisf)

# Second selection: among those, keep the entries whose 'isf_area_rignot' is finite.
has_rignot_area = np.isfinite(file_isf_nonnan['isf_area_rignot'])
rignot_isf = file_isf_nonnan.Nisf.where(has_rignot_area, drop=True)
file_isf = file_isf_nonnan.sel(Nisf=rignot_isf)

# --- Bathymetry, ice draft, concentration ---
BedMachine_orig = xr.open_dataset(inputpath_data + 'BedMachine_v2_aggregated4km_allvars.nc')
BedMachine_orig_cut = dfmt.cut_domain_stereo(BedMachine_orig, map_lim, map_lim)

# Bed with flipped sign.
file_bed_goodGL = -1 * BedMachine_orig_cut['bed']

# Draft = thickness - surface, kept only where ISF_mask > 1 (NaN elsewhere).
thickness_minus_surface = BedMachine_orig_cut['thickness'] - BedMachine_orig_cut['surface']
file_draft = thickness_minus_surface.where(file_isf['ISF_mask'] > 1)

file_isf_conc = BedMachine_orig_cut['isf_conc']

# --- Ice and bed slopes ---
file_slope = xr.open_dataset(
    inputpath_mask + 'BedMachine_4km_slope_info_bedrock_draft_latlon_oneFRIS.nc'
)

In [None]:
# Year-independent inputs (distances, draft, bathymetry, slopes, concentration).
# They are merged once here instead of once per year. The merge order is the
# same as in the per-year chain, so the variable order of the merged dataset
# (and hence the CSV column order) is unchanged.
static_inputs_per_year = (
    file_isf[['dGL', 'dIF']]
    .merge(file_draft.rename('corrected_isfdraft'))
    .merge(file_bed_goodGL.rename('bathy_metry'))
    .merge(file_slope)
    .merge(file_isf_conc)
)

# Write one CSV per year and per Nisf entry.
for tt in tqdm(range(yystart, yyend + 1)):
    run_tag = mod + '_' + scenario + '_' + str(tt)

    # T and S extrapolated to ice draft depth.
    # The yearly file is read into memory and closed right away: this avoids
    # leaving two open files behind per year and avoids re-reading the lazily
    # loaded fields from disk for every ice shelf in the inner loop.
    with xr.open_dataset(inputpath_profiles + 'T_S_2D_fields_isf_draft_' + run_tag + '.nc') as ds_draft_tt:
        T_S_2D_isfdraft = ds_draft_tt.squeeze().drop('time').load()

    # T and S mean and std
    with xr.open_dataset(inputpath_profiles + 'T_S_2D_meanstd_isf_draft_' + run_tag + '.nc') as ds_meanstd_tt:
        T_S_2D_meanstd = ds_meanstd_tt.load()

    time_dpdt_in = (
        static_inputs_per_year
        .merge(T_S_2D_isfdraft[['theta_in', 'salinity_in']])
        .merge(T_S_2D_meanstd)
    )

    # Replace non-finite dIF values (e.g. inf) by NaN so that dropna() removes them.
    time_dpdt_in['dIF'] = time_dpdt_in['dIF'].where(np.isfinite(time_dpdt_in['dIF']), np.nan)

    for kisf in file_isf.Nisf:
        # Keep only the grid cells where ISF_mask equals this Nisf value.
        ds_kisf = time_dpdt_in.where(file_isf['ISF_mask'] == kisf, drop=True)

        df_kisf = ds_kisf.drop('longitude').drop('latitude').to_dataframe()
        # remove rows where there are nans
        clean_df_kisf = df_kisf.dropna()
        # remove rows where salinity_in is exactly 0
        clean_df_kisf = clean_df_kisf.where(clean_df_kisf['salinity_in'] != 0).dropna()
        # store the year instead of the full timestamp
        clean_df_kisf['time'] = clean_df_kisf['time'].dt.year
        clean_df_kisf.to_csv(
            outputpath_nn + 'dataframe_input_isf' + str(kisf.values).zfill(3) + '_' + run_tag + '.csv'
        )

In [None]:
# T and S extrapolated to ice draft depth: every file matching this model and
# scenario, concatenated along 'time'.
T_S_2D_isfdraft = xr.open_mfdataset(
    inputpath_profiles + 'T_S_2D_fields_isf_draft_' + mod + '_' + scenario + '_*.nc',
    combine='nested',
    concat_dim='time',
)  # a chunks={'time': 5} option was left commented out by the author

# T and S mean and std (a single file for this model and scenario)
T_S_2D_meanstd = xr.open_dataset(
    inputpath_profiles + 'T_S_2D_meanstd_isf_draft_' + mod + '_' + scenario + '.nc'
)

In [None]:
# Year-independent inputs (distances, draft, bathymetry, slopes, concentration).
# They are merged once here instead of once per year. The merge order is the
# same as in the per-year chain, so the variable order of the merged dataset
# (and hence the CSV column order) is unchanged.
static_inputs_allisf = (
    file_isf[['dGL', 'dIF']]
    .merge(file_draft.rename('corrected_isfdraft'))
    .merge(file_bed_goodGL.rename('bathy_metry'))
    .merge(file_slope)
    .merge(file_isf_conc)
)

# Write one CSV per year that stacks all Nisf entries.
# This cell needs pandas (`pd`) for the concatenation below.
for tt in tqdm(range(yystart, yyend + 1)):

    # Select this year's fields; the multi-file dataset is loaded into memory here.
    T_S_2D_isfdraft_tt = T_S_2D_isfdraft.sel(time=tt).load()
    T_S_2D_meanstd_tt = T_S_2D_meanstd.sel(time=tt)

    time_dpdt_in = (
        static_inputs_allisf
        .merge(T_S_2D_isfdraft_tt[['theta_in', 'salinity_in']])
        .merge(T_S_2D_meanstd_tt)
    )

    # Replace non-finite dIF values (e.g. inf) by NaN so that dropna() removes them.
    time_dpdt_in['dIF'] = time_dpdt_in['dIF'].where(np.isfinite(time_dpdt_in['dIF']), np.nan)

    # One dataframe per Nisf entry, concatenated once after the loop.
    per_shelf_frames = []
    for kisf in file_isf.Nisf:
        # Keep only the grid cells where ISF_mask equals this Nisf value.
        ds_kisf = time_dpdt_in.where(file_isf['ISF_mask'] == kisf, drop=True)

        df_kisf = ds_kisf.drop('longitude').drop('latitude').to_dataframe()
        per_shelf_frames.append(df_kisf)

    df_allkisf = pd.concat(per_shelf_frames)
    # remove rows where there are nans
    clean_df_allkisf = df_allkisf.dropna()
    # remove rows where salinity_in is exactly 0
    clean_df_allkisf = clean_df_allkisf.where(clean_df_allkisf['salinity_in'] != 0).dropna()
    # add 'time' as an extra index level next to the existing index
    clean_df_tt = clean_df_allkisf.set_index(['time'], append=True)
    clean_df_tt.to_csv(outputpath_nn + 'dataframe_input_allisf_' + mod + '_' + scenario + '_' + str(tt) + '.csv')



In [None]:
    # Scratch variant of the per-shelf cleaning: builds the cleaned dataframe for
    # each Nisf entry but does not write it (the to_csv call is disabled).
    # NOTE(review): uses `time_dpdt_in` and `tt` from a previously run cell, and
    # the whole cell is indented -- confirm whether it is still needed.
    for kisf in file_isf.Nisf:
        # Keep only the grid cells where ISF_mask equals this Nisf value.
        in_this_shelf = file_isf['ISF_mask'] == kisf
        ds_kisf = time_dpdt_in.where(in_this_shelf, drop=True)

        # Tabulate without the longitude/latitude variables.
        ds_no_lonlat = ds_kisf.drop('longitude').drop('latitude')
        df_kisf = ds_no_lonlat.to_dataframe()

        # Discard rows containing NaN, then rows whose salinity_in is exactly 0.
        clean_df_kisf = df_kisf.dropna()
        nonzero_salinity = clean_df_kisf['salinity_in'] != 0
        clean_df_kisf = clean_df_kisf.where(nonzero_salinity).dropna()

        # Overwrite the time column with the current value of `tt`.
        clean_df_kisf['time'] = tt
        #clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+mod+'_'+scenario+'_'+str(tt)+'.csv')

In [None]:
# Per-shelf export for whatever `time_dpdt_in` and `tt` currently hold.
# NOTE(review): both names come from a previously run loop cell -- confirm that
# this cell is meant to be run on its own.
for kisf in file_isf.Nisf:
    # Keep only the grid cells where ISF_mask equals this Nisf value.
    in_this_shelf = file_isf['ISF_mask'] == kisf
    ds_kisf = time_dpdt_in.where(in_this_shelf, drop=True)

    # Tabulate without the longitude/latitude variables.
    ds_no_lonlat = ds_kisf.drop('longitude').drop('latitude')
    df_kisf = ds_no_lonlat.to_dataframe()

    # Discard rows containing NaN, then rows whose salinity_in is exactly 0.
    clean_df_kisf = df_kisf.dropna()
    nonzero_salinity = clean_df_kisf['salinity_in'] != 0
    clean_df_kisf = clean_df_kisf.where(nonzero_salinity).dropna()

    # Store the year instead of the full timestamp.
    clean_df_kisf['time'] = clean_df_kisf['time'].dt.year

    csv_name = 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+mod+'_'+scenario+'_'+str(tt)+'.csv'
    clean_df_kisf.to_csv(outputpath_nn + csv_name)

In [None]:
# Display the merged inputs restricted to the grid cells where ISF_mask equals `kisf`.
# NOTE(review): `kisf` and `time_dpdt_in` are whatever an earlier loop cell left
# behind; the result is only displayed, not stored.
time_dpdt_in.where(file_isf['ISF_mask'] == kisf, drop=True)

PREPARE ONE DATASET WITH EVERYTHING

In [None]:
# Display the result of merging `u_tide` into the per-year inputs (result is not stored).
# NOTE(review): `u_tide` is not defined in the visible part of this notebook --
# this cell fails on a fresh kernel unless it is defined elsewhere; confirm.
time_dpdt_in.merge(u_tide)

In [None]:
# Broadcast the geometry dataset and the per-year inputs against each other.
# NOTE(review): `geometry_2D` is not defined in the visible part of this
# notebook -- confirm where it comes from.
geometry_2D_br, time_dpdt_in_br = xr.broadcast(geometry_2D,time_dpdt_in)

In [None]:
# Merge the two broadcast datasets, order the dimensions as (y, x, time), drop
# 'profile_domain' and load the result into memory.
final_input_xr = xr.merge([geometry_2D_br, time_dpdt_in_br]).transpose('y','x','time').drop('profile_domain').load()

PREPARE CLEAN DATAFRAME WITH ALL DATA TO SAVE AND FEED TO THE NN

In [None]:
# Write one cleaned CSV per Nisf entry from the combined dataset `final_input_xr`.
# NOTE(review): `nemo_run0` is not defined in the visible part of this notebook --
# confirm where it comes from before running this cell.
for kisf in tqdm(file_isf.Nisf):
    # Keep only the grid cells where ISF_mask equals this Nisf value, then drop 'Nisf'.
    in_this_shelf = file_isf['ISF_mask'] == kisf
    ds_kisf = final_input_xr.where(in_this_shelf, drop=True).drop('Nisf')

    # Tabulate without the longitude/latitude variables.
    ds_no_lonlat = ds_kisf.drop('longitude').drop('latitude')
    df_kisf = ds_no_lonlat.to_dataframe()

    # Discard rows containing NaN, then rows whose salinity_in is exactly 0.
    clean_df_kisf = df_kisf.dropna()
    nonzero_salinity = clean_df_kisf['salinity_in'] != 0
    clean_df_kisf = clean_df_kisf.where(nonzero_salinity).dropna()

    csv_name = 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+nemo_run0+'.csv'
    clean_df_kisf.to_csv(outputpath_nn + csv_name)

In [None]:
# Display the column dtypes of the most recently built per-shelf dataframe.
clean_df_kisf.dtypes

#########################

SAVE TO CSV

In [None]:
# Write `clean_df_yy` to a CSV named after `nemo_run` in the NN input directory.
# NOTE(review): neither `clean_df_yy` nor `nemo_run` is defined in the visible
# part of this notebook -- confirm where they come from.
clean_df_yy.to_csv(outputpath_nn + 'dataframe_input_'+nemo_run+'.csv')

In [None]:
# remove index (time, x, y)
# The index is discarded (drop=True), not turned into columns, and the frame is
# modified in place, so re-running cells that rely on the old index will differ.
# NOTE(review): this runs after the CSV has been written in the preceding cell,
# so the saved file still contains the index -- confirm this is intended.
clean_df_yy.reset_index(drop=True, inplace=True)

PREPARE LAT AND LON FOR A CHECK

In [None]:
# Take latitude and longitude from the ice-shelf file and turn them from
# coordinates into ordinary data variables.
latlon = file_isf[['latitude', 'longitude']].reset_coords(names=['longitude','latitude'])

In [None]:
# Broadcast lat/lon against the salinity field (with its own longitude/latitude dropped).
latlon_br, salinity_for_nans = xr.broadcast(latlon,T_S_2D_isfdraft['salinity_in'].drop(['longitude','latitude']))
# Merge both, order the dimensions as (y, x, time), drop 'profile_domain' and load into memory.
# Salinity is carried along so the same NaN / zero-salinity filtering can be applied afterwards.
latlon_input_xr = xr.merge([latlon_br, salinity_for_nans]).transpose('y','x','time').drop('profile_domain').load()

In [None]:
# Build, for each Nisf entry, the cleaned lat/lon (+ salinity) dataframe used for
# the check; nothing is written to disk (the to_csv call is disabled).
for kisf in tqdm(file_isf.Nisf):
    # Keep only the grid cells where ISF_mask equals this Nisf value, then drop 'Nisf'.
    in_this_shelf = file_isf['ISF_mask'] == kisf
    ds_kisf = latlon_input_xr.where(in_this_shelf, drop=True).drop('Nisf')
    df_kisf = ds_kisf.to_dataframe()

    # Discard rows containing NaN, then rows whose salinity_in is exactly 0.
    clean_df_kisf = df_kisf.dropna()
    nonzero_salinity = clean_df_kisf['salinity_in'] != 0
    clean_df_kisf = clean_df_kisf.where(nonzero_salinity).dropna()
    #clean_df_kisf.to_csv(outputpath_nn + 'dataframe_input_isf'+str(kisf.values).zfill(3)+'_'+nemo_run0+'.csv')