# Input data requirements

The input ice sheet model should be a netCDF file. 


### `Lithk` variable
The uploaded model to contain thickness data (the `lithk` variable) for the comparison.


In [1]:
import os
import numpy as np
import xarray as xr

import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
from datetime import timedelta 
import cftime 
from datetime import datetime

# note: suppress numpy.dtype size changed warnings
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

warnings.filterwarnings('ignore')

### Configure IMBIE comparison

In [2]:
# Define the flag for the ice sheet region Greenland or Antarctica
icesheet = "Antarctica" # Change to "Antarctica" or "Greenland"

# Set start and end dates
start_date = '2006-01-01'
end_date ='2015-01-01'

# Set shapefile path and projection and IMBIE csv_file
if icesheet == "Greenland":
    projection = "EPSG:3413"  # Greenland
    #Set the model data dir path
    nc_filename='/home/jovyan/CmCt/notebooks/Gravimetry/lithk_GIS_JPL_ISSM_asmb.nc'
    #Set the shape data dir path
    shape_filename = "/home/jovyan/CmCt/data/IMBIE/Greenland_Basins_PS_v1.4.2/Greenland_Basins_PS_v1.4.2.shp"
    #Set the observation data dir path
    obs_filename = '/home/jovyan/CmCt/data/IMBIE/imbie_greenland_2022_Gt_partitioned_v0.csv'
    # obs_filename = '/home/jovyan/CmCt/data/IMBIE/imbie_greenland_2021_Gt.csv'
    
elif icesheet== "Antarctica":
    projection = "EPSG:3031"  # Antarctica
    
    #Set the model data dir path
    nc_filename='/home/jovyan/CmCt/notebooks/Gravimetry/lithk_AIS_AWI_PISM1_hist_std.nc'
    #Set the shape data dir path
    shape_filename = "/home/jovyan/CmCt/data/IMBIE/ANT_Basins_IMBIE2_v1.6/ANT_Basins_IMBIE2_v1.6.shp"
    #Set the observation data dir path
    obs_filename = '/home/jovyan/CmCt/data/IMBIE/imbie_antarctica_2022_Gt_partitioned_v0.csv'
    # obs_filename = '/home/jovyan/CmCt/notebooks/IMBIE/imbie_antarctica_2021_Gt.csv'
    
    ##Set the Region observation data dir path
    obs_east_filename = '/home/jovyan/CmCt/data/IMBIE/imbie_east_antarctica_2022_Gt_partitioned_v0.csv'
    obs_west_filename = '/home/jovyan/CmCt/data/IMBIE/imbie_west_antarctica_2022_Gt_partitioned_v0.csv'
    obs_peninsula_filename= '/home/jovyan/CmCt/data/IMBIE/imbie_antarctic_peninsula_2022_Gt_partitioned_v0.csv'

    # Check if regional observation files exist 
    if not os.path.exists(obs_east_filename):
        raise FileNotFoundError(f"Observation file not found: {obs_east_filename}")
    if not os.path.exists(obs_west_filename):
        raise FileNotFoundError(f"Observation file not found: {obs_west_filename}")
    if not os.path.exists(obs_peninsula_filename):
        raise FileNotFoundError(f"Observation file not found: {obs_peninsula_filename}")
else:
    raise ValueError("Invalid iceshee value. Must be 'Greenland' or 'Antarctica'.")


# Check if  observation file exists
if not os.path.exists(obs_filename):
    raise FileNotFoundError(f"Observation file not found: {obs_filename}")


# Check if model file exists    
if not os.path.exists(nc_filename):
    raise FileNotFoundError(f"Model file not found: {nc_filename}")



# Select  variable for mass balance comparision
mass_balance_column="Cumulative mass balance (Gt)"# "Cumulative dynamics mass balance anomaly (Gt)"
mass_balance_column_clean = mass_balance_column.replace(' (Gt/yr)', '')

## Load the model data

In [5]:
#Model data
gis_ds = xr.open_dataset(nc_filename)
lithk = gis_ds['lithk']
time_var = gis_ds['time']

# Get the minimum and maximum values directly from the time variable
min_time = time_var.values.min()
max_time = time_var.values.max()

print(f"Time range: {min_time} to {max_time}")


#Check if the selected start and end dates are within the range
start_date_dt = datetime.strptime(start_date, '%Y-%m-%d')
end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')
# Convert the start and end dates to cftime.datetime objects   
start_date1 = cftime.DatetimeNoLeap(start_date_dt.year, start_date_dt.month, start_date_dt.day)
end_date1 = cftime.DatetimeNoLeap(end_date_dt.year, end_date_dt.month, end_date_dt.day)

# Check
if min_time <= start_date1 <= max_time and min_time <= end_date1 <= max_time:
    print(f"The selected dates {start_date} and {end_date} are within the range of the model data.")
else:
    raise ValueError(f"Error: The selected dates {start_date} or {end_date} are out of range. Model data time range is from {min_time} to {max_time}.")

Time range: 2006-01-01 00:00:00 to 2015-01-01 00:00:00
The selected dates 2006-01-01 and 2015-01-01 are within the range of the dataset.


### Calculate  model mass balance for each basin and total mass balance for whole region

In [6]:
##Greenland model file uses the Gregorian calendar (which includes leap years), 
if icesheet == "Greenland":
    # Interpolate lithk values at the start and end dates
    lithk_start = lithk.interp(time=start_date).data.transpose().flatten()
    lithk_end = lithk.interp(time=end_date).data.transpose().flatten()

## Antarctica model file uses the "365_day" calendar (no leap years),
elif icesheet== "Antarctica":
    # Interpolate lithk values at the start and end dates using the nearest method
    lithk_start = lithk.sel(time=start_date1, method='nearest').data
    lithk_end = lithk.sel(time=end_date1, method='nearest').data



# Calculate the difference
lithk_delta = lithk_end - lithk_start

# Replace NaN values with 0
lithk_delta[np.isnan(lithk_delta)] = 0


# Change Ice thickness unit from (m) to mass (kg) to gigatonnes(Gt)
# ice thickness*area* density of ice* 1e-12

rho_ice = 934 # (density of ice, kg/m^3)

#calculate area = x_resolution*y_resolution
x_coords = gis_ds['x'].values
y_coords = gis_ds['y'].values
x_resolution = abs(x_coords[1] - x_coords[0])
y_resolution = abs(y_coords[1] - y_coords[0])

lithk_delta = (lithk_delta * x_resolution*y_resolution)*rho_ice * 1e-12


# Create a list of Point geometries from coordinate grids
points = [Point(x, y) for x in x_coords for y in y_coords]

# Flatten lithk_delta to match the points list 
lithk_delta_flat = lithk_delta.flatten()

# Create DataFrame
lithk_df = pd.DataFrame({
    'geometry': points,
    'lithk_delta': lithk_delta_flat
})

# Convert DataFrame to GeoDataFrame
lithk_gdf = gpd.GeoDataFrame(lithk_df, geometry='geometry', crs=projection)


# Load basin shapefile 
basins_gdf = gpd.read_file(shape_filename)


# Perform spatial join
joined_gdf = gpd.sjoin(lithk_gdf, basins_gdf, how="inner", predicate='intersects')


# Sum lithk_delta values by basin
basin_mass_change_sums = joined_gdf.groupby('index_right')['lithk_delta'].sum()



if icesheet == "Antarctica":
    # Sum lithk_delta values by the 'Regions' column
    region_mass_change_sums = joined_gdf.groupby('Regions')['lithk_delta'].sum()



# Sum all of the basin mass change
model_total_mass_balance= basin_mass_change_sums.sum()




### IMBIE data date format conversion

In [7]:
# Define a function to convert fractional years to a precise datetime format
def fractional_year_to_date(year):
    year_int = int(year)  # Extract the integer part (the full year)
    fraction = year - year_int  # Extract the fractional part
    
    # Start at the beginning of the year
    start_of_year = pd.Timestamp(f'{year_int}-01-01')
    
    # Determine if it's a leap year
    if pd.Timestamp(f'{year_int}-12-31').is_leap_year:
        total_days_in_year = 366
    else:
        total_days_in_year = 365
    
    # Convert the fractional part into the corresponding number of days
    fractional_days = fraction * total_days_in_year
    
    # Add the fractional days to the start of the year to get the correct date
    return start_of_year + timedelta(days=fractional_days)


# Group the data by year
def assign_month_order(group):
    # Get the month of the first entry for the year
    first_month = group['Date'].dt.month.iloc[0]
    
    # Create a month order starting from the first month and increasing by 1 for each subsequent entry
    group['Month_Order'] = range(first_month, first_month + len(group))
    return group

### Extract IMBIE mass balance data

In [8]:
def sum_MassBalance(obs_filename,start_date,end_date):
    
    # Load the CSV file
    mass_balance_data = pd.read_csv(obs_filename)
    
    # Column names
    date_column = 'Year'
    
    # Ensure the 'Year' column is treated as float to capture the fractional year part
    mass_balance_data['Year'] = mass_balance_data['Year'].astype(float)
    
    # Apply the conversion function to the 'Year' column
    mass_balance_data['Date'] = mass_balance_data['Year'].apply(fractional_year_to_date)
  
    # Sort the data by 'Date' column to ensure it’s in increasing order of both year and fraction
    mass_balance_data = mass_balance_data.sort_values(by='Date')
      
    # Apply the function to each group of data (grouped by the year)
    mass_balance_data = mass_balance_data.groupby(mass_balance_data['Date'].dt.year).apply(assign_month_order)
    
    # Convert 'Year' column to year-month-01 format where month is 'Month_Order'
    mass_balance_data['Year'] = mass_balance_data.apply(lambda row: f"{row['Date'].year}-{str(row['Month_Order']).zfill(2)}-01", axis=1)
    
    # Reset the index to flatten the multi-index structure
    mass_balance_data = mass_balance_data.reset_index(drop=True)

    
    # Check if the column exists in the DataFrame
    if mass_balance_column not in mass_balance_data.columns:
        raise ValueError(f"Error: The column '{mass_balance_column}' does not exist in the CSV file.")

    
    # Filter the data for the end date
    end_data = mass_balance_data[mass_balance_data['Year'] == end_date]    
    if end_data.empty:
        raise ValueError(f"Error: No data available for the end date {end_date}.")
    mass_balance_end_value = end_data[mass_balance_column].iloc[-1]  # Last value before or at the end date

    
    # Filter the data for one date before the start date
    data_before_start_date = mass_balance_data[mass_balance_data[date_column] < start_date]
    if data_before_start_date.empty:
        raise ValueError(f"Error: No data available before the start date {start_date}.")
    mass_balance_start_value = data_before_start_date[mass_balance_column].iloc[-1]  # Last value before start date
    
    # Subtract the two values to get the total mass balance change
    IMBIE_total_mass_change_sum = mass_balance_end_value - mass_balance_start_value
    
    return IMBIE_total_mass_change_sum

### Calculate mass balance difference of IMBIE and model data

In [9]:
#Total mass balance
IMBIE_total_mass_change_sum=sum_MassBalance(obs_filename,start_date,end_date)

# Calculate difference of IMBIE-model  mass change 
delta_masschange=IMBIE_total_mass_change_sum-model_total_mass_balance


In [10]:
# Check if all required files are available
if icesheet == "Antarctica":
    print_regionalresult_check=[]
    if os.path.exists(obs_east_filename) and os.path.exists(obs_west_filename) and os.path.exists(obs_peninsula_filename):
        #Check
        print_regionalresult_check='YES' 
        
        # Calculate total mass for each region
        IMBIE_total_mass_change_sum_east = sum_MassBalance(obs_east_filename,start_date,end_date)
        IMBIE_total_mass_change_sum_west = sum_MassBalance(obs_west_filename,start_date,end_date)
        IMBIE_total_mass_change_sum_peninsula = sum_MassBalance(obs_peninsula_filename,start_date,end_date)

        # Calculate difference of IMBIE-model mass change for each region
        delta_masschange_east = IMBIE_total_mass_change_sum_east - region_mass_change_sums['East']
        delta_masschange_west = IMBIE_total_mass_change_sum_west - region_mass_change_sums['West']
        delta_masschange_peninsula = IMBIE_total_mass_change_sum_peninsula - region_mass_change_sums['Peninsula']

## Display result

In [11]:
# Remove  'index_right'
basin_mass_change_sums = basin_mass_change_sums.reset_index(drop=True)

# Apply formatting to two decimal places
formatted_mass_change_sums = basin_mass_change_sums.apply(lambda x: f"{x:.2f}")

# Print the formatted series with the numeric index
print('Basin, Mass change (Gt)')
print(formatted_mass_change_sums.to_string())
print('Total, ', model_total_mass_balance.round(2))
# Insert a gap
print()

print(mass_balance_column_clean)
print('Total, Difference of IMBIE and Model')
# print(f" {IMBIE_total_mass_change_sum}, {delta_masschange:.2f}")
print(f"{IMBIE_total_mass_change_sum:.2f}, {delta_masschange:.2f}")
# Insert a gap
print()


if icesheet == "Antarctica":
    #Remove 'Regions' 
    region_mass_change_sums.name = None
    
    # Remove index name 
    region_mass_change_sums.index.name = None
    
    # Format the Series without displaying the 'dtype'
    formatted_region_mass_change = region_mass_change_sums.apply(lambda x: f"{x:.2f}") 
    
    print('Region, Mass change (Gt)')
    print(formatted_region_mass_change.to_string())
    # Insert a gap
    print()
    
    if print_regionalresult_check =='YES':
        print(mass_balance_column_clean)
        print('Region, Total, Difference of IMBIE and Model')
        print(f"East, {IMBIE_total_mass_change_sum_east:.2f}, {delta_masschange_east:.2f}")
        print(f"West, {IMBIE_total_mass_change_sum_west:.2f}, {delta_masschange_west:.2f}")
        print(f"Peninsula, {IMBIE_total_mass_change_sum_peninsula:.2f}, {delta_masschange_peninsula:.2f}")



Basin, Mass change (Gt)
0       -4.89
1        0.01
2      -11.73
3      -71.64
4       22.10
5       28.35
6      -31.31
7       -8.57
8      -21.48
9      -37.24
10     -82.70
11     -13.70
12      36.78
13      -4.67
14    -132.67
15      -5.73
16      -1.48
17      -8.64
18     -54.56
Total,  -403.77

Cumulative mass balance (Gt)
Total, Difference of IMBIE and Model
-1788.05, -1384.29

Region, Mass change (Gt)
East         -146.85
Islands        -4.89
Peninsula     -15.85
West         -236.18

Cumulative mass balance (Gt)
Region, Total, Difference of IMBIE and Model
East, 112.40, 259.24
West, -1503.50, -1267.32
Peninsula, -396.94, -381.09
