### Import the necessary libraries

In [None]:
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [None]:
#In[2]:
# Import the project-local helper modules
import src.SAT_function as data_process
import src.Data_Preprocess as preprosess

In [None]:
# import src.slurm_cluster as scluster
# client, scluster = scluster.init_dask_slurm_cluster(scale=4, cores=40, memory="200GB")

In [None]:
def func_mk(x):
    """
    Run the Mann-Kendall trend test on ``x``.

    Delegates to ``data_process.apply_mannkendall`` and returns the first two
    entries of its result as the tuple ``(slope, p_val)``.
    """
    mk_output = data_process.apply_mannkendall(x)
    return mk_output[0], mk_output[1]

In [None]:
# Load the MMEM of the SAT-OBS internal variability from netCDF
# NOTE(review): the file name suggests an Arctic-mean series covering 1850-2022 — confirm
dir_residuals = '/work/mh0033/m301036/Land_surf_temp/Disentangling_OBS_SAT_trend/Figure4/data/'
ds_MME_HadCRUT5_1850_2022 = xr.open_mfdataset(dir_residuals + '1850_2022_Internal_arctic_mean.nc')

In [None]:
# Give the data variable the name 'tas'; '__xarray_dataarray_variable__' is the
# placeholder name xarray uses when an unnamed DataArray is written to netCDF
ds_MME_HadCRUT5_1850_2022 = ds_MME_HadCRUT5_1850_2022.rename({'__xarray_dataarray_variable__': 'tas'})

In [None]:
ds_MME_HadCRUT5_1850_2022

In [None]:
# Generate running windows (segments) of the SAT-OBS residuals:
#       every window length in `time_interval` is slid along the series one year at a time
#       (NOTE: `time_interval` is 10 to 73 years in 1-year steps, not 10 to 100 years in 5-year steps),
#       then the trend of each segment is calculated,
#       and the ensemble standard deviation of the trends is calculated for each window length

# define the function to generate the running windows of the residuals of SAT-OBS
def generate_segments(data, segment_length):
    """
    Split ``data`` into overlapping running windows along its ``year`` coordinate.

    data: xarray-style object exposing ``data['year']`` and ``data.sel(year=...)``
          (originally documented as a 3D array with dimensions [year, lat, lon])
    segment_length: length of each segment in years

    Returns a list holding one ``data.sel(year=...)`` selection per start year.
    Start years advance by one, from the first year in ``data`` up to the last
    year that still leaves room for a full window.
    """
    first_year = int(data['year'].min().item())
    last_year = int(data['year'].max().item())

    # The last admissible start year is last_year - segment_length + 1;
    # range() excludes its stop value, hence the "+ 2".
    years = range(first_year, last_year - segment_length + 2)
    print(years)

    # NOTE(review): the slice bounds are passed as strings; if the 'year'
    # coordinate is numeric, integer bounds may be what is intended — confirm.
    return [
        data.sel(year=slice(str(start), str(start + segment_length - 1)))
        for start in years
    ]

In [None]:
# Window (segment) lengths to analyse: 10, 11, ..., 73 (np.arange excludes the stop value 74)
time_interval = np.arange(10, 74, 1)

In [None]:
time_interval

In [None]:
# Build, for every window length in time_interval, the list of running-window
# segments of the 'tas' residuals (keyed by window length)
tas_residuals = ds_MME_HadCRUT5_1850_2022['tas']
ICV_segments = {
    window_length: generate_segments(tas_residuals, segment_length=window_length)
    for window_length in time_interval
}

In [None]:
type(ICV_segments)

In [None]:
ICV_segments.keys()

In [None]:
# Trend of every segment, grouped by window length.
# The first entry of data_process.mk_test(...) is taken as the trend and scaled by 10
# (presumably slope per year -> per decade; confirm against data_process.mk_test).
ICV_trend = {}
for window_length, segments in ICV_segments.items():
    print(window_length)
    ICV_trend[window_length] = [
        data_process.mk_test(segment)[0] * 10.0 for segment in segments
    ]

In [None]:
# # Assuming ICV_segments is a dictionary with segment_length as keys and list of DataArray segments as values
# max_num_segments = max(len(segments) for segments in ICV_segments.values())
# segment_lengths = ICV_segments.keys()

# # Create a new Dataset to hold the new arrays
# new_ds = xr.Dataset()

# for segment_length in segment_lengths:
#     segments_list = ICV_segments[segment_length]
#     # print(segments_list)
    
#     # Pad the segments list to have the same number of segments
#     padded_segments = segments_list.copy()
#     while len(padded_segments) < max_num_segments:
#         # Create a DataArray filled with NaNs to match the shape of the segments
#         nan_segment = xr.full_like(padded_segments[0], np.nan)
#         padded_segments.append(nan_segment)
    
#     # Create a coordinate for the new segment dimension
#     segment_coord = range(max_num_segments)
    
#     # Concatenate the padded segments with the new segment coordinate
#     concatenated = xr.concat(padded_segments, dim=segment_coord)
    
#     # Assign a specific name to the new dimension
#     concatenated = concatenated.rename({'concat_dim': 'segment'})
    
#     # Add the new DataArray to the new dataset
#     new_ds[f'ICV_segments_{segment_length}yr'] = concatenated

In [None]:
# ds_combined = xr.merge([ds_MME_HadCRUT5_1850_2022, new_ds])

In [None]:
# ds_combined

In [None]:
# check the minimum and maximum of the new variable
# ds_combined['ICV_segments_10yr'].min().values, ds_combined['ICV_segments_10yr'].max().values

In [None]:
ICV_trend

In [None]:
def check_shapes(data_dict):
    """
    Print the shape of every value stored in ``data_dict``.

    For a list value, a header line with the list length is printed, followed
    by one line per element giving that element's ``shape``. Any other value
    gets a single line with its ``shape``. Objects without a ``shape``
    attribute are reported as 'No shape attribute'.

    data_dict: dictionary whose values are lists of objects or single objects
    """
    no_shape = 'No shape attribute'
    for key, value in data_dict.items():
        if isinstance(value, list):
            print(f"{key}: List of {len(value)} elements")
            for i, item in enumerate(value):
                # Report this element's shape (not the whole list)
                print(f"  Element {i}: {getattr(item, 'shape', no_shape)}")
        else:
            print(f"{key}: {getattr(value, 'shape', no_shape)}")

In [None]:
# check_shapes(ICV_trend)

In [None]:
len(ICV_trend[20])

In [None]:
type(ICV_trend)

In [None]:
print(max(len(segments) for segments in ICV_trend.values()))

### Transform the dictionary into a dataset

In [None]:
# Stack the per-window trend lists held in ICV_trend (keyed by window length)
# into one Dataset with one variable per window length.
# The lists differ in length, so each one is padded with NaN entries up to the
# longest list; all variables then share a single 'segment' dimension.
max_num_segments = max(len(trends) for trends in ICV_trend.values())
segment_lengths = ICV_trend.keys()

# Explicitly named index for the new dimension (0 .. max_num_segments - 1);
# naming it here avoids depending on xarray's implicit 'concat_dim' fallback.
segment_coord = pd.Index(range(max_num_segments), name='segment')

# Create a new Dataset to hold the new arrays
new_ds = xr.Dataset()

for segment_length in segment_lengths:
    # Wrap every trend value in a DataArray so the values can be concatenated
    padded_trend = [xr.DataArray(trend) for trend in ICV_trend[segment_length]]

    # Pad with NaN-filled copies shaped like the first trend entry
    n_missing = max_num_segments - len(padded_trend)
    if n_missing > 0:
        nan_segment = xr.full_like(padded_trend[0], np.nan)
        padded_trend.extend([nan_segment] * n_missing)

    # Concatenate along the new 'segment' dimension and store the result
    new_ds[f'ICV_trend_{segment_length}yr'] = xr.concat(padded_trend, dim=segment_coord)

In [None]:
new_ds

In [None]:
# extract the trend pattern of each interval of segments
"""
drop the nan values in the trend pattern of each interval of segments
"""
# segements_10yr_trend = ds_combined['ICV_segments_10yr_trend'].dropna(dim='segment')
# segements_11yr_trend = ds_combined['ICV_segments_11yr_trend'].dropna(dim='segment')
# segements_12yr_trend = ds_combined['ICV_segments_12yr_trend'].dropna(dim='segment')
# segements_13yr_trend = ds_combined['ICV_segments_13yr_trend'].dropna(dim='segment')
# segements_14yr_trend = ds_combined['ICV_segments_14yr_trend'].dropna(dim='segment')
# segements_15yr_trend = ds_combined['ICV_segments_15yr_trend'].dropna(dim='segment')

In [None]:
# segements_15yr_trend

In [None]:
# Save the per-segment trends (one variable per window length) to netCDF
# NOTE: this is the same directory the input residuals are read from
dir_out = '/work/mh0033/m301036/Land_surf_temp/Disentangling_OBS_SAT_trend/Figure4/data/'
new_ds.to_netcdf(dir_out + 'Internal_arctic_mean_trend_segmented.nc')