In [1]:
import xarray as xr

# Open one raw yearly product and print its summary (dimensions, coordinates, variables)
raw_path = "ESACCI-PERMAFROST-L4-PFR-MODISLST_CRYOGRID-AREA4_PP-2010-fv04.0.nc"
ds = xr.open_dataset(raw_path)
print(ds)


<xarray.Dataset> Size: 2GB
Dimensions:          (time: 1, lat: 6000, lon: 36000)
Coordinates:
  * time             (time) datetime64[ns] 8B 2010-01-01
  * lat              (lat) float32 24kB 25.0 25.01 25.02 ... 84.97 84.98 84.99
  * lon              (lon) float32 144kB -180.0 -180.0 -180.0 ... 180.0 180.0
Data variables:
    spatial_ref      float64 8B ...
    PFR              (time, lat, lon) float32 864MB ...
    PFR_uncertainty  (time, lat, lon) float32 864MB ...
Attributes: (12/39)
    title:                      CCI Permafrost extent
    institution:                University of Oslo
    source:                     MODIS MOD11A1 and MYD11A1 level 3 Collection ...
    history:                    2024-03-20 13:57:43
    references:                 http://cci.esa.int/Permafrost, https://doi.or...
    tracking_id:                9efb3c35-33fa-4fd0-a1be-bbadbf475675
    ...                         ...
    geospatial_lat_units:       degrees_north
    geospatial_lon_units:       degree

In [6]:
import xarray as xr
import os

# --- Paths ---
input_dir = "./"
output_dir = "processed_Permafrost"
os.makedirs(output_dir, exist_ok=True)

# --- Settings ---
var_name = "PFR"
years = list(range(2010, 2022))  # 2010–2021

# --- Process Each File ---
# For every year: open the raw product, clip it to the Canada bounding box,
# rename the main variable, and write the result to `output_dir`.
for year in years:
    print(f"Processing {year}...")

    file = f"ESACCI-PERMAFROST-L4-PFR-MODISLST_CRYOGRID-AREA4_PP-{year}-fv04.0.nc"
    filepath = os.path.join(input_dir, file)

    # The context manager closes the source file as soon as the clipped copy
    # has been written, so handles do not accumulate across the 12 iterations.
    with xr.open_dataset(filepath) as ds_raw:
        # Clip to Canada bounding box (label-based slices on lat/lon)
        ds = ds_raw.sel(lat=slice(40, 85), lon=slice(-141, -52))

        # Rename variable for consistency
        if var_name in ds:
            ds = ds.rename({var_name: "permafrost_fraction"})

        # Add time dimension if needed
        # NOTE(review): the label is a plain string here, not a datetime64
        # value like the `time` coordinate already present in the raw files.
        if "time" not in ds.dims:
            ds = ds.expand_dims(time=[f"{year}-01-01"])

        # Save processed file (must happen while the source is still open,
        # because the data is read lazily)
        output_file = os.path.join(output_dir, f"PERMAFROST_{year}_Canada_Annual.nc")
        ds.to_netcdf(output_file)

print("✅ DONE: All annual Permafrost files processed and saved.")


Processing 2010...
Processing 2011...
Processing 2012...
Processing 2013...
Processing 2014...
Processing 2015...
Processing 2016...
Processing 2017...
Processing 2018...
Processing 2019...
Processing 2020...
Processing 2021...
✅ DONE: All annual Permafrost files processed and saved.


In [7]:
import xarray as xr
import os
import glob

# Define path to processed permafrost files
input_dir = "processed_Permafrost"
output_filename = "PERMAFROST_Canada_Annual_2010_2021.nc"

# Get list of annual files (sorted by year: the 4-digit year in the file name
# makes lexicographic order chronological)
file_list = sorted(glob.glob(os.path.join(input_dir, "PERMAFROST_*_Canada_Annual.nc")))
if not file_list:
    # Fail with a clear message instead of an opaque error from xr.concat([])
    raise FileNotFoundError(
        f"No annual permafrost files matching 'PERMAFROST_*_Canada_Annual.nc' in {input_dir!r}"
    )

# Open and merge along 'time' dimension; every opened file is closed again,
# even if opening a later file, merging, or writing fails.
yearly_datasets = []
try:
    for path in file_list:
        yearly_datasets.append(xr.open_dataset(path))

    ds_merged = xr.concat(yearly_datasets, dim="time")

    # Save merged NetCDF
    ds_merged.to_netcdf(os.path.join(input_dir, output_filename))
finally:
    for ds_year in yearly_datasets:
        ds_year.close()

print("✅ DONE: Merged Permafrost dataset saved as", output_filename)


✅ DONE: Merged Permafrost dataset saved as PERMAFROST_Canada_Annual_2010_2021.nc


### Plan: Lightweight Custom AR(2) Model

1. Load the original data (2010–2021).
2. For each grid cell:

   * Use the last 2 years in a simple AR(2)-style recurrence with fixed weights ($a = 0.7$, $b = 0.3$; not fitted to the data):

     $$
     y_t = a \cdot y_{t-1} + b \cdot y_{t-2}
     $$
3. Predict 2022, 2023 and 2024 (each new prediction feeds into the next one).
4. Merge predictions to generate the final file.

In [20]:
import xarray as xr
import numpy as np
import os

# Paths
input_dir = "processed_Permafrost"
output_file = "PERMAFROST_Canada_Annual_2010_2024.nc"
var_name = "permafrost_fraction"

# Step 1: Load 2010–2021
full_years = list(range(2010, 2022))
datasets = []
try:
    for year in full_years:
        file = os.path.join(input_dir, f"PERMAFROST_{year}_Canada_Annual.nc")
        datasets.append(xr.open_dataset(file))

    # Combine into single dataset. `.load()` makes sure all values are in
    # memory, so the source files can be closed safely right afterwards.
    ds_all = xr.concat(datasets, dim="time").load()
finally:
    # Release every file handle, also when opening or concatenating fails
    for ds_year in datasets:
        ds_year.close()

# Replace the time coordinate with plain integer years (2010, 2011, ...)
ds_all["time"] = np.array(full_years)

# Step 2: AR(2) Prediction Function
def predict_ar2(series, n_steps=3, a=0.7, b=0.3):
    """Extrapolate a series with a fixed-weight AR(2)-style recurrence.

    Each new value is ``a * y[t-1] + b * y[t-2]``; predictions are fed back
    in, so later steps build on earlier ones. The weights are constants, not
    fitted to ``series``.

    Parameters
    ----------
    series : array-like
        Values ordered along the first axis (oldest first). Any further axes
        (e.g. lat, lon) are forecast element-wise in one call.
    n_steps : int, default 3
        Number of future values to produce.
    a, b : float, default 0.7 and 0.3
        Weights of the most recent and the second most recent value.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_steps, *series.shape[1:])``. An element is
        NaN in every step when the series has fewer than two entries or when
        either of its last two values is NaN.
    """
    history = np.asarray(series, dtype=float)
    forecast = np.full((n_steps, *history.shape[1:]), np.nan)

    # Fewer than two observations: nothing to extrapolate from
    if history.shape[0] < 2:
        return forecast

    older, newer = history[-2], history[-1]
    for step in range(n_steps):
        # NaN in either input propagates through the arithmetic, which gives
        # the same all-NaN result as an explicit missing-value check.
        upcoming = a * newer + b * older
        forecast[step] = upcoming
        older, newer = newer, upcoming

    return forecast

# Step 3: Forecast every grid cell at once
# Same recurrence and weights as predict_ar2 (0.7 * y[t-1] + 0.3 * y[t-2]),
# but evaluated on whole lat/lon grids instead of one Python call per cell.
values = ds_all[var_name].values  # shape (years, lat, lon)
years_future = [2022, 2023, 2024]
weight_last, weight_second_last = 0.7, 0.3

# Keep the forecast in the source's floating dtype so that appending it does
# not promote the whole historical array to a wider type.
forecast_dtype = np.result_type(values.dtype, np.float32)
predicted = np.full((len(years_future), *values.shape[1:]), np.nan, dtype=forecast_dtype)

if values.shape[0] >= 2:  # with fewer than two years everything stays NaN
    older = values[-2].astype(np.float64)
    newer = values[-1].astype(np.float64)
    for step in range(len(years_future)):
        # Cells with NaN in either input stay NaN: NaN propagates through
        # the arithmetic, and each prediction feeds the next step.
        upcoming = weight_last * newer + weight_second_last * older
        predicted[step] = upcoming
        older, newer = newer, upcoming

# Step 4: Add predictions to dataset
# NOTE(review): ds_pred carries only `var_name`; the other variables of
# ds_all have no values for the forecast years — presumably xr.concat fills
# them with missing values; verify in the saved file.
ds_pred = xr.Dataset(
    {
        var_name: (["time", "lat", "lon"], predicted)
    },
    coords={
        "time": years_future,
        "lat": ds_all.lat,
        "lon": ds_all.lon
    }
)

# Merge original + predicted
ds_full = xr.concat([ds_all, ds_pred], dim="time")
ds_full.to_netcdf(output_file)

print("✅ Done: Saved forecasted 2010–2024 NetCDF:", output_file)


✅ Done: Saved forecasted 2010–2024 NetCDF: PERMAFROST_Canada_Annual_2010_2024.nc


In [21]:
# Re-open the saved 2010–2024 file and print its summary as a sanity check
forecast_path = "PERMAFROST_Canada_Annual_2010_2024.nc"
ds = xr.open_dataset(forecast_path)
print(ds)


<xarray.Dataset> Size: 5GB
Dimensions:              (time: 15, lat: 4500, lon: 8900)
Coordinates:
  * time                 (time) int64 120B 2010 2011 2012 ... 2022 2023 2024
  * lat                  (lat) float32 18kB 40.01 40.01 40.03 ... 84.98 84.99
  * lon                  (lon) float32 36kB -141.0 -141.0 ... -52.02 -52.01
Data variables:
    spatial_ref          (time) float64 120B ...
    permafrost_fraction  (time, lat, lon) float32 2GB ...
    PFR_uncertainty      (time, lat, lon) float32 2GB ...
Attributes: (12/39)
    title:                      CCI Permafrost extent
    institution:                University of Oslo
    source:                     MODIS MOD11A1 and MYD11A1 level 3 Collection ...
    history:                    2024-03-20 13:57:43
    references:                 http://cci.esa.int/Permafrost, https://doi.or...
    tracking_id:                9efb3c35-33fa-4fd0-a1be-bbadbf475675
    ...                         ...
    geospatial_lat_units:       degrees_north


In [None]:
# Forecast 2022–2024 values
# Export the result as a new NetCDF