In [None]:
import os
import re
from datetime import datetime, timedelta
import rasterio
import numpy as np
from tqdm import tqdm
import multiprocessing as mp
import pandas as pd

# READ-ME


In [None]:
'''
DEPENDENCIES: pandas, numpy, tqdm, matplotlib, rasterio, multiprocessing

INPUT DATA ---> ICE CONCENTRATION DATA FROM THE NATIONAL SNOW AND ICE DATA CENTER (NSIDC)
- DOWNLOAD THE DATA FROM: https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/

DUE TO THE SIZE OF THE PROJECT, BACK-UP DATA FILES WERE CREATED. ONE HAS BEEN PROVIDED FOR 'future_ice_data' IN CASE OF ANY ISSUES WITH THE ORIGINAL DATA FILES.

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

WHAT THIS NOTEBOOK DOES:
- MODELS FUTURE ARCTIC ICE CONCENTRATION USING A LINEAR DECAY MODEL BASED ON HISTORICAL SEASONALITY DATA 
- APPLIES THE FORECASTED ICE CONCENTRATION TO TIF FILES TO CREATE FUTURE ICE CONCENTRATION MAPS

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

FOR QUESTIONS OR ISSUES, CONTACT: 
- Ioannis Thomopoulos       -->  ioannis.thomopoulos@studbocconi.it
- Jacopo D'Angelo           -->  jacopo.dangelo@studbocconi.it
- Max Rienth                -->  maximilian.rienth@studbocconi.it
- Luca Milani               -->  luca.milani2@studbocconi.it

'''

# DATA COLLECTION + CONFIGURATION + GENERAL SET-UP


In [None]:
# --- Paths -------------------------------------------------------------
# Input rasters, the two output CSVs, and a scratch directory that holds the
# per-date CSV fragments written by the worker processes.
data_dir = 'original_data/ice_tifs_historic'
outfile = 'created_data/predicted_concentration.csv'
nodata_list_file = 'created_data/nodata_pixels.csv'
temp_dir = 'created_data/IGNORE_THIS___pickling_file'
os.makedirs(temp_dir, exist_ok=True)

# Expected file name: "<YYYY>_N_<YYYYMMDD>_concentration_v3.0.tif".
pattern = re.compile(r'(\d{4})_N_(\d{8})_concentration_v3\.0\.tif')

# Collect (date, path) pairs for the 2015-2020 training years. Feb 29 is left
# out so that every year contributes exactly 365 day-of-year slots.
train_files = []
for fname in os.listdir(data_dir):
    # fullmatch (not match): the whole name must fit the pattern, so files that
    # merely start with a valid name (e.g. "*.tif.aux.xml") are not picked up.
    m = pattern.fullmatch(fname)
    if not m:
        continue
    year = int(m.group(1))
    date = datetime.strptime(m.group(2), '%Y%m%d')
    if 2015 <= year <= 2020 and not (date.month == 2 and date.day == 29):
        train_files.append((date, os.path.join(data_dir, fname)))
train_files.sort(key=lambda x: x[0])

# Fail early with an actionable message instead of an IndexError further down.
if not train_files:
    raise FileNotFoundError(
        f"No 2015-2020 training rasters matching '{pattern.pattern}' found in '{data_dir}'"
    )

# Grid shape and nodata marker are taken from the first training raster;
# every other raster is assumed to share them (not checked here).
with rasterio.open(train_files[0][1]) as src0:
    height, width = src0.height, src0.width
    nodata_val = src0.nodata

# stack[i]       : band 1 of training raster i as float32, nodata replaced by NaN
# mask_nodata[i] : True where raster i carried the nodata value
stack = np.empty((len(train_files), height, width), dtype=np.float32)
mask_nodata = np.zeros((len(train_files), height, width), dtype=bool)
for i, (_, path) in enumerate(tqdm(train_files, desc='Loading training rasters')):
    with rasterio.open(path) as src:
        arr = src.read(1).astype(np.float32)
    nodata_mask = (arr == nodata_val)
    stack[i] = np.where(nodata_mask, np.nan, arr)
    mask_nodata[i] = nodata_mask

# Per-pixel maximum over the whole training period. Computed once and reused:
# the reduction walks the full multi-year stack, so doing it twice is wasteful.
pixel_max = np.nanmax(stack, axis=0)

# Pixel classes:
#   nodata   - nodata in at least one training raster
#   water    - never above 10 in any raster
#   land     - reaches a value above 2000
#              (presumably NSIDC flag codes rather than concentrations - confirm)
#   variable - everything else; only these pixels are forecast
nodata_pixels  = np.any(mask_nodata, axis=0)
water_pixels   = pixel_max <= 10
land_pixels    = pixel_max > 2000
variable_mask  = ~(nodata_pixels | water_pixels | land_pixels)
rows, cols     = np.where(variable_mask)

# Days per month of a non-leap year; used to turn (month, day) into a
# 0-based day-of-year index in the range 0..364.
mdays = [31,28,31,30,31,30,31,31,30,31,30,31]

# baseline[doy] accumulates the rasters observed on that day of year,
# counts[doy] records how many rasters went into the sum.
baseline = np.zeros((365, height, width), dtype=np.float64)
counts   = np.zeros(365, dtype=int)

# stack was filled in train_files order, so the enumerate index is the stack
# index. This replaces a linear search of train_files for every file.
for idx, (date, _) in enumerate(train_files):
    doy = sum(mdays[:date.month-1]) + date.day - 1
    # NaN (nodata) contributes 0 to the sum but still counts as an observation;
    # pixels with any nodata are excluded from the forecast via variable_mask.
    baseline[doy] += np.nan_to_num(stack[idx], nan=0.0)
    counts[doy]   += 1

# Turn sums into means; days of year with no observation at all become NaN.
for doy in range(365):
    if counts[doy] > 0:
        baseline[doy] /= counts[doy]
    else:
        baseline[doy].fill(np.nan)

# MODELLING ICE


In [4]:
# Linear-decay window: the multiplier is 1 at t0 and reaches 0 at t_end.
t0 = datetime(2024, 9, 15)
t_end = datetime(2050, 9, 15)
total_days = (t_end - t0).days

# Forecast calendar: every day from start_date to end_date inclusive,
# except Feb 29 (the baseline only has 365 day-of-year slots).
start_date = datetime(2025, 1, 1)
end_date = datetime(2050, 12, 31)
calendar_length = (end_date - start_date).days + 1
dates = [
    day
    for day in (start_date + timedelta(days=offset) for offset in range(calendar_length))
    if (day.month, day.day) != (2, 29)
]

def init_worker(baseline_arr, rows_arr, cols_arr):
    """Pool initializer: expose the shared inputs as module-level globals.

    Stores the arguments under the names BASELINE, ROWS and COLS so that
    process_date can read them without receiving them with every task.

    Parameters
    ----------
    baseline_arr : day-of-year baseline, indexed as [doy][row, col] by process_date
    rows_arr, cols_arr : row / column indices of the pixels to forecast
    """
    global BASELINE, ROWS, COLS
    BASELINE, ROWS, COLS = baseline_arr, rows_arr, cols_arr

def process_date(cur_date):
    """Write the forecast for one date to a CSV fragment and return its path.

    The forecast for a pixel is its day-of-year baseline value times a linear
    decay multiplier M: 1.0 before ``t0``, falling linearly to 0.0 at
    ``t_end``, and 0.0 after ``t_end``.

    Reads the module-level names ``t0``, ``t_end``, ``total_days``, ``mdays``
    and ``temp_dir``, plus BASELINE / ROWS / COLS as set by ``init_worker``.

    Parameters
    ----------
    cur_date : datetime
        Date to forecast. Feb 29 is not expected: the 365-slot day-of-year
        index would map it onto the Mar 1 slot.

    Returns
    -------
    str
        Path of ``<temp_dir>/<YYYYMMDD>.csv``. The file has no header and one
        line ``YYYY-MM-DD,row,col,value`` (value with 3 decimals) per
        (ROWS, COLS) pixel, in that order.
    """
    # multiplier
    if cur_date < t0:
        M = 1.0
    elif cur_date > t_end:
        M = 0.0
    else:
        M = 1.0 - ((cur_date - t0).days / total_days)

    # day-of-year index (0-based, non-leap calendar)
    doy = sum(mdays[:cur_date.month-1]) + cur_date.day - 1

    # Only the (ROWS, COLS) pixels are written, so scale just those instead of
    # the whole (H, W) grid; element-wise the result is identical.
    values = BASELINE[doy][ROWS, COLS] * M

    # Format the date once rather than once per pixel.
    date_label = cur_date.strftime('%Y-%m-%d')

    # write temp CSV
    fname = os.path.join(temp_dir, f"{cur_date.strftime('%Y%m%d')}.csv")
    with open(fname, 'w') as f:
        f.writelines(
            f"{date_label},{r},{c},{v:.3f}\n"
            for r, c, v in zip(ROWS, COLS, values)
        )
    return fname

NOTE: The cell below will take a while to run


In [None]:
if __name__ == '__main__':
    # Force the 'fork' start method: workers inherit init_worker/process_date
    # and the notebook globals instead of having to unpickle them.
    # NOTE: 'fork' is not available on Windows.
    ctx = mp.get_context('fork')
    with ctx.Pool(
            processes=8,                       # cap at 8 workers
            initializer=init_worker,           # each worker gets baseline & mask
            initargs=(baseline, rows, cols)
        ) as pool:
        # Keep the paths the workers report so the merge step below only
        # touches fragments produced by this run.
        written_paths = list(tqdm(
            pool.imap_unordered(process_date, dates),
            total=len(dates),
            desc='Predicting dates'
        ))

    # === 10) Merge temp CSVs ===
    # Fragment names are "<YYYYMMDD>.csv", so sorting by name is chronological.
    # Using the returned paths (rather than listing temp_dir) keeps stale
    # fragments from an interrupted run or stray files out of the merged CSV.
    temp_files = sorted(os.path.basename(p) for p in written_paths)
    with open(outfile, 'w') as fout:
        fout.write('date,row,col,predicted_concentration\n')
        for fname in tqdm(temp_files, desc='Merging CSVs'):
            path = os.path.join(temp_dir, fname)
            with open(path) as fin:
                fout.write(fin.read())
            os.remove(path)   # fragment is no longer needed once merged

    # === 11) Save nodata pixel list ===
    # One "row,col" line per pixel that was nodata in at least one training raster.
    nodata_coords = list(zip(*np.where(nodata_pixels)))
    with open(nodata_list_file, 'w') as fn:
        fn.write('row,col\n')
        for r, c in tqdm(nodata_coords, desc='Saving nodata pixels'):
            fn.write(f'{r},{c}\n')

# APPLYING FORECASTED ICE TO ARCTIC MAP


In [None]:
# Load the forecast table (columns: date,row,col,predicted_concentration).
predicted_csv_path = 'created_data/predicted_concentration.csv'
# predicted_csv_path = 'created_data/backup_data/predicted_concentration_backup.csv'  # Uncomment to use backup file
future_ice_data = pd.read_csv(predicted_csv_path)

In [None]:
out_dir      = 'created_data/predicted_tifs'
template_path = 'original_data/ice_tifs_historic/2024_N_20241231_concentration_v3.0.tif'
os.makedirs(out_dir, exist_ok=True)

# Palette index layout of the output rasters:
#   0..254 -> ice concentration (0 = open water, 254 = full concentration)
#   200    -> land (reserved; see the collision handling in the loop)
#   255    -> nodata
MAX_ICE_INDEX = 254
LAND_INDEX    = 200
NODATA_INDEX  = 255

# The template supplies the georeferencing metadata and the static masks.
with rasterio.open(template_path) as src:
    meta     = src.meta.copy()
    template = src.read(1)
    nodata   = src.nodata
    height, width = src.height, src.width

# derive masks
land_mask   = template > 2000
water_mask  = template == 0

# build palette: blue (index 0) fading to white (index 254)
cmap = {}
for i in range(MAX_ICE_INDEX + 1):
    shade = int(i / 254.0 * 255)
    cmap[i] = (shade, shade, 255)
cmap[LAND_INDEX]   = (0, 255, 0)     # land → green
cmap[NODATA_INDEX] = (128, 0, 128)   # nodata → purple

meta.update({
    'dtype':       'uint8',
    'count':       1,
    'photometric': 'palette',
    'nodata':      NODATA_INDEX
})


grouped = future_ice_data.groupby('date')

for date_token, df_day in grouped:
    # format date_token to YYYYMMDD string
    date_str = pd.to_datetime(date_token).strftime('%Y%m%d')
    out_tif  = os.path.join(out_dir, f'{date_str}_predicted.tif')

    # start with all pixels nodata
    pal_arr = np.full((height, width), NODATA_INDEX, dtype=np.uint8)

    # Template open water goes in FIRST. The template is a single day
    # (2024-12-31); applying its water mask after the forecast would zero
    # every forecast pixel that happened to be ice-free on that day.
    pal_arr[water_mask] = 0

    # Extract numpy arrays for this date. Deliberately not named rows/cols:
    # those globals are the initargs of the multiprocessing cell and must
    # survive a re-run of that cell.
    px_rows = df_day['row'].to_numpy(dtype=int)
    px_cols = df_day['col'].to_numpy(dtype=int)
    vals    = df_day['predicted_concentration'].to_numpy(dtype=float)

    # map concentration → 0–254; non-finite forecasts stay nodata instead of
    # going through an undefined NaN → integer cast
    finite = np.isfinite(vals)
    idxs = np.full(vals.shape, NODATA_INDEX, dtype=np.int16)
    ice_idx = np.clip(np.rint(vals[finite] / 1000.0 * 254), 0, MAX_ICE_INDEX).astype(np.int16)
    # Index 200 is reserved for land. Move ice that lands exactly on it to the
    # neighbouring level so it is not drawn (or decoded) as land.
    ice_idx[ice_idx == LAND_INDEX] = LAND_INDEX - 1
    idxs[finite] = ice_idx

    # fill variable pixels
    pal_arr[px_rows, px_cols] = idxs

    # land always wins
    pal_arr[land_mask] = LAND_INDEX

    # write out
    with rasterio.open(out_tif, 'w', **meta) as dst:
        dst.write(pal_arr, 1)
        dst.write_colormap(1, cmap)

    print(f'Wrote {out_tif}')