# The regridder used for the project
Run this notebook in a conda environment that has xESMF (`xesmf`) installed.

In [2]:
import datetime
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xesmf as xe

In [3]:
def get_lat_lon_bounds(ds):
	"""Return the extreme values of the ``latitude`` and ``longitude`` coordinates.

	Args:
		ds: Object exposing ``latitude`` and ``longitude`` attributes whose
			``.min()`` / ``.max()`` results support ``.item()`` (the notebook
			passes datasets opened with ``xr.open_dataset``).

	Returns:
		A 4-tuple ``(lat_min, lat_max, lon_min, lon_max)`` of Python scalars.
	"""
	extremes = []
	# Latitude first, then longitude; for each coordinate: minimum, then maximum.
	for coord in (ds.latitude, ds.longitude):
		extremes.append(coord.min().item())
		extremes.append(coord.max().item())
	return tuple(extremes)

In [4]:
# Configuration: interpolation method and input/output locations.
interp_method = 'bilinear'

data_path = '/mnt/otherdrive/mac_winter_areas/no_catch_dates'

regrider_path = '/home/anna/msc_oppgave/fish-forecast/regridders'
output_path = data_path + '/bio_resampled'
# create output directory if it does not exist
Path(output_path).mkdir(parents=True, exist_ok=True)

# find all files in the data path, sorted by file name
files = sorted(Path(data_path + '/bio').glob('*.nc'))
if not files:
	# Fail early with a clear message instead of an IndexError on files[0] below.
	raise FileNotFoundError(f"No .nc files found in {data_path}/bio")


def _bounds_of(nc_path):
	"""Open ``nc_path``, return its lat/lon bounds, and close the file again."""
	with xr.open_dataset(nc_path) as bounds_ds:
		return get_lat_lon_bounds(bounds_ds)


# Bounds of the three grids involved:
#   * first bio file  -> lat_min, ...     (presumably the coarser 25 km grid - TODO confirm)
#   * last bio file   -> lat_min625, ...
#   * matching phy file -> lat_min_phy, ... (read from the /phy/ directory, the
#     same file the regridding cell uses as its target grid; previously this
#     re-read the last bio file and merely duplicated the 625 bounds)
lat_min, lat_max, lon_min, lon_max = _bounds_of(files[0])
lat_min625, lat_max625, lon_min625, lon_max625 = _bounds_of(files[-1])
lat_min_phy, lat_max_phy, lon_min_phy, lon_max_phy = _bounds_of(data_path + '/phy/' + str(files[-1].name))

In [4]:
# Reference datasets that define the grids: the physics file is the target
# grid, the first and last bio files are the two source grids.
ds_phy = xr.open_dataset(data_path + '/phy/' + str(files[-1].name))
ds_bio_25 = xr.open_dataset(data_path + '/bio/' + str(files[0].name))
ds_bio_625 = xr.open_dataset(data_path + '/bio/' + str(files[-1].name))

# Weight files are cached on disk. Only ask xESMF to reuse them when they
# actually exist; otherwise let it compute them (a hard-coded
# reuse_weights=True can fail on a fresh machine, depending on the xESMF
# version, because the file to read is missing).
Path(regrider_path).mkdir(parents=True, exist_ok=True)
up_weights = f"{regrider_path}/regrid_upsample_{interp_method}.nc"
down_weights = f"{regrider_path}/regrid_downsample_{interp_method}.nc"

up_regridder = xe.Regridder(ds_bio_25, ds_phy, interp_method, filename=up_weights, reuse_weights=Path(up_weights).exists())
down_regridder = xe.Regridder(ds_bio_625, ds_phy, interp_method, filename=down_weights, reuse_weights=Path(down_weights).exists())

# files before 2019/03/22 is 25km^2 data; from that date on the other
# (down-sampling) regridder is used
grid_switch = datetime.datetime(2019, 3, 22)

for date in files:  # NOTE: `date` is the Path of one input file named YYYY-MM-DD.nc
	date_dt = datetime.datetime.strptime(date.name[:-3], '%Y-%m-%d')
	print(f"Date: {date_dt}")
	day = date_dt.strftime('%Y-%m-%d')

	if date_dt < grid_switch:
		regridder, label = up_regridder, "Upsampled"
	else:
		regridder, label = down_regridder, "Downsampled"

	# Close every input file once its regridded copy is written, so the
	# loop does not accumulate one open file handle per processed day.
	with xr.open_dataset(date) as ds:
		regridder(ds).to_netcdf(output_path + f'/{day}.nc')
	print(f"{label} data saved for {day}")

Date: 2011-06-02 00:00:00
Upsampled data saved for 2011-06-02
Date: 2011-06-03 00:00:00
Upsampled data saved for 2011-06-03
Date: 2011-06-04 00:00:00
Upsampled data saved for 2011-06-04
Date: 2011-06-05 00:00:00
Upsampled data saved for 2011-06-05
Date: 2011-06-06 00:00:00
Upsampled data saved for 2011-06-06
Date: 2011-06-07 00:00:00
Upsampled data saved for 2011-06-07
Date: 2011-06-10 00:00:00
Upsampled data saved for 2011-06-10
Date: 2011-06-11 00:00:00
Upsampled data saved for 2011-06-11
Date: 2011-06-12 00:00:00
Upsampled data saved for 2011-06-12
Date: 2011-07-10 00:00:00
Upsampled data saved for 2011-07-10
Date: 2011-07-12 00:00:00
Upsampled data saved for 2011-07-12
Date: 2011-07-14 00:00:00
Upsampled data saved for 2011-07-14
Date: 2011-07-17 00:00:00
Upsampled data saved for 2011-07-17
Date: 2011-07-18 00:00:00
Upsampled data saved for 2011-07-18
Date: 2011-07-19 00:00:00
Upsampled data saved for 2011-07-19
Date: 2011-07-24 00:00:00
Upsampled data saved for 2011-07-24
Date: 20

In [8]:
# Open every regridded file in the output directory and keep only the depth
# layer closest to 3.0 (in the units of the `depth` coordinate); the result
# is written under the same file name to "<output_path>_d3/".
Path(output_path + '_d3').mkdir(parents=True, exist_ok=True)  # create output directory for depth 3.0 if it does not exist

files = list(Path(output_path).glob('*.nc'))
files.sort(reverse=True)  # sort in reverse order to process the latest files first

# Last subset written during THIS run; stays None when every file is skipped,
# so the summary below never hits an undefined (or stale) `ds3`.
ds3 = None
for file in files:
	save_path = output_path + f'_d3/{file.name}'
	if Path(save_path).exists():
		print(f"File {file.name} already processed, skipping.")
		continue
	# The context manager closes the source file even when writing fails.
	with xr.open_dataset(file) as ds:
		if 'depth' not in ds.dims:
			print(f"File {file.name} has no depth dimension, skipping.")
			continue
		# index of the layer closest to 3.0
		i = int(np.argmin(np.abs(ds.depth.values - 3.0)))
		# slice (not a scalar index) keeps `depth` as a length-1 dimension;
		# load() pulls the data into memory so ds3 stays usable after close.
		ds3 = ds.isel(depth=slice(i, i + 1)).load()
		ds3.to_netcdf(save_path)
	print(f"Depth 3.0 selected for {file.name}")

if ds3 is None:
	print("No new files were processed in this run.")
else:
	print("n_depths after:", ds3.depth.size)
	print("dims now:", ds3.dims)

File 2024-11-30.nc already processed, skipping.
File 2024-11-29.nc already processed, skipping.
File 2024-11-28.nc already processed, skipping.
File 2024-11-27.nc already processed, skipping.
File 2024-11-26.nc already processed, skipping.
File 2024-11-25.nc already processed, skipping.
File 2024-11-24.nc already processed, skipping.
File 2024-11-23.nc already processed, skipping.
File 2024-11-22.nc already processed, skipping.
File 2024-11-20.nc already processed, skipping.
File 2024-11-19.nc already processed, skipping.
File 2024-11-18.nc already processed, skipping.
File 2024-11-17.nc already processed, skipping.
File 2024-11-16.nc already processed, skipping.
File 2024-11-13.nc already processed, skipping.
File 2024-11-11.nc already processed, skipping.
File 2024-11-10.nc already processed, skipping.
File 2024-11-09.nc already processed, skipping.
File 2024-11-06.nc already processed, skipping.
File 2024-11-02.nc already processed, skipping.
File 2024-11-01.nc already processed, sk

In [None]:
# Pick a random file from `files` for a visual spot-check of the depth-3 output.
if not files:
	# random.choice would otherwise raise an opaque IndexError on an empty list.
	raise FileNotFoundError(f"No .nc files found in {output_path}")
random_file = random.choice(files)
d3_path = output_path + '_d3/'
print(f"Random file selected: {random_file.name}")

# Plotting is currently disabled; uncomment to inspect the selected file.
# NOTE(review): assumes the file has a 'chl' variable with time and depth
# dimensions - confirm before enabling.
# ds_random = xr.open_dataset(f"{d3_path}/{random_file.name}")
# ds_random

# plt.figure(figsize=(10, 6))
# plt.imshow(ds_random['chl'].isel(time=0, depth=0), cmap='viridis', aspect='auto',origin='lower')


Random file selected: 2020-07-02.nc
