In [None]:
import xarray as xr
import glob
import datetime

import numpy as np
import pandas as pd

from scipy.interpolate import griddata

import matplotlib.pyplot as plt
import matplotlib.patches as patches


from scipy.interpolate import NearestNDInterpolator

In [None]:
# tmask:producer = CMCC - ROFS division ;
# tmask:machine = juno ;
# tmask:source mesh = /data/opa/mfs/Med_static/MFS_EAS8_STATIC_V1/NEMO_DATA0/mesh_mask.nc ;
# tmask:method = scipy.interpolate.griddata w/ nearest option ;
# tmask:created = 2024-04-24 16:33:47.071164 ;

In [None]:
# Land mask creation.
# Reference bounding boxes, written as (lon_min, lon_max, lat_min, lat_max):
#   global_coords = (-180, 180, -90, 90)   -- not used
#   med_coords    = (-5, 36, 30, 46)

tmask = xr.open_dataset('dati/mist/tmask_interpolated.nc')

# Take the 'tmask' variable with its row order reversed (np.flipud) and display the whole grid
ocean_mask = np.flipud(tmask['tmask'])
print(ocean_mask.shape)
plt.imshow(ocean_mask)
plt.show()

# 256x256 window around Italy (rows 0:256, columns 310:566), cast to boolean.
# Original author's note: 0 marks land, 1 marks sea.
italy_mask = ocean_mask[0:256, 310:566].astype(bool)
print(italy_mask.shape)
plt.imshow(italy_mask)
plt.show()

#np.save('dati/mist/mediterrean_mask.npy', ocean_mask)

In [None]:
# Legacy land mask derived from the long-term average SST file -- DO NOT USE.
# It is only compared against the tmask-based italy_mask at the end of this cell.
global_coords = (-180, 180, -90, 90)  # (lon_min, lon_max, lat_min, lat_max). Not used.
med_coords = (-5, 36, 30, 46)  # (lon_min, lon_max, lat_min, lat_max)

avg_ds = xr.open_dataset('dati/mist/AQUA_MODIS.20020704_20231231.L3m.CU.SST.sst.4km.nc')   # Average data
avg_data = avg_ds['sst']
# Latitude is sliced from lat_max down to lat_min, longitude from lon_min up to lon_max
mediterrean_avg_data = avg_data.sel(
    lat=slice(med_coords[3], med_coords[2]),
    lon=slice(med_coords[0], med_coords[1]),
)
# Binary mask: 1 where the average has a value, 0 where it is missing
old_ocean_mask = xr.where(mediterrean_avg_data.notnull(), 1, 0)
old_italy_mask = old_ocean_mask[0:256, 310:566]    # Same window as italy_mask

# Show where the legacy mask and the current italy_mask disagree
difference_mask = old_italy_mask - italy_mask
plt.imshow(difference_mask)


In [None]:
#Dataset creation

# List the .nc files of every period directory and combine them into one list.
# glob returns paths in arbitrary order, so each listing is sorted to make the
# order of the samples reproducible across runs and machines.
files_y2002_2004c = sorted(glob.glob('dati/y2002_2004c/*.nc'))
files_y2005_2009c = sorted(glob.glob('dati/y2005_2009c/*.nc'))
files_y2010_2014c = sorted(glob.glob('dati/y2010_2014c/*.nc'))
files_y2015_2019c = sorted(glob.glob('dati/y2015_2019c/*.nc'))
files_y2020_2023c = sorted(glob.glob('dati/y2020_2023c/*.nc'))
all_files = files_y2002_2004c + files_y2005_2009c + files_y2010_2014c + files_y2015_2019c + files_y2020_2023c


# Datasets and dates lists, for day and night
dataset_d = []; date_d = []
dataset_n = []; date_n = []

for file in all_files:
    # The context manager closes the file handle once the values have been copied out;
    # without it every opened file stays open until garbage collection.
    with xr.open_dataset(file) as ds:
        # A file holding the variable 'sst' is a day product, otherwise the night variable 'sst4' is read
        is_day = 'sst' in ds
        data = ds['sst' if is_day else 'sst4'].values[0:256, 310:566]
        # Example product name: AQUA_MODIS.20030722.L3m.DAY.SST.x_sst.nc
        product_name = ds.attrs['product_name']

    # Keep only the pixels selected by italy_mask (cuts away measurements of lakes and rivers)
    data = np.where(italy_mask, data, np.nan)

    # The second dot-separated field of the product name is the date; store it as an
    # integer number of days (datetime64[D] cast to int)
    date = pd.to_datetime(product_name.split('.')[1]).date()
    date = np.array(date, dtype='datetime64[D]').astype(int)

    # Append the data and the date to the respective lists
    if is_day:
        dataset_d.append(data)
        date_d.append(date)
    else:
        dataset_n.append(data)
        date_n.append(date)

In [None]:
# Turn the accumulated lists into numpy arrays and report the resulting shapes

dataset_d, date_d = np.array(dataset_d), np.array(date_d)
dataset_n, date_n = np.array(dataset_n), np.array(date_n)

for stacked in (dataset_d, date_d, dataset_n, date_n):
    print(stacked.shape)

In [None]:
# Extremes over both the day and the night measurements, ignoring NaN values

data_min = min(np.nanmin(part) for part in (dataset_d, dataset_n))
data_max = max(np.nanmax(part) for part in (dataset_d, dataset_n))
print(data_min, data_max, sep='\n')

# print(date_d[0], date_d[-1])
# print(date_n[0], date_n[-1])

In [None]:
# Keep references to the values as they are before normalization, and write them
# to disk together with the date arrays

abs_dataset_d, abs_dataset_n = dataset_d, dataset_n

for target_path, payload in (
    ('dati/mist/datasets/abs_dataset_d.npy', abs_dataset_d),
    ('dati/mist/datasets/abs_dataset_n.npy', abs_dataset_n),
    ('dati/mist/datasets/date_d.npy', date_d),
    ('dati/mist/datasets/date_n.npy', date_n),
):
    np.save(target_path, payload)

In [None]:
# NaN-ignoring minimum, mean and maximum of the day and night arrays,
# followed by the extremes computed over both

for label, samples in (('day', dataset_d), ('night', dataset_n)):
    print(f'min of {label}:', np.nanmin(samples))
    print(f'avg of {label}:', np.nanmean(samples))
    print(f'max of {label}:', np.nanmax(samples))

print('minimum temperature:', data_min, 'maximum temperature:', data_max)

# The same report can be produced for abs_dataset_d / abs_dataset_n by
# substituting them for dataset_d / dataset_n in the loop above.

In [None]:
# Normalize the dataset in the range [-1, 1]
#
# The scaling is computed from abs_dataset_d / abs_dataset_n (the values kept before
# normalization) rather than from dataset_d / dataset_n themselves, so re-running this
# cell always produces the same result instead of normalizing already-normalized data.

data_range = data_max - data_min
dataset_d = 2 * ((abs_dataset_d - data_min) / data_range) - 1
dataset_n = 2 * ((abs_dataset_n - data_min) / data_range) - 1

# Alternative: normalize it in the range [-2, 45]

# temperature_min = -2
# temperature_max = 45
# dataset = np.clip(dataset, temperature_min, temperature_max)
# normalized_dataset = (dataset - temperature_min) / (temperature_max - temperature_min)

In [None]:
# NaN-ignoring minimum, mean and maximum of the normalized day and night arrays,
# followed by the extremes that were used for the scaling

for label, samples in (('day', dataset_d), ('night', dataset_n)):
    print(f'normalized min of {label}:', np.nanmin(samples))
    print(f'normalized avg of {label}:', np.nanmean(samples))
    print(f'normalized max of {label}:', np.nanmax(samples))

print('minimum temperature:', data_min, 'maximum temperature:', data_max)

In [None]:
# # Unite the day and night datasets

# dataset = np.concatenate((dataset_d, dataset_n), axis=0)
# date = np.concatenate((date_d, date_n), axis=0)

# print(dataset.shape)
# print(date.shape)

Baseline Creation

In [None]:
# BASELINE CREATION
# For each day of the year (1..366) the baseline is the NaN-ignoring mean of all samples of
# dataset_d / dataset_n taken on that day of the year. Missing values on the pixels selected
# by italy_mask are then filled spatially, and every other pixel is set to 0.
baseline_d = np.empty((366, 256, 256))
baseline_n = np.empty((366, 256, 256))

# Convert the date arrays (integer days since 1970-01-01) to pandas DatetimeIndex,
# then get the day of the year for each date
date_series_d = pd.to_datetime(date_d, unit='D', origin=pd.Timestamp('1970-01-01'))
day_of_year_d = date_series_d.dayofyear
date_series_n = pd.to_datetime(date_n, unit='D', origin=pd.Timestamp('1970-01-01'))
day_of_year_n = date_series_n.dayofyear

# For each day of the year, calculate the average temperature for day and night
for day in range(0, 366):
    # Indices of the samples that fall on the current day of the year.
    # day_of_year starts from 1, hence day+1. The night indices must come from the night
    # dates: the day and night arrays are built independently and are not aligned.
    indices_d = np.where(day_of_year_d == day+1)
    indices_n = np.where(day_of_year_n == day+1)

    # Mean over the matching samples, ignoring NaN; a day of the year with no sample becomes NaN
    mean_temp_d = np.nanmean(dataset_d[indices_d], axis=0) if indices_d[0].size > 0 else np.nan
    mean_temp_n = np.nanmean(dataset_n[indices_n], axis=0) if indices_n[0].size > 0 else np.nan

    # Assign the mean temperatures to the baseline arrays
    baseline_d[day] = mean_temp_d
    baseline_n[day] = mean_temp_n


# Create a meshgrid for the x and y coordinates, to be used in the interpolation
x = np.arange(256)
y = np.arange(256)
xx, yy = np.meshgrid(x, y)  # Grid of x, y coordinates
ocean_points = (xx[italy_mask], yy[italy_mask])  # Targets of the interpolation; the same for every day


# For all NaN values still present in the ocean, interpolate spatially
for day in range(366):
    for baseline_array in [baseline_d, baseline_n]:
        # Get the current baseline and create a mask for the valid values
        data = baseline_array[day]
        valid_mask = ~np.isnan(data) #& italy_mask

        # A day of the year without any measurement has nothing to interpolate from;
        # skip it instead of letting griddata fail on an empty point set
        if not valid_mask.any():
            print(f'No valid measurement for day of year {day+1}: baseline left as NaN on the ocean pixels')
            continue

        # Get the valid values and their coordinates
        values = data[valid_mask]
        coords = np.array((xx[valid_mask], yy[valid_mask])).T   # One (x, y) row per valid pixel

        # Linear interpolation, evaluated only on the ocean pixels (italy_mask);
        # points outside the convex hull of the valid pixels come back as NaN
        data_interp = griddata(coords, values, ocean_points, method='linear')
        baseline_array[day, italy_mask] = data_interp

        # Nearest-neighbor interpolation to fill in any remaining NaN values
        valid_mask = ~np.isnan(baseline_array[day]) #& italy_mask
        values = baseline_array[day, valid_mask]
        coords = np.array((xx[valid_mask], yy[valid_mask])).T
        interpolator = NearestNDInterpolator(coords, values)
        baseline_array[day, italy_mask] = interpolator(ocean_points)


# Put the value 0 in the land pixels of the baseline arrays
baseline_d[:, ~italy_mask] = 0
baseline_n[:, ~italy_mask] = 0

print(baseline_d.shape)
print(baseline_n.shape)

In [None]:
# Example plot of the baseline for a few days of the year

# Day-of-year -> 'DD Month' label, used only for the titles.
# 2020 is a leap year, so a label exists for day 366 as well.
leap_year_start = datetime.datetime(2020, 1, 1)
day_names = {
    doy: (leap_year_start + datetime.timedelta(days=doy - 1)).strftime('%d %B')
    for doy in range(1, 367)
}

# 1st of January, 29th of February, 17th of July, 31st of December
days_to_plot = [1, 60, 199, 366]

# One row per selected day: day baseline on the left, night baseline on the right
fig, axs = plt.subplots(4, 2, figsize=(15, 20))

for i, day in enumerate(days_to_plot):
    panels = (('Day', baseline_d, axs[i, 0]), ('Night', baseline_n, axs[i, 1]))
    for prefix, baseline, ax in panels:
        # Day of year 1 is stored at index 0 of the baseline array, hence day-1
        image = ax.imshow(baseline[day-1], cmap='viridis')
        ax.set_title(f'{prefix} {day_names[day]}')
        fig.colorbar(image, ax=ax)

plt.tight_layout()
plt.show()

In [None]:
# # DEBUG : Iterate over the baseline arrays and check if there are still NaN values
# for i, baseline_array in enumerate([baseline_d, baseline_n]):
#     for day in range(baseline_array.shape[0]):
#         if np.isnan(baseline_array[day]).any():
#             print(f'NaN value found in baseline_array {i} on day {day}')
#             plt.imshow(baseline_array[day])
#             plt.colorbar()
#             plt.title(f'Baseline_array {i} on day {day}')
#             plt.show()

ABS baseline creation

In [None]:
# Create the ABS baseline arrays
# Same procedure as the normalized baseline, applied to abs_dataset_d / abs_dataset_n:
# per-day-of-year NaN-ignoring mean, spatial fill on the italy_mask pixels, 0 elsewhere.
abs_baseline_d = np.empty((366, 256, 256))
abs_baseline_n = np.empty((366, 256, 256))

# Convert the date arrays (integer days since 1970-01-01) to pandas DatetimeIndex,
# then get the day of the year for each date
date_series_d = pd.to_datetime(date_d, unit='D', origin=pd.Timestamp('1970-01-01'))
day_of_year_d = date_series_d.dayofyear
date_series_n = pd.to_datetime(date_n, unit='D', origin=pd.Timestamp('1970-01-01'))
day_of_year_n = date_series_n.dayofyear


# For each day of the year, calculate the average temperature for day and night
for day in range(0, 366):
    # Indices of the samples that fall on the current day of the year.
    # day_of_year starts from 1, hence day+1. The night indices must come from the night
    # dates: the day and night arrays are built independently and are not aligned.
    indices_d = np.where(day_of_year_d == day+1)
    indices_n = np.where(day_of_year_n == day+1)

    # Mean over the matching samples, ignoring NaN; a day of the year with no sample becomes NaN
    mean_temp_d = np.nanmean(abs_dataset_d[indices_d], axis=0) if indices_d[0].size > 0 else np.nan
    mean_temp_n = np.nanmean(abs_dataset_n[indices_n], axis=0) if indices_n[0].size > 0 else np.nan

    # Assign the mean temperatures to the baseline arrays
    abs_baseline_d[day] = mean_temp_d
    abs_baseline_n[day] = mean_temp_n


# Create a meshgrid for the x and y coordinates, to be used in the interpolation
x = np.arange(256)
y = np.arange(256)
xx, yy = np.meshgrid(x, y)  # Grid of x, y coordinates
ocean_points = (xx[italy_mask], yy[italy_mask])  # Targets of the interpolation; the same for every day


# For all NaN values still present in the ocean, interpolate spatially
for day in range(366):
    for baseline_array in [abs_baseline_d, abs_baseline_n]:
        # Get the current baseline and create a mask for the valid values
        data = baseline_array[day]
        valid_mask = ~np.isnan(data) #& italy_mask

        # A day of the year without any measurement has nothing to interpolate from;
        # skip it instead of letting griddata fail on an empty point set
        if not valid_mask.any():
            print(f'No valid measurement for day of year {day+1}: baseline left as NaN on the ocean pixels')
            continue

        # Get the valid values and their coordinates
        values = data[valid_mask]
        coords = np.array((xx[valid_mask], yy[valid_mask])).T   # One (x, y) row per valid pixel

        # Linear interpolation, evaluated only on the ocean pixels (italy_mask);
        # points outside the convex hull of the valid pixels come back as NaN
        data_interp = griddata(coords, values, ocean_points, method='linear')
        baseline_array[day, italy_mask] = data_interp

        # Nearest-neighbor interpolation to fill in any remaining NaN values
        valid_mask = ~np.isnan(baseline_array[day]) #& italy_mask
        values = baseline_array[day, valid_mask]
        coords = np.array((xx[valid_mask], yy[valid_mask])).T
        interpolator = NearestNDInterpolator(coords, values)
        baseline_array[day, italy_mask] = interpolator(ocean_points)


# Put the value 0 in the land pixels of the baseline arrays
abs_baseline_d[:, ~italy_mask] = 0
abs_baseline_n[:, ~italy_mask] = 0

print(abs_baseline_d.shape)
print(abs_baseline_n.shape)

In [None]:
# Example plot of the ABS baseline for a few days of the year

# Day-of-year -> 'DD Month' label, used only for the titles.
# 2020 is a leap year, so a label exists for day 366 as well.
leap_year_start = datetime.datetime(2020, 1, 1)
day_names = {
    doy: (leap_year_start + datetime.timedelta(days=doy - 1)).strftime('%d %B')
    for doy in range(1, 367)
}

# 1st of January, 29th of February, 17th of July, 31st of December
days_to_plot = [1, 60, 199, 366]

# One row per selected day: day baseline on the left, night baseline on the right
fig, axs = plt.subplots(4, 2, figsize=(15, 20))

for i, day in enumerate(days_to_plot):
    panels = (('Day', abs_baseline_d, axs[i, 0]), ('Night', abs_baseline_n, axs[i, 1]))
    for prefix, baseline, ax in panels:
        # Day of year 1 is stored at index 0 of the baseline array, hence day-1
        image = ax.imshow(baseline[day-1], cmap='viridis')
        ax.set_title(f'{prefix} {day_names[day]}')
        fig.colorbar(image, ax=ax)

plt.tight_layout()
plt.show()

In [None]:
# # DEBUG : Iterate over the baseline arrays and check if there are still NaN values
# for i, baseline_array in enumerate([abs_baseline_d, abs_baseline_n]):
#     for day in range(baseline_array.shape[0]):
#         if np.isnan(baseline_array[day]).any():
#             print(f'NaN value found in baseline_array {i} on day {day}')
#             plt.imshow(baseline_array[day])
#             plt.colorbar()
#             plt.title(f'Baseline_array {i} on day {day}')
#             plt.show()

In [None]:
# SAVE DATASETS

# np.save('dati/mist/datasets/dataset.npy', dataset)
# np.save('dati/mist/datasets/date.npy', date)
# np.save('dati/mist/datasets/baseline.npy', baseline)

# Destination file -> object to store; written in the order listed
arrays_to_save = {
    'dati/mist/datasets/date_d.npy': date_d,
    'dati/mist/datasets/date_n.npy': date_n,
    'dati/mist/datasets/dataset_d.npy': dataset_d,
    'dati/mist/datasets/dataset_n.npy': dataset_n,
    'dati/mist/datasets/abs_dataset_d.npy': abs_dataset_d,
    'dati/mist/datasets/abs_dataset_n.npy': abs_dataset_n,
    'dati/mist/datasets/baseline_d.npy': baseline_d,
    'dati/mist/datasets/baseline_n.npy': baseline_n,
    'dati/mist/datasets/abs_baseline_d.npy': abs_baseline_d,
    'dati/mist/datasets/abs_baseline_n.npy': abs_baseline_n,
    'dati/mist/datasets/italy_mask.npy': italy_mask,
    'dati/mist/datasets/data_min.npy': data_min,
    'dati/mist/datasets/data_max.npy': data_max,
}

for target_path, payload in arrays_to_save.items():
    np.save(target_path, payload)

In [None]:
# LOAD DATASETS
# Reads back the files written by the save cell, one group of related arrays per statement.

# dataset = np.load('dati/mist/datasets/dataset.npy')
# date = np.load('dati/mist/datasets/date.npy')
# baseline = np.load('dati/mist/datasets/baseline.npy')

# Dates
date_d, date_n = (
    np.load(source_path)
    for source_path in (
        'dati/mist/datasets/date_d.npy',
        'dati/mist/datasets/date_n.npy',
    )
)

# Normalized and absolute datasets
dataset_d, dataset_n, abs_dataset_d, abs_dataset_n = (
    np.load(source_path)
    for source_path in (
        'dati/mist/datasets/dataset_d.npy',
        'dati/mist/datasets/dataset_n.npy',
        'dati/mist/datasets/abs_dataset_d.npy',
        'dati/mist/datasets/abs_dataset_n.npy',
    )
)

# Normalized and absolute baselines
baseline_d, baseline_n, abs_baseline_d, abs_baseline_n = (
    np.load(source_path)
    for source_path in (
        'dati/mist/datasets/baseline_d.npy',
        'dati/mist/datasets/baseline_n.npy',
        'dati/mist/datasets/abs_baseline_d.npy',
        'dati/mist/datasets/abs_baseline_n.npy',
    )
)

# Mask and normalization extremes
italy_mask, data_min, data_max = (
    np.load(source_path)
    for source_path in (
        'dati/mist/datasets/italy_mask.npy',
        'dati/mist/datasets/data_min.npy',
        'dati/mist/datasets/data_max.npy',
    )
)

For Mediterranean data

In [None]:
#Dataset creation

# List the .nc files of every period directory and combine them into one list.
# glob returns paths in arbitrary order, so each listing is sorted to make the
# order of the samples reproducible across runs and machines.
files_y2002_2004c = sorted(glob.glob('dati/y2002_2004c/*.nc'))
files_y2005_2009c = sorted(glob.glob('dati/y2005_2009c/*.nc'))
files_y2010_2014c = sorted(glob.glob('dati/y2010_2014c/*.nc'))
files_y2015_2019c = sorted(glob.glob('dati/y2015_2019c/*.nc'))
files_y2020_2023c = sorted(glob.glob('dati/y2020_2023c/*.nc'))
all_files = files_y2002_2004c + files_y2005_2009c + files_y2010_2014c + files_y2015_2019c + files_y2020_2023c


# Datasets and dates lists, for day and night
dataset_d = []; date_d = []
dataset_n = []; date_n = []

for file in all_files:
    # The context manager closes the file handle once the values have been copied out;
    # without it every opened file stays open until garbage collection.
    with xr.open_dataset(file) as ds:
        # A file holding the variable 'sst' is a day product, otherwise the night variable 'sst4' is read
        is_day = 'sst' in ds
        data = ds['sst' if is_day else 'sst4'].values
        # Example product name: AQUA_MODIS.20030722.L3m.DAY.SST.x_sst.nc
        product_name = ds.attrs['product_name']

    # Keep only the pixels selected by ocean_mask (cuts away measurements of lakes and rivers)
    data = np.where(ocean_mask, data, np.nan)

    # The second dot-separated field of the product name is the date; store it as an
    # integer number of days (datetime64[D] cast to int)
    date = pd.to_datetime(product_name.split('.')[1]).date()
    date = np.array(date, dtype='datetime64[D]').astype(int)

    # Append the data and the date to the respective lists
    if is_day:
        dataset_d.append(data)
        date_d.append(date)
    else:
        dataset_n.append(data)
        date_n.append(date)

In [None]:
# Turn the accumulated lists into numpy arrays and report the resulting shapes

dataset_d, date_d = np.array(dataset_d), np.array(date_d)
dataset_n, date_n = np.array(dataset_n), np.array(date_n)

for stacked in (dataset_d, date_d, dataset_n, date_n):
    print(stacked.shape)

In [None]:
# Write the current day and night arrays to the Mediterranean dataset files
for target_path, payload in (
    ('dati/mist/datasets/abs_mediterrean_d.npy', dataset_d),
    ('dati/mist/datasets/abs_mediterrean_n.npy', dataset_n),
):
    np.save(target_path, payload)

For the Tirreno area

In [None]:
# Area of interest: 4.5625 to 9.5 degrees E and 39.5 to 44.4375 degrees N
med_coords = (-5, 36, 30, 46)  # (lon_min, lon_max, lat_min, lat_max)
tirreno_coords = (4.5625, 9.5, 39.5, 44.4375)  # (lon_min, lon_max, lat_min, lat_max)

# Crop the land mask to the tirreno_coords box, then reverse the row order
tirreno_mask = np.flipud(
    tmask["tmask"].sel(
        lon=slice(tirreno_coords[0], tirreno_coords[1]),
        lat=slice(tirreno_coords[2], tirreno_coords[3]),
    )
)
print(tirreno_mask.shape)   # Original author's note: 119x119, despite the documentation saying 112x112
plt.imshow(tirreno_mask)
plt.show()



In [None]:
def find_indices(lat, lon, lat_min, lat_max, lon_min, lon_max, lat_res, lon_res):
    """
    Convert a latitude/longitude pair into grid indices.

    Each index is the offset of the coordinate from the grid minimum divided by
    the grid resolution, truncated toward zero by int(). lat_max and lon_max are
    accepted but not used in the computation.

    Returns:
    (int, int): (lat_index, lon_index) for the given latitude and longitude.
    """
    lat_offset_cells = (lat - lat_min) / lat_res
    lon_offset_cells = (lon - lon_min) / lon_res
    return int(lat_offset_cells), int(lon_offset_cells)


# Extent of the grid, in degrees
lat_min, lat_max = 30, 46   # Minimum / maximum latitude
lon_min, lon_max = -5, 36   # Minimum / maximum longitude
# Resolution: the extent divided by 384 along latitude and 984 along longitude
# (presumably the number of rows and columns of the grid -- TODO confirm)
lat_res = (lat_max - lat_min) / 384
lon_res = (lon_max - lon_min) / 984

# Coordinates of the two points of interest
lat1, lon1 = 44.4375, 4.5625
lat2, lon2 = 39.5, 9.5

# Find and report the indices of each point
lat_index1, lon_index1 = find_indices(lat1, lon1, lat_min, lat_max, lon_min, lon_max, lat_res, lon_res)
print(f"Indices for coordinates ({lat1}, {lon1}): ({lat_index1}, {lon_index1})")

lat_index2, lon_index2 = find_indices(lat2, lon2, lat_min, lat_max, lon_min, lon_max, lat_res, lon_res)
print(f"Indices for coordinates ({lat2}, {lon2}): ({lat_index2}, {lon_index2})")

In [None]:
#Feeling extra! All four corners of the tirreno area
def find_coordinates(lat_index, lon_index, lat_min, lat_max, lon_min, lon_max, lat_res, lon_res):
    """
    Convert grid indices into a latitude/longitude pair.

    Each coordinate is the grid minimum plus the index multiplied by the grid
    resolution. lat_max and lon_max are accepted but not used in the computation.

    Returns:
    (float, float): (lat, lon) for the given indices.
    """
    return (
        lat_min + lat_index * lat_res,
        lon_min + lon_index * lon_res,
    )

# Index window whose corners are converted back to coordinates
lat_min_index, lat_max_index = 90, 346
lon_min_index, lon_max_index = 229, 485

# Latitude and longitude ranges of the Mediterranean area
lat_min, lat_max = 30, 46
lon_min, lon_max = -5, 36

# Resolution: the extent divided by 384 along latitude and 984 along longitude
lat_res = (lat_max - lat_min) / 384
lon_res = (lon_max - lon_min) / 984

# Arguments shared by the four calls below
grid_args = (lat_min, lat_max, lon_min, lon_max, lat_res, lon_res)

# Coordinates of the four corners (labels follow the index order: min lat index = "top")
top_left = find_coordinates(lat_min_index, lon_min_index, *grid_args)
top_right = find_coordinates(lat_min_index, lon_max_index, *grid_args)
bottom_left = find_coordinates(lat_max_index, lon_min_index, *grid_args)
bottom_right = find_coordinates(lat_max_index, lon_max_index, *grid_args)

for corner_label, corner in (
    ("Top-left", top_left),
    ("Top-right", top_right),
    ("Bottom-left", bottom_left),
    ("Bottom-right", bottom_right),
):
    print(f"{corner_label} corner coordinates: {corner}")

In [None]:
# Scratch arithmetic for the 256x256 window.
# Rows: 346-256 = 90, i.e. a 256-row window ending at 346 (the 90:346 row slice applied to tmask below)
print(346-256, 346)
# Columns: 229 to 229+256 = 485 (the 229:485 column slice applied to tmask below)
print(229, 229+256)

In [None]:
# 384-346 = 38 and 38+256 = 294: the 38:294 row slice applied to the SST arrays in the tirreno dataset cell.
# Presumably 384 is the number of grid rows, so this is rows 90:346 counted from the other end -- TODO confirm
print(384-346, 38 + 256)
# NOTE(review): 348 appears nowhere else in this notebook; possibly meant 346 (346-256 = 90) -- confirm
print(348 - 256)

In [None]:
# Crop the unflipped tmask to rows 90:346, columns 229:485, then reverse the row order.
# The crop is kept in its own variable: assigning it to ocean_mask would overwrite the full
# Mediterranean mask that the Mediterranean dataset cell and the map cell rely on.
tirreno_crop = tmask["tmask"].values[90:346, 229:485]
tirreno_mask = np.flipud(tirreno_crop)
print(tirreno_mask.shape)
plt.imshow(tirreno_mask)
plt.show()
np.save('dati/mist/datasets/tirreno_mask.npy', tirreno_mask)

In [None]:
#Dataset creation

# List the .nc files of every period directory and combine them into one list.
# glob returns paths in arbitrary order, so each listing is sorted to make the
# order of the samples reproducible across runs and machines.
files_y2002_2004c = sorted(glob.glob('dati/y2002_2004c/*.nc'))
files_y2005_2009c = sorted(glob.glob('dati/y2005_2009c/*.nc'))
files_y2010_2014c = sorted(glob.glob('dati/y2010_2014c/*.nc'))
files_y2015_2019c = sorted(glob.glob('dati/y2015_2019c/*.nc'))
files_y2020_2023c = sorted(glob.glob('dati/y2020_2023c/*.nc'))
all_files = files_y2002_2004c + files_y2005_2009c + files_y2010_2014c + files_y2015_2019c + files_y2020_2023c


# Datasets and dates lists, for day and night
dataset_d = []; date_d = []
dataset_n = []; date_n = []

for file in all_files:
    # The context manager closes the file handle once the values have been copied out;
    # without it every opened file stays open until garbage collection.
    with xr.open_dataset(file) as ds:
        # A file holding the variable 'sst' is a day product, otherwise the night variable 'sst4' is read
        is_day = 'sst' in ds
        data = ds['sst' if is_day else 'sst4'].values[38:294, 229:485]
        # Example product name: AQUA_MODIS.20030722.L3m.DAY.SST.x_sst.nc
        product_name = ds.attrs['product_name']

    # Keep only the pixels selected by tirreno_mask (cuts away measurements of lakes and rivers)
    data = np.where(tirreno_mask, data, np.nan)

    # The second dot-separated field of the product name is the date; store it as an
    # integer number of days (datetime64[D] cast to int)
    date = pd.to_datetime(product_name.split('.')[1]).date()
    date = np.array(date, dtype='datetime64[D]').astype(int)

    # Append the data and the date to the respective lists
    if is_day:
        dataset_d.append(data)
        date_d.append(date)
    else:
        dataset_n.append(data)
        date_n.append(date)

In [None]:
# Turn the accumulated lists into numpy arrays and report the resulting shapes

dataset_d, date_d = np.array(dataset_d), np.array(date_d)
dataset_n, date_n = np.array(dataset_n), np.array(date_n)

for stacked in (dataset_d, date_d, dataset_n, date_n):
    print(stacked.shape)

In [None]:
# Write the current day and night arrays to the tirreno dataset files
for target_path, payload in (
    ('dati/mist/datasets/abs_tirreno_d.npy', dataset_d),
    ('dati/mist/datasets/abs_tirreno_n.npy', dataset_n),
):
    np.save(target_path, payload)

In [None]:
# Read the tirreno day/night arrays and the tirreno mask back from disk
abs_tirreno_d, abs_tirreno_n, tirreno_mask = (
    np.load(source_path)
    for source_path in (
        'dati/mist/datasets/abs_tirreno_d.npy',
        'dati/mist/datasets/abs_tirreno_n.npy',
        'dati/mist/datasets/tirreno_mask.npy',
    )
)

In [None]:
# Show one night sample of the tirreno dataset (index 27) with the mask drawn
# semi-transparently on top of it, then the mask on its own
day = 27
overlay_ax = plt.gca()
overlay_ax.imshow(abs_tirreno_n[day])
overlay_ax.imshow(tirreno_mask, alpha=0.25)
plt.show()

mask_only_ax = plt.gca()
mask_only_ax.imshow(tirreno_mask)
plt.show()

map with traced areas

In [None]:
# Draw ocean_mask without axes on a 12x6 figure
plt.figure(figsize=(12, 6))
mask_ax = plt.gca()
mask_ax.imshow(ocean_mask)
mask_ax.axis('off')

# Red outline of the first area: 256x256 with its corner at (x=310, y=0)
rect1 = patches.Rectangle(xy=(310, 0), width=256, height=256, linewidth=1, edgecolor='r', facecolor='none')
mask_ax.add_patch(rect1)

# # Green outline of the second area (disabled)
# rect2 = patches.Rectangle((229, 38), 256, 256, linewidth=1, edgecolor='green', facecolor='none')
# plt.gca().add_patch(rect2)

plt.show()

In [None]:
#plot the image screenshotMed.png
import matplotlib.image as mpimg
# Read the screenshot and display it without axes
img = mpimg.imread('screenshotMed.png')
imgplot = plt.imshow(img)
plt.gca().axis('off')
plt.show()

# Array shape of the image, then the ratio of its first dimension to its second
print(img.shape)
n_rows, n_cols = img.shape[0], img.shape[1]
print(n_rows/n_cols)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg
from PIL import Image

# Open the screenshot, resize it to 984x384 (width, height) and convert it to an array
img = Image.open('screenshotMed.png')
img_resized = img.resize((984, 384))
img_resized_array = np.array(img_resized)

# Draw the resized image without axes on a 12x6 figure
plt.figure(figsize=(12, 6))
screenshot_ax = plt.gca()
screenshot_ax.imshow(img_resized_array)
screenshot_ax.axis('off')

# Red outline of the first area: 256x256 with its corner at (x=310, y=0)
rect1 = patches.Rectangle(xy=(310, 0), width=256, height=256, linewidth=1, edgecolor='red', facecolor='none')
screenshot_ax.add_patch(rect1)

# # Green outline of the second area (disabled)
# rect2 = patches.Rectangle((229, 38), 256, 256, linewidth=1, edgecolor='green', facecolor='none')
# plt.gca().add_patch(rect2)

plt.show()