In [None]:
import xarray as xr
import glob
import datetime

import numpy as np
import pandas as pd

from scipy.interpolate import griddata

import matplotlib.pyplot as plt

from scipy.interpolate import NearestNDInterpolator

In [None]:
# Land/sea mask creation
# Reference bounding boxes as (lon_min, lon_max, lat_min, lat_max); documentation only, not used below.
# global_coords = (-180, 180, -90, 90)
# med_coords = (-5, 36, 30, 46)

tmask = xr.open_dataset('dati/mist/tmask_interpolated.nc')

# Read the 'tmask' variable (the mask itself, not SST data) with its row order reversed
ocean_mask = np.flipud(tmask['tmask'])
print(ocean_mask.shape)

# Same window (rows 0:256, columns 310:566) that is cut out of every data file later on.
# Per the original note: 0 (False) for land, 1 (True) for sea.
italy_mask = ocean_mask[0:256, 310:566].astype(bool)
print(italy_mask.shape)

# Give each mask its own axes: two plt.imshow calls on the same implicit axes stack the
# images, so the cropped mask would hide the full-domain one.
fig, (ax_full, ax_italy) = plt.subplots(1, 2, figsize=(12, 5))
ax_full.imshow(ocean_mask)
ax_full.set_title('Full mask')
ax_italy.imshow(italy_mask)
ax_italy.set_title('Italy window mask')
plt.show()

In [None]:
# Dataset creation

def extract_masked_fields(ds, sst_name, sea_mask):
    """Cut the Italy window out of one opened file and blank every masked-out pixel.

    Parameters
    ----------
    ds : dataset-like object (e.g. ``xarray.Dataset``)
        Must support ``ds[name].values`` for ``sst_name`` and ``'qual_' + sst_name``,
        and expose ``ds.attrs['product_name']``.
    sst_name : str
        Temperature variable to read: 'sst' (day files) or 'sst4' (night files).
    sea_mask : boolean array matching the [0:256, 310:566] window
        Pixels where the mask is False are replaced by NaN
        (original note: cuts away measurements of lakes and rivers).

    Returns
    -------
    tuple
        ``(data, qual, day)``: masked temperature window, masked quality window, and the
        product date as an integer number of days since 1970-01-01.
    """
    data = np.where(sea_mask, ds[sst_name].values[0:256, 310:566], np.nan)
    qual = np.where(sea_mask, ds['qual_' + sst_name].values[0:256, 310:566], np.nan)

    # The date is the second dot-separated field of the product name.
    # Example: AQUA_MODIS.20030722.L3m.DAY.SST.x_sst.nc
    stamp = ds.attrs['product_name'].split('.')[1]
    day = np.array(pd.to_datetime(stamp).date(), dtype='datetime64[D]').astype(int)
    return data, qual, day


# List all .nc files per directory and combine them into one list.
# glob returns names in arbitrary order, so each listing is sorted to make the stacked arrays reproducible.
files_y2002_2004c = sorted(glob.glob('dati/y2002_2004c/*.nc'))
files_y2005_2009c = sorted(glob.glob('dati/y2005_2009c/*.nc'))
files_y2010_2014c = sorted(glob.glob('dati/y2010_2014c/*.nc'))
files_y2015_2019c = sorted(glob.glob('dati/y2015_2019c/*.nc'))
files_y2020_2023c = sorted(glob.glob('dati/y2020_2023c/*.nc'))
all_files = files_y2002_2004c + files_y2005_2009c + files_y2010_2014c + files_y2015_2019c + files_y2020_2023c


# Datasets, dates and quality flags, kept separately for day and night
dataset_d = []; date_d = []
dataset_n = []; date_n = []

qual_d = []; qual_n = []

for file in all_files:
    # The context manager closes each file once its arrays are extracted; .values has
    # already loaded the data, so nothing read here depends on the file staying open.
    with xr.open_dataset(file) as ds:
        # Day files carry the variable 'sst', night files carry 'sst4'
        sst_name = 'sst' if 'sst' in ds else 'sst4'
        data, qual, date = extract_masked_fields(ds, sst_name, italy_mask)

    if sst_name == 'sst':
        dataset_d.append(data)
        date_d.append(date)
        qual_d.append(qual)
    else:
        dataset_n.append(data)
        date_n.append(date)
        qual_n.append(qual)

In [None]:
# Turn the per-file lists into numpy arrays (same names, now ndarrays)

dataset_d, date_d = np.array(dataset_d), np.array(date_d)
dataset_n, date_n = np.array(dataset_n), np.array(date_n)
qual_d, qual_n = np.array(qual_d), np.array(qual_n)

# Shapes in order: day data, day dates, night data, night dates, day quality, night quality
for stacked in (dataset_d, date_d, dataset_n, date_n, qual_d, qual_n):
    print(stacked.shape)

In [None]:
# Count quality levels over both day and night data

def analyze_quality(qual_array, levels=range(6)):
    """Count how often each quality level occurs and its share of the valid data.

    Parameters
    ----------
    qual_array : array-like of numbers
        Quality flags of any shape; NaN entries are treated as missing.
    levels : iterable of numbers, optional
        Quality levels to tally. Defaults to 0..5.

    Returns
    -------
    quality_counts : dict
        ``{level: number of entries equal to level}``.
    quality_percentiles : dict
        ``{level: count / number of non-NaN entries * 100}``. Despite the name these are
        percentages, not percentiles. Every value is NaN when the input holds no
        non-NaN entry, since the share is undefined in that case.
    """
    levels = tuple(levels)  # allow one-shot iterables; the levels are walked twice below

    # Flatten to make counting easier; asarray lets plain lists through as well
    flattened = np.asarray(qual_array).ravel()

    # Number of valid (non-NaN) datapoints, used as the denominator
    total_valid = np.count_nonzero(~np.isnan(flattened))

    quality_counts = {i: np.count_nonzero(flattened == i) for i in levels}

    if total_valid == 0:
        # Empty or all-NaN input: avoid ZeroDivisionError and report "undefined"
        quality_percentiles = {i: float('nan') for i in levels}
    else:
        quality_percentiles = {i: (quality_counts[i] / total_valid) * 100 for i in levels}

    return quality_counts, quality_percentiles

# Quality statistics for the day and night arrays separately
quality_counts_d, quality_percentiles_d = analyze_quality(qual_d)
quality_counts_n, quality_percentiles_n = analyze_quality(qual_n)

# Report the per-array results
for message, value in (
    ("Quality counts for qual_d:", quality_counts_d),
    ("Quality percentiles for qual_d:", quality_percentiles_d),
    ("Quality counts for qual_n:", quality_counts_n),
    ("Quality percentiles for qual_n:", quality_percentiles_n),
):
    print(message, value)

# ======

# Stack day and night along the first axis (day entries first, then night)
dataset_total = np.concatenate([dataset_d, dataset_n])
date_total = np.concatenate([date_d, date_n])
qual_total = np.concatenate([qual_d, qual_n])

# Same quality statistics on the combined array
quality_counts_total, quality_percentiles_total = analyze_quality(qual_total)
for message, value in (
    ("Quality counts for qual_total:", quality_counts_total),
    ("Quality percentiles for qual_total:", quality_percentiles_total),
):
    print(message, value)