In [21]:
"""
Created on Wed Jul 5 2023

@author: Laia Amorós

Version that Maija has modified
"""

import netCDF4 as nc
import numpy as np
import xarray as xr

import os
import sys

#sys.path.append('../src')

def regrid(data, lon, lat, factor1, factor2):
    """
    Regrids data to lower resolution by averaging over a window of size factor1 x factor2.
    """
    data_unmasked = data.filled(np.nan)
    data_shape = data.shape
    regridded_shape = (data_shape[0] // factor1, data_shape[1] // factor2)

    data_reg = np.full(regridded_shape, np.nan)
    lon_reg = np.zeros(regridded_shape)
    lat_reg = np.zeros(regridded_shape)

    for i in range(regridded_shape[0]):
        for j in range(regridded_shape[1]):
            window = data_unmasked[i*factor1:min((i+1)*factor1, data_shape[0]), 
                                                j*factor2:min((j+1)*factor2, data_shape[1])]
            lon_window = lon[i*factor1:min((i+1)*factor1, data_shape[0]), 
                                                j*factor2:min((j+1)*factor2, data_shape[1])]
            lat_window = lat[i*factor1:min((i+1)*factor1, data_shape[0]), 
                                                j*factor2:min((j+1)*factor2, data_shape[1])]

            data_reg[i, j] = np.nanmean(window)
            lon_reg[i, j] = np.nanmean(lon_window)
            lat_reg[i, j] = np.nanmean(lat_window)

    # Mask new arrays where np.isnan is True
    data_reg = np.ma.masked_where(np.isnan(data_reg), data_reg)

    return data_reg, lon_reg, lat_reg


def covariance_matrix(matrix):
    """
    Computes the covariance matrix of a matrix with NaN values.
    """
    matrix_flat = np.asarray(matrix).flatten()
    deviations = matrix_flat - np.nanmean(matrix_flat)
    deviations[np.isnan(deviations)] = 0
    outer_product = np.outer(deviations, deviations)
    num_non_nan = np.count_nonzero(~np.isnan(matrix_flat))
    covariance_matrix = outer_product / (num_non_nan - 1)

    return covariance_matrix


# def write_dataarray(cov, lat, lon):
#     nr = cov.shape[1]
#     out = xr.DataArray(
#         cov,
#         dims = [('nparams'), ('nparams')],
#         coords = {'nparams': np.arange(nr), 'nparams': np.arange(nr)
#         })
#     covmat['latlon_nparams'] = xr.DataArray(

#     ) 
    


def main():
    NUMBER_OF_FILES = 2 # change to the number of covariance matrices you want to produce
    FACTOR1 = 15
    FACTOR2 = 2

    # Path to CO2M simulations data and output
    # in LUMI
 #   DATA_PATH = '/scratch/project_462000289/CO2M_obs/CO2M_simulations/2018/Orbits_Europe/CO2Meast/'
 #   OUTPUT_PATH = '/scratch/project_462000289/covariance_matrices'
    DATA_PATH = '/home/pietaril/Documents/data/CO2M_testdata/CO2M_simulations/'
    OUTPUT_PATH = '/home/pietaril/Documents/data/CO2M_testdata/unc_cov_matrices/'
    
    filenames = [os.path.join(DATA_PATH, f) for f in os.listdir(DATA_PATH) if f.endswith('.nc')]
        
    for file in filenames[1:NUMBER_OF_FILES]:
        # Read data
        data_nc = nc.Dataset(file, 'r')
        xco2 = data_nc.groups['data']['observation_data']['xco2'][:]
        xco2_quality_flag = data_nc.groups['data']['observation_data']['xco2_quality_flag'][:]
        uncertainties = data_nc.groups['data']['observation_data']['xco2_precision'][:]
        lon = data_nc.groups['data']['geolocation_data_dem']['longitude'][:]
        lat = data_nc.groups['data']['geolocation_data_dem']['latitude'][:]

        # Regrid data to lower resolution. Factor1 and factor2 can be changed to any integer
        uncertainties_reg, lon_reg, lat_reg = regrid(uncertainties, lon, lat, FACTOR1, FACTOR2)

        #Compute covariance matrix
        covariance = covariance_matrix(uncertainties_reg)
    


        # create xarray dataset with covariance matrix and corresponding spatial coordinates 

        out_cov = xr.Dataset(
            {"covariance": (["nparams", "nparams"], covariance)},
            coords = {
                "lon": (["nparams"], np.asarray(lon_reg).flatten()),
                "lat": (["nparams"], np.asarray(lat_reg).flatten())
            }
            attrs={
                'comment': "Spatial covariance matrix for gridcells numbered in the variable nparams with coordinates of the centre of each gridcell"
            }
        )
        return out_cov


#         # Save covariance matrix to output folder
#         date = os.path.basename(file)[21:29]
#         output_filename = f'uncertainty_covariance_matrix{date}.npy'
#         np.save(os.path.join(OUTPUT_PATH, output_filename), covariance)


# if __name__ == "__main__":
#     main()



In [22]:
cov = main()

  data_reg[i, j] = np.nanmean(window)
  deviations = matrix_flat - np.nanmean(matrix_flat)


In [23]:
cov

In [4]:
print(len(cov[abs(cov) > 0. ]))

34225


In [5]:
unc

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],
  fill_value=1e+20)

In [8]:
len(cov[abs(cov) > 0.])

34225

In [11]:
cov.shape

(33495, 33495)

For some reason lat & lon not given in 1D so needed to flatten

In [20]:
np.asarray(lon).flatten()

array([  19.08547592,   19.00862694,   18.93191719, ..., -148.00617981,
       -147.87005615, -147.73410034])

In [17]:
lat.flatten()

(609, 55)