In [31]:
from datetime import datetime
import dask.delayed
import xarray as xr
import numpy as np
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
from global_land_mask import globe
import dask


def haversine_distance(lat, lon):
    """
    Calculate pairwise distances between points using Haversine formula.

    Parameters:
    - lat (ndarray) : 1-d flattened latitude grid
    - lon (ndarray) : 1-d flattened longitude grid
    Returns:
    - distances (ndarray): Pairwise distances matrix.
    """

    # Earth radius in kilometers
    earth_radius = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)

    # Compute differences in latitude and longitude
    dlat = lat_rad[:, np.newaxis] - lat_rad
    dlon = lon_rad[:, np.newaxis] - lon_rad

    # Haversine formula
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat_rad) * np.cos(lat_rad[:, np.newaxis]) * np.sin(dlon / 2.0) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # Calculate pairwise distances in kilometers
    distances = earth_radius * c

    return distances


def create_lsm(latmin, latmax, lonmin, lonmax):
     """Creates a land-sea mask (land=1, sea=0) in 0.1 deg x 0.1 deg resolution
     for the chosen area.
     """
     lat = np.linspace(latmin,latmax, (latmax-latmin)*10+1) #, dtype=np.float32)
     lon = np.linspace(lonmin,lonmax, (lonmax - lonmin)*10+1) #, dtype=np.float32)
     longrid, latgrid = np.meshgrid(lon,lat)
     lsm = globe.is_land(latgrid, longrid)
   
     return lsm, latgrid, longrid

def compute_cov_chunk_A(lsm, latgrid, longrid, sigmas, L, v):
    if v == "land":
        latv = latgrid[lsm].flatten()
        n = int(len(latv)/2)
        latv = latv[0:n]
        lonv = longrid[lsm].flatten()[0:n]
        
             
            
    elif v == "ocean":
        latv = latgrid[~lsm].flatten()
        n = int(len(latv)/2)
        latv = latv[0:n]
        lonv = longrid[~lsm].flatten()[0:n]
        
    sigma = sigmas[v]
    l = L[v]
    covv = sigma**2*np.exp(-1*haversine_distance(latv, lonv)/l)

    return covv, latv, lonv
        
             

def compute_cov_separately(lsm, latgrid, longrid, sigmas, L, v):
    """Version of compute_cov that doesn't use dictionaries that store
    both land and sea covariances"""
    
    if v == "land":
        latv = latgrid[lsm].flatten()
        lonv = longrid[lsm].flatten()
             
            
    elif v == "ocean":
        latv = latgrid[~lsm].flatten()
        lonv = longrid[~lsm].flatten()
             
    sigma = sigmas[v]
    l = L[v]
    covv = sigma**2*np.exp(-1*haversine_distance(latv, lonv)/l)
        
    return covv, latv, lonv

    


In [32]:
#Europe
latmin = 30
latmax = 75
lonmin = -15
lonmax = 40

#smaller testarea
# latmin = 30
# latmax = 35
# lonmin = -10
# lonmax = 0

# Uncertainty (std)
sigmas = {'land': 0.8,
          'ocean': 1.2} 


# Correlation length (km)
L = {'land': 100,  # 
     'ocean': 500}  # for ocean



In [33]:
lsm, latgrid, longrid = create_lsm(latmin, latmax, lonmin, lonmax)
#cov, lat, lon = compute_cov_chunk_A(lsm, latgrid, longrid, sigmas, L, "ocean")


In [38]:
import dask.array as da

latv = latgrid[lsm].flatten()
lonv = longrid[lsm].flatten() 
latv = da.from_array(latv, chunks=35000)
lonv = da.from_array(lonv, chunks=35000)

In [39]:
latv

Unnamed: 0,Array,Chunk
Bytes,0.93 MiB,273.44 kiB
Shape,"(121506,)","(35000,)"
Count,1 Graph Layer,4 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.93 MiB 273.44 kiB Shape (121506,) (35000,) Count 1 Graph Layer 4 Chunks Type float64 numpy.ndarray",121506  1,

Unnamed: 0,Array,Chunk
Bytes,0.93 MiB,273.44 kiB
Shape,"(121506,)","(35000,)"
Count,1 Graph Layer,4 Chunks
Type,float64,numpy.ndarray


In [40]:
v = "land"
sigma = sigmas[v]
l = L[v]
covv = sigma**2*np.exp(-1*haversine_distance(latv, lonv)/l)

In [41]:
covv

Unnamed: 0,Array,Chunk
Bytes,110.00 GiB,9.13 GiB
Shape,"(121506, 121506)","(35000, 35000)"
Count,29 Graph Layers,16 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 110.00 GiB 9.13 GiB Shape (121506, 121506) (35000, 35000) Count 29 Graph Layers 16 Chunks Type float64 numpy.ndarray",121506  121506,

Unnamed: 0,Array,Chunk
Bytes,110.00 GiB,9.13 GiB
Shape,"(121506, 121506)","(35000, 35000)"
Count,29 Graph Layers,16 Chunks
Type,float64,numpy.ndarray


In [20]:
cov, lat, lon = compute_cov_separately(lsm, latgrid, longrid, sigmas, L, "land").compute()

MemoryError: Unable to allocate 55.0 GiB for an array with shape (121529, 121529) and data type float32

In [11]:
cov.compute()

MemoryError: Unable to allocate 55.0 GiB for an array with shape (121529, 121529) and data type float32