In [10]:
import os
import copernicusmarine as cpm
from pathlib import Path
import ast
from datetime import datetime

In [11]:
def create_missing_directories():
    # Define the path to the parent directory
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

    # Check if 'data' folder exists in the parent directory
    data_dir = os.path.join(parent_dir, 'data')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
        print("'data' folder created in the parent directory.")
    else:
        print("'data' folder already exists in the parent directory.")

    # Check if 'satellite' directory exists inside 'data' folder
    satellite_dir = os.path.join(data_dir, 'satellite')
    if not os.path.exists(satellite_dir):
        os.makedirs(satellite_dir)
        print("'satellite' directory created inside 'data' folder.")
    else:
        print("'satellite' directory already exists inside 'data' folder.")

if __name__ == "__main__":
    create_missing_directories()

'data' folder already exists in the parent directory.
'satellite' directory already exists inside 'data' folder.


In [12]:
# List of datasets to extract
datasets_to_extract = [
    'cmems_obs-oc_atl_bgc-plankton_nrt_l3-multi-1km_P1D',        # Chla
    'cmems_obs-oc_atl_bgc-optics_nrt_l3-multi-1km_P1D',          # bbp
    'cmems_obs-sl_eur_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D', # currents
    'cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m',                  # carbonate
    'cmems_obs_mob_glo_phy-cur_nrt_0.25deg_P1D-m']               # currents 2

# Directory setup
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_dir   = os.path.join(parent_dir, 'data')
satellite_dir = os.path.join(data_dir, 'satellite')

# Date and time setup
start_time = "2024-05-01T00:00:00"  # Format time as YYYY-MM-DD"T"HH:MM:SS
now = datetime.now()
end_time = now.strftime("%Y-%m-%dT%H:%M:%S")

# Geographical bounds
min_depth = 0
max_depth = 1
min_lon = -35
max_lon = -5
min_lat = 55
max_lat = 66

# Loop through each dataset
for dataset_to_extract in datasets_to_extract:
    # Dynamically set the output filename based on the dataset name
    saved_netCDF_as = f"CMEMS_{dataset_to_extract}_April-May2024.nc"
    filepath = os.path.join(satellite_dir, saved_netCDF_as)

    # Check if the file already exists
    if os.path.exists(filepath):
        response = input(f"Do you want to remove and overwrite the file \"{saved_netCDF_as}\" (y/n): ").strip().lower()
        if response == 'y':
            os.remove(filepath)
            print(f"{filepath} has been removed.")
            print(f"{saved_netCDF_as}. Proceeding to download.")
        else:
            print("Download canceled.")
            continue
    else:
        print(f"{saved_netCDF_as} does not exist. Proceeding to download.")
    
    # Perform the data subset operation
    cpm.subset(
        dataset_id=dataset_to_extract,
        minimum_longitude=min_lon,
        maximum_longitude=max_lon,
        minimum_latitude=min_lat,
        maximum_latitude=max_lat,
        start_datetime=start_time,
        end_datetime=end_time,
        minimum_depth=min_depth,
        maximum_depth=max_depth,
        output_filename=saved_netCDF_as,
        output_directory=satellite_dir
    )

CMEMS_cmems_obs-oc_atl_bgc-plankton_nrt_l3-multi-1km_P1D_April-May2024.nc does not exist. Proceeding to download.


Fetching catalog:   0%|          | 0/3 [15:55<?, ?it/s]
  def __call__(cls, value, names=_not_given, *values, module=None, qualname=None, type=None, start=1, boundary=None):
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x000001BC5C30DBB0>
Fetching catalog: 100%|██████████| 3/3 [00:32<00:00, 10.72s/it]


INFO - 2024-06-09T16:26:16Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-06-09T16:26:16Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-06-09T16:26:31Z - Service was not specified, the default one was selected: "arco-time-series"
INFO - 2024-06-09T16:26:38Z - Downloading using service arco-time-series...
INFO - 2024-06-09T16:26:46Z - <xarray.Dataset> Size: 6GB
Dimensions:              (time: 17, latitude: 1056, longitude: 2880)
Coordinates:
  * latitude             (latitude) float32 4kB 55.01 55.02 ... 65.98 65.99
  * longitude            (longitude) float32 12kB -34.99 -34.98 ... -5.005
  * time                 (time) datetime64[ns] 136B 2024-05-23 ... 2024-06-08
Data variables: (12/21)
    CHL                  (time, latitude, longitude) float32 207MB dask.array<chunksize=(5, 256, 256), meta=np.ndarray>
    CHL_uncertainty      (time, latitude, longitude) float64 414MB dask.array<chunksize=(5, 256, 256), 

100%|██████████| 42186/42186 [05:17<00:00, 132.94it/s]

INFO - 2024-06-09T16:32:13Z - Successfully downloaded to c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs-oc_atl_bgc-plankton_nrt_l3-multi-1km_P1D_April-May2024.nc





c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs-oc_atl_bgc-optics_nrt_l3-multi-1km_P1D_April-May2024.nc has been removed.
CMEMS_cmems_obs-oc_atl_bgc-optics_nrt_l3-multi-1km_P1D_April-May2024.nc. Proceeding to download.
INFO - 2024-06-09T16:33:23Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-06-09T16:33:23Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-06-09T16:34:22Z - Service was not specified, the default one was selected: "arco-time-series"
INFO - 2024-06-09T16:35:53Z - Downloading using service arco-time-series...
INFO - 2024-06-09T16:37:29Z - <xarray.Dataset> Size: 1GB
Dimensions:          (time: 17, latitude: 1056, longitude: 2880)
Coordinates:
  * latitude         (latitude) float32 4kB 55.01 55.02 55.03 ... 65.98 65.99
  * longitude        (longitude) float32 12kB -34.99 -34.98 ... -5.016 -5.005
  * time             (time) datetime64[ns] 136B 2024-05-23 ... 20

100%|██████████| 9866/9866 [01:27<00:00, 112.27it/s]

INFO - 2024-06-09T16:41:15Z - Successfully downloaded to c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs-oc_atl_bgc-optics_nrt_l3-multi-1km_P1D_April-May2024.nc





c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs-sl_eur_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_April-May2024.nc has been removed.
CMEMS_cmems_obs-sl_eur_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_April-May2024.nc. Proceeding to download.
INFO - 2024-06-09T16:41:25Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-06-09T16:41:25Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-06-09T16:41:33Z - Service was not specified, the default one was selected: "arco-geo-series"
INFO - 2024-06-09T16:41:42Z - Downloading using service arco-geo-series...
INFO - 2024-06-09T16:41:49Z - <xarray.Dataset> Size: 57MB
Dimensions:    (time: 40, latitude: 88, longitude: 201, nv: 2)
Coordinates:
    lat_bnds   (latitude, nv) float32 704B dask.array<chunksize=(88, 2), meta=np.ndarray>
  * latitude   (latitude) float32 352B 55.06 55.19 55.31 ... 65.69 65.81 65.94
    lon_bnds   (longitude, nv) float3

100%|██████████| 1408/1408 [01:47<00:00, 13.15it/s] 

INFO - 2024-06-09T16:50:13Z - Successfully downloaded to c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs-sl_eur_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_April-May2024.nc





c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m_April-May2024.nc has been removed.
CMEMS_cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m_April-May2024.nc. Proceeding to download.
INFO - 2024-06-09T16:51:47Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-06-09T16:51:47Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-06-09T16:51:51Z - Service was not specified, the default one was selected: "arco-time-series"
INFO - 2024-06-09T16:51:55Z - Downloading using service arco-time-series...
INFO - 2024-06-09T16:52:00Z - <xarray.Dataset> Size: 3MB
Dimensions:    (depth: 1, latitude: 45, longitude: 121, time: 40)
Coordinates:
  * depth      (depth) float32 4B 0.494
  * latitude   (latitude) float32 180B 55.0 55.25 55.5 55.75 ... 65.5 65.75 66.0
  * longitude  (longitude) float32 484B -35.0 -34.75 -34.5 ... -5.5 -5.25 -5.0
  * time       (time) datetime64[ns]

100%|██████████| 170/170 [00:27<00:00,  6.23it/s]

INFO - 2024-06-09T16:59:48Z - Successfully downloaded to c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m_April-May2024.nc





CMEMS_cmems_obs_mob_glo_phy-cur_nrt_0.25deg_P1D-m_April-May2024.nc does not exist. Proceeding to download.
INFO - 2024-06-09T16:59:49Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-06-09T16:59:49Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-06-09T16:59:56Z - Service was not specified, the default one was selected: "arco-time-series"
INFO - 2024-06-09T17:00:02Z - Downloading using service arco-time-series...
INFO - 2024-06-09T17:00:10Z - <xarray.Dataset> Size: 20MB
Dimensions:    (depth: 1, latitude: 44, longitude: 120, time: 39)
Coordinates:
  * depth      (depth) int16 2B 0
  * latitude   (latitude) float32 176B 55.12 55.38 55.62 ... 65.38 65.62 65.88
  * longitude  (longitude) float32 480B -34.88 -34.62 -34.38 ... -5.375 -5.125
  * time       (time) datetime64[ns] 312B 2024-05-01 2024-05-02 ... 2024-06-08
Data variables:
    err_ue     (time, depth, latitude, longitude) float64 2MB dask.array<chunksize=(

100%|██████████| 326/326 [00:13<00:00, 24.70it/s] 

INFO - 2024-06-09T17:01:09Z - Successfully downloaded to c:\Users\hanshil\Documents\GitHub\biocarbon_nrt_data_viz\data\satellite\CMEMS_cmems_obs_mob_glo_phy-cur_nrt_0.25deg_P1D-m_April-May2024.nc



