<a href="https://colab.research.google.com/github/BahneTP/spatiotemporal-mining-medsea/blob/main/spatiotemporal_mining_medsea_acquisition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Acquisition

This notebook downloads a Mediterranean Sea subset of the **[Global Ocean Physics Reanalysis](https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_PHY_001_030/download?dataset=cmems_mod_glo_phy_my_0.083deg_P1M-m_202311)** dataset from **Copernicus**, saves one NetCDF file per selected depth level, and merges these files into a single combined file.

For further work, see:
- [Exploratory Data Analysis](./eda.ipynb)  
- [Data Mining](./mining.ipynb)


In [1]:
!pip install copernicusmarine
!pip install zarr fsspec
!pip install "xarray>=2024.1.0"

Collecting copernicusmarine
  Using cached copernicusmarine-2.1.2-py3-none-any.whl.metadata (8.1 kB)
Collecting arcosparse<0.5.0,>=0.4.0 (from copernicusmarine)
  Using cached arcosparse-0.4.1-py3-none-any.whl.metadata (5.0 kB)
Collecting boto3>=1.26 (from copernicusmarine)
  Downloading boto3-1.38.42-py3-none-any.whl.metadata (6.6 kB)
Collecting h5netcdf<2.0.0,>=1.4.0 (from copernicusmarine)
  Using cached h5netcdf-1.6.1-py3-none-any.whl.metadata (13 kB)
Collecting lxml>=4.9.0 (from copernicusmarine)
  Using cached lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.5 kB)
Collecting pydantic<3.0.0,>=2.9.1 (from copernicusmarine)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting pystac>=1.8.3 (from copernicusmarine)
  Using cached pystac-1.13.0-py3-none-any.whl.metadata (4.7 kB)
Collecting semver>=3.0.2 (from copernicusmarine)
  Using cached semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)
Collecting zarr>=2.13.3 (from copernicusmarine)
  Using cached 

In [2]:
import sys
# Run pip through the interpreter that executes this kernel, so the package is
# installed into the same environment the notebook imports from.
# NOTE(review): the "numpy<2.0" pin presumably works around a NumPy 2
# incompatibility in one of the packages installed above — confirm and record the reason.
!{sys.executable} -m pip install "numpy<2.0"



In [3]:
import xarray as xr
import copernicusmarine
import os

# All downloaded and derived files go into a "data" directory located in the
# parent of the current working directory; create it if it does not exist yet.
parent = os.path.split(os.getcwd())[0]
path = os.path.join(parent, "data")
os.makedirs(path, exist_ok=True)

In [4]:
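# Alternative (currently disabled): monthly means of salinity (so) and temperature (thetao) over the whole depth range instead of single depth levels.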

# output_file = os.path.join(path, "medsea.nc")
# ds = copernicusmarine.subset(
#     dataset_id="cmems_mod_glo_phy_my_0.083deg_P1M-m",
#     variables=["so", "thetao"],
#     minimum_longitude=-6.285859234924248,
#     maximum_longitude=36.52446704416333,
#     minimum_latitude=29.252430574547926,
#     maximum_latitude=46.2175134343721,
#     start_datetime="1993-01-01T00:00:00",
#     end_datetime="2021-06-01T00:00:00",
#     minimum_depth=0.49402499198913574,
#     maximum_depth=1062.43994140625,
#     output_filename= output_file
# )

In [None]:
import xarray as xr
import os

# Depth levels to download; each level is written to its own NetCDF file.
# NOTE(review): the values look like exact levels of the dataset's depth axis — confirm.
depths = [
    47.37369155883789,
    318.1274108886719,
    1062.43994140625
]

for depth in depths:
    # The file name carries the integer part of the depth, e.g. "medsea_daily_depth_47.nc".
    output_file = os.path.join(path, f"medsea_daily_depth_{int(depth)}.nc")

    # Make the cell safe to re-run: a level that is already on disk is not
    # downloaded again. Delete the file to force a fresh download (for example
    # after an interrupted run that left an incomplete file behind).
    if os.path.exists(output_file):
        print(f"Skipping existing file: {output_file}")
        continue

    # minimum_depth == maximum_depth selects exactly one depth level per request.
    # Credentials are requested interactively if none are configured.
    copernicusmarine.subset(
        dataset_id="cmems_mod_glo_phy_my_0.083deg_P1D-m",
        variables=["thetao", "so"],
        minimum_longitude=-6.285859234924248,
        maximum_longitude=36.52446704416333,
        minimum_latitude=29.252430574547926,
        maximum_latitude=46.2175134343721,
        start_datetime="1993-01-01T00:00:00",
        end_datetime="2021-06-01T00:00:00",
        minimum_depth=depth,
        maximum_depth=depth,
        output_filename=output_file
    )

INFO - 2025-06-24T12:15:19Z - Downloading Copernicus Marine data requires a Copernicus Marine username and password, sign up for free at: https://data.marine.copernicus.eu/register


Copernicus Marine username:

  bthielpeters


Copernicus Marine password:

  ········


INFO - 2025-06-24T12:15:36Z - Selected dataset version: "202311"
INFO - 2025-06-24T12:15:36Z - Selected dataset part: "default"
INFO - 2025-06-24T12:15:38Z - Starting download. Please wait...


  0%|          | 0/8580 [00:00<?, ?it/s]

In [None]:
import xarray as xr
import os

# Depth levels whose per-level files are merged into one dataset.
depths = [
    47.37369155883789,
    318.1274108886719,
    1062.43994140625
]

output_combined = os.path.join(path, "medsea_combined_daily.nc")

opened_files = []  # every dataset opened from disk, so it can be closed afterwards
datasets = []
try:
    for depth in depths:
        file_path = os.path.join(path, f"medsea_daily_depth_{int(depth)}.nc")
        ds = xr.open_dataset(file_path)
        opened_files.append(ds)

        # Remove the depth coordinate/variable completely ...
        if "depth" in ds.coords:
            ds = ds.drop_vars("depth", errors="ignore")
        # ... and, if "depth" is still a dimension (of length 1), drop that as well.
        if "depth" in ds.dims:
            ds = ds.squeeze("depth", drop=True)

        # Re-introduce depth as a new leading dimension labelled with the requested value.
        ds = ds.expand_dims({"depth": [depth]})
        datasets.append(ds)

    # Stack the single-level datasets along depth, ordered by increasing depth.
    combined = xr.concat(datasets, dim="depth")
    combined = combined.sortby("depth")

    combined.to_netcdf(output_combined)
finally:
    # Release the file handles of the per-depth files even if a step above fails;
    # otherwise they stay open for the lifetime of the kernel.
    for opened in opened_files:
        opened.close()

print(f"Combined file: {output_combined}")

