# Copernicus data
In this notebook we download observation data from Copernicus for some chosen stations.

### Load packages

In [1]:
import copernicusmarine
from pprint import pprint
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import re
import os

### Define parameters

In [3]:
username = 'mjessen'
password = 'gM-Rzb4cS4Lwudk'
dataset_id = 'cmems_obs-ins_glo_phybgcwav_mynrt_na_irr'
part = 'latest'
params_cat = 'waves'
feature = 'TS'

start_date = '2022-01-01'
end_date = '2022-12-31'

north = 56
south = 50
west = -2
east = 10

### Read index file

In [4]:
df = pd.read_csv("copernicus-data/index_monthly.txt", skiprows=5)
print(df.shape)
df.columns

(542559, 12)


Index(['# product_id', 'file_name', 'geospatial_lat_min', 'geospatial_lat_max',
       'geospatial_lon_min', 'geospatial_lon_max', 'time_coverage_start',
       'time_coverage_end', 'institution', 'date_update', 'data_mode',
       'parameters'],
      dtype='object')

### Filter stations by location and parameters

In [6]:
# Filter by location and parameters
filtered_df = df[(df['geospatial_lat_min'] > south) &
                 (df['geospatial_lat_max'] < north) &
                 (df['geospatial_lon_min'] > west) &
                 (df['geospatial_lon_max'] < east) &
                 (df['geospatial_lon_max'] == df['geospatial_lon_min'] ) &
                 (df['geospatial_lat_max'] == df['geospatial_lat_min'] ) &
                 (df['parameters'].str.contains("VHM0", na=False)) &
                 (df['parameters'].str.contains("VTZA", na=False)) &
                 #(df['parameters'].str.contains("VTM01", na=False)) | (df['parameters'].str.contains("VTM02", na=False))
                 (df['file_name'].str.contains('202201'))
                 #& (df['file_name'].str.contains('Q1'))
                ]

print(filtered_df.shape)

# Copy df for later use
file_df = filtered_df[['file_name']].copy()

# Print basenames of filenames
file_names = file_df['file_name']
for name in file_names:
    basename = os.path.basename(name)
    print(basename)

(74, 12)
NO_TS_MO_6200170_202201.nc
NO_TS_MO_6200304_202201.nc
NO_TS_MO_6200305_202201.nc
NO_TS_MO_6201067_202201.nc
NO_TS_MO_6201068_202201.nc
NO_TS_MO_6201082_202201.nc
NO_TS_MO_6201083_202201.nc
NO_TS_MO_6202108_202201.nc
NO_TS_MO_A121_202201.nc
NO_TS_MO_A122_202201.nc
NO_TS_MO_A2Buoy_202201.nc
NO_TS_MO_AWG_202201.nc
NO_TS_MO_AkkaertSouthwestBuoy_202201.nc
NO_TS_MO_BolVanHeistBuoy_202201.nc
NO_TS_MO_Brouwershavensegat_202201.nc
NO_TS_MO_CadzandBoei_202201.nc
NO_TS_MO_Deurlo_202201.nc
NO_TS_MO_DomburgerRassen_202201.nc
NO_TS_MO_EurogeulDWE_202201.nc
NO_TS_MO_EurogeulE13_202201.nc
NO_TS_MO_Europlatform2_202201.nc
NO_TS_MO_Europlatform3_202201.nc
NO_TS_MO_F3platform_202201.nc
NO_TS_MO_Fanoebugt_202201.nc
NO_TS_MO_Hansweert_202201.nc
NO_TS_MO_HonteSloehaven_202201.nc
NO_TS_MO_IJmuidenMunitiestort2_202201.nc
NO_TS_MO_IJmuidenMunitiestort_202201.nc
NO_TS_MO_J61_202201.nc
NO_TS_MO_K13a2_202201.nc
NO_TS_MO_K13a3_202201.nc
NO_TS_MO_K13a_202201.nc
NO_TS_MO_K141_202201.nc
NO_TS_MO_KeetenBoei_2

### Add to stations.txt file

In [26]:
with open("stations.txt", "w") as f:
    f.write('6202108\n')
    f.write('AkkaertSouthwestBuoy\n')
    f.write('Europlatform2\n')
    f.write('Europlatform3\n')
    f.write('F16\n')
    f.write('F3platform\n')
    f.write('J61\n')
    f.write('K13a_\n')
    f.write('K141_\n')
    f.write('L91_\n')
    f.write('LichteilandGoeree1_\n')
    f.write('Q1_\n')
    f.write('WaddenEierlandseGat\n')
    f.write('WesthinderBuoy\n')

### Open stations.txt file

In [27]:
with open("stations.txt", "r") as f:
    station_files = [line.strip() for line in f if line.strip()]

station_files

['6202108',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F16',
 'F3platform',
 'J61',
 'K13a_',
 'K141_',
 'L91_',
 'LichteilandGoeree1_',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy']

### Create dataframe with stations

In [28]:
# Modify station files
pattern = "|".join(re.escape(s) for s in station_files)  # escape in case of special characters

# Filter by location and parameters
filtered_df = df[(df['file_name'].str.contains(pattern, na=False))
                 & (df['file_name'].str.contains('202201'))
                ]

print(filtered_df.shape)

# Copy df for later use
file_df = filtered_df[['file_name']].copy()

# Print basenames of filenames
file_names = file_df['file_name']
for name in file_names:
    basename = os.path.basename(name)
    print(basename)

(15, 12)
NO_TS_MO_6202108_202201.nc
NO_TS_MO_AkkaertSouthwestBuoy_202201.nc
NO_TS_MO_Europlatform2_202201.nc
NO_TS_MO_Europlatform3_202201.nc
NO_TS_MO_F3platform_202201.nc
NO_TS_MO_J61_202201.nc
NO_TS_MO_K13a_202201.nc
NO_TS_MO_K141_202201.nc
NO_TS_MO_L91_202201.nc
NO_TS_MO_LichteilandGoeree1_202201.nc
NO_TS_MO_Q1_202201.nc
NO_TS_MO_WaddenEierlandseGat_202201.nc
NO_TS_MO_WesthinderBuoy_202201.nc
NO_TS_TG_F3platformTG_202201.nc
NO_TS_TG_J61TG_202201.nc


### Create stations.csv file

In [29]:
stations_data = []

for station in station_files:
    for name in filtered_df['file_name']:
        if station in name:
            row = filtered_df[filtered_df['file_name'] == name]
            lat, lon = row['geospatial_lat_min'].values[0], row['geospatial_lon_min'].values[0]
            stations_data.append({'station': station, 'lat': lat, 'lon': lon})

station_df = pd.DataFrame(stations_data)
#print(station_df)

# Keep only unique rows
df_unique = station_df.drop_duplicates(subset='station', keep='first')
print(df_unique)

# Save to CSV
df_unique.to_csv('../observations/stations.csv', index=False)

                 station       lat      lon
0                6202108  54.01433  6.58385
1   AkkaertSouthwestBuoy  51.41600  2.81900
2          Europlatform2  51.99861  3.27639
3          Europlatform3  51.99861  3.27639
4             F3platform  54.85389  4.72750
6                    J61  53.81663  2.95000
8                  K13a_  53.21778  3.22000
9                  K141_  53.26671  3.63327
10                  L91_  53.61380  4.96070
11   LichteilandGoeree1_  51.92583  3.66972
12                   Q1_  52.92640  4.15060
13   WaddenEierlandseGat  53.27694  4.66167
14        WesthinderBuoy  51.38110  2.43580


### Create output txt file with files to download

In [30]:
pattern

'6202108|AkkaertSouthwestBuoy|Europlatform2|Europlatform3|F16|F3platform|J61|K13a_|K141_|L91_|LichteilandGoeree1_|Q1_|WaddenEierlandseGat|WesthinderBuoy'

In [31]:
# Filter by location and parameters
filtered_df = df[(df['file_name'].str.contains(pattern, na=False))
                 & (df['file_name'].str.contains('2022')) 
                ]

print(filtered_df.shape)

# Copy df for later use
file_df = filtered_df[['file_name']].copy()

(179, 12)


In [32]:
with open("output.txt", "w") as f:
    for filename in file_df["file_name"]:
        f.write(str(filename) + "\n")

# Define list with files to download
file_list = 'output.txt'

# Define output storage parameters
output_directory = './raw-data'

### Download files

Download all files from file_list to output_directory. 

In [33]:
# Download the file to the station folder 
copernicusmarine.get(
    username=username,
    password=password,
    dataset_id=dataset_id,
    index_parts=False,
    file_list = file_list,
    output_directory=output_directory,
    no_directories=True
)


INFO - 2025-09-03T11:43:45Z - Selected dataset version: "202311"
INFO - 2025-09-03T11:43:45Z - Selected dataset part: "latest"
Downloading files: 100%|██████████| 179/179 [02:08<00:00,  1.39it/s]


ResponseGet(files=[FileGet(s3_url='s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/CT/202212/BO_PR_CT_SMHIF16_202212.nc', https_url='https://s3.waw3-1.cloudferro.com/mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/CT/202212/BO_PR_CT_SMHIF16_202212.nc', file_size=0.10920906066894531, last_modified_datetime='2024-06-18T21:12:52+00:00', etag='"dcbcd60b7e1a81cb47eb5ba41cde1ed4"', file_format='.nc', output_directory=WindowsPath('raw-data'), filename='BO_PR_CT_SMHIF16_202212.nc', file_path=WindowsPath('raw-data/BO_PR_CT_SMHIF16_202212.nc'), file_status='DOWNLOADED'), FileGet(s3_url='s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202201/NO_TS_MO_6202108_202201.nc', https_url='https://s3.waw3-1.cloudferro.com/mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT