# Copernicus data
In this notebook we download observation data from Copernicus for some chosen stations.

### Load packages

In [1]:
import copernicusmarine
from pprint import pprint
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import re
import os

### Define parameters

In [3]:
# Credentials are read from the environment so that no secret is stored in the
# notebook. Either value is None when its variable is unset.
# NOTE(review): the Copernicus Marine Toolbox documents these variable names and
# is expected to fall back to its own credential lookup when given None —
# confirm for the installed version.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated.
username = os.environ.get('COPERNICUSMARINE_SERVICE_USERNAME')
password = os.environ.get('COPERNICUSMARINE_SERVICE_PASSWORD')

# Dataset selection.
dataset_id = 'cmems_obs-ins_glo_phybgcwav_mynrt_na_irr'
part = 'latest'
params_cat = 'waves'
feature = 'TS'

# Period of interest (inclusive date strings).
start_date = '2022-01-01'
end_date = '2022-12-31'

# Bounding box used to pre-select stations (degrees latitude / longitude).
north = 56
south = 50
west = -2
east = 10

### Read index file

In [4]:
# Load the monthly index listing. The first five lines of the file are skipped,
# so the line that follows them supplies the column names.
index_file = "copernicus-data/index_monthly.txt"
df = pd.read_csv(index_file, skiprows=5)

# Report the table dimensions, then show the column labels as the cell output.
print(df.shape)
df.columns

(542559, 12)


Index(['# product_id', 'file_name', 'geospatial_lat_min', 'geospatial_lat_max',
       'geospatial_lon_min', 'geospatial_lon_max', 'time_coverage_start',
       'time_coverage_end', 'institution', 'date_update', 'data_mode',
       'parameters'],
      dtype='object')

### Filter stations by location and parameters

In [6]:
# Select index rows that lie strictly inside the bounding box, describe a single
# point (min == max on both axes), list both VHM0 and VTZA among their
# parameters, and belong to the 202201 monthly files.
inside_box = (
    (df['geospatial_lat_min'] > south) &
    (df['geospatial_lat_max'] < north) &
    (df['geospatial_lon_min'] > west) &
    (df['geospatial_lon_max'] < east)
)
single_point = (
    (df['geospatial_lon_max'] == df['geospatial_lon_min']) &
    (df['geospatial_lat_max'] == df['geospatial_lat_min'])
)
has_wave_params = (
    df['parameters'].str.contains("VHM0", na=False) &
    df['parameters'].str.contains("VTZA", na=False)
)
# na=False keeps this mask strictly boolean even if a file name is missing,
# matching the other string masks; regex=False treats '202201' as plain text.
is_jan_2022 = df['file_name'].str.contains('202201', na=False, regex=False)

filtered_df = df[inside_box & single_point & has_wave_params & is_jan_2022]

print(filtered_df.shape)

# Keep an independent copy of the file names for later use
file_df = filtered_df[['file_name']].copy()

# Print the base name (last path component) of every selected file
file_names = file_df['file_name']
for name in file_names:
    basename = os.path.basename(name)
    print(basename)

(74, 12)
NO_TS_MO_6200170_202201.nc
NO_TS_MO_6200304_202201.nc
NO_TS_MO_6200305_202201.nc
NO_TS_MO_6201067_202201.nc
NO_TS_MO_6201068_202201.nc
NO_TS_MO_6201082_202201.nc
NO_TS_MO_6201083_202201.nc
NO_TS_MO_6202108_202201.nc
NO_TS_MO_A121_202201.nc
NO_TS_MO_A122_202201.nc
NO_TS_MO_A2Buoy_202201.nc
NO_TS_MO_AWG_202201.nc
NO_TS_MO_AkkaertSouthwestBuoy_202201.nc
NO_TS_MO_BolVanHeistBuoy_202201.nc
NO_TS_MO_Brouwershavensegat_202201.nc
NO_TS_MO_CadzandBoei_202201.nc
NO_TS_MO_Deurlo_202201.nc
NO_TS_MO_DomburgerRassen_202201.nc
NO_TS_MO_EurogeulDWE_202201.nc
NO_TS_MO_EurogeulE13_202201.nc
NO_TS_MO_Europlatform2_202201.nc
NO_TS_MO_Europlatform3_202201.nc
NO_TS_MO_F3platform_202201.nc
NO_TS_MO_Fanoebugt_202201.nc
NO_TS_MO_Hansweert_202201.nc
NO_TS_MO_HonteSloehaven_202201.nc
NO_TS_MO_IJmuidenMunitiestort2_202201.nc
NO_TS_MO_IJmuidenMunitiestort_202201.nc
NO_TS_MO_J61_202201.nc
NO_TS_MO_K13a2_202201.nc
NO_TS_MO_K13a3_202201.nc
NO_TS_MO_K13a_202201.nc
NO_TS_MO_K141_202201.nc
NO_TS_MO_KeetenBoei_2

### Add to stations.txt file

In [36]:
# Station name fragments to keep, written one per line to stations.txt.
# Some entries end with an underscore — presumably so that substring matching
# does not also select longer names such as K13a2; confirm.
station_names = [
    '6202108',
    'AkkaertSouthwestBuoy',
    'Europlatform2',
    'Europlatform3',
    'F3platform',
    'IJmuidenMunitiestort_',
    'J61',
    'K13a_',
    'K141_',
    'KeetenBoei',
    'KwintebankBuoy',
    'L91_',
    'LichteilandGoeree1_',
    'MaeslantkeringZeezijdeNoordMeetpaal',
    'MaeslantkeringZeezijdeZuidMeetpaal',
    'NieuwpoortBuoy',
    'Nymindegab',
    'Oosterschelde11',
    'OstendEasternPalisadeBuoy',
    'OverloopVanValkenisse',
    'PasVanTerneuzenBoei',
    'Q1_',
    'WaddenEierlandseGat',
    'WesthinderBuoy',
    'ZeebruggeZandopvangkadeBuoy',
    'ZwinBuoy',
]

# Overwrite the file; every name is followed by a newline.
with open("stations.txt", "w") as f:
    f.write("".join(f"{entry}\n" for entry in station_names))

### Open stations.txt file

In [37]:
# Read stations.txt back into a list, trimming surrounding whitespace and
# dropping lines that are empty after trimming.
station_files = []
with open("stations.txt", "r") as f:
    for raw_line in f:
        entry = raw_line.strip()
        if entry:
            station_files.append(entry)

station_files

['6202108',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F3platform',
 'IJmuidenMunitiestort_',
 'J61',
 'K13a_',
 'K141_',
 'KeetenBoei',
 'KwintebankBuoy',
 'L91_',
 'LichteilandGoeree1_',
 'MaeslantkeringZeezijdeNoordMeetpaal',
 'MaeslantkeringZeezijdeZuidMeetpaal',
 'NieuwpoortBuoy',
 'Nymindegab',
 'Oosterschelde11',
 'OstendEasternPalisadeBuoy',
 'OverloopVanValkenisse',
 'PasVanTerneuzenBoei',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy',
 'ZeebruggeZandopvangkadeBuoy',
 'ZwinBuoy']

### Create dataframe with stations

In [38]:
# An empty station list would produce the pattern "", which matches every file
# name in the index, so stop early instead of selecting everything.
if not station_files:
    raise ValueError("station_files is empty; nothing to select")

# Combine the station names into one regex alternation; re.escape protects
# against names that contain regex metacharacters.
pattern = "|".join(re.escape(s) for s in station_files)

# Keep index rows whose file name contains any chosen station name and the
# 202201 month tag.
# NOTE(review): matching is by substring, so 'J61' also selects 'J61TG' files
# (see the printed list) — confirm that is intended.
matches_station = df['file_name'].str.contains(pattern, na=False)
# na=False keeps the mask boolean for missing names; regex=False treats
# '202201' as plain text.
is_jan_2022 = df['file_name'].str.contains('202201', na=False, regex=False)
filtered_df = df[matches_station & is_jan_2022]

print(filtered_df.shape)

# Keep an independent copy of the file names for later use
file_df = filtered_df[['file_name']].copy()

# Print the base name (last path component) of every selected file
file_names = file_df['file_name']
for name in file_names:
    basename = os.path.basename(name)
    print(basename)

(29, 12)
NO_TS_MO_6202108_202201.nc
NO_TS_MO_AkkaertSouthwestBuoy_202201.nc
NO_TS_MO_Europlatform2_202201.nc
NO_TS_MO_Europlatform3_202201.nc
NO_TS_MO_F3platform_202201.nc
NO_TS_MO_IJmuidenMunitiestort_202201.nc
NO_TS_MO_J61_202201.nc
NO_TS_MO_K13a_202201.nc
NO_TS_MO_K141_202201.nc
NO_TS_MO_KeetenBoei_202201.nc
NO_TS_MO_KwintebankBuoy_202201.nc
NO_TS_MO_L91_202201.nc
NO_TS_MO_LichteilandGoeree1_202201.nc
NO_TS_MO_MaeslantkeringZeezijdeNoordMeetpaal_202201.nc
NO_TS_MO_MaeslantkeringZeezijdeZuidMeetpaal_202201.nc
NO_TS_MO_NieuwpoortBuoy_202201.nc
NO_TS_MO_Nymindegab_202201.nc
NO_TS_MO_Oosterschelde11_202201.nc
NO_TS_MO_OstendEasternPalisadeBuoy_202201.nc
NO_TS_MO_OverloopVanValkenisse_202201.nc
NO_TS_MO_PasVanTerneuzenBoei_202201.nc
NO_TS_MO_Q1_202201.nc
NO_TS_MO_WaddenEierlandseGat_202201.nc
NO_TS_MO_WesthinderBuoy_202201.nc
NO_TS_MO_ZeebruggeZandopvangkadeBuoy_202201.nc
NO_TS_MO_ZwinBuoy_202201.nc
NO_TS_TG_F3platformTG_202201.nc
NO_TS_TG_J61TG_202201.nc
NO_TS_TG_Oosterschelde11TG_20220

### Create stations.csv file

In [49]:
# Collect one record per (station, matching file) pair. The three needed
# columns are walked directly instead of re-filtering filtered_df for every
# file name, which avoids a full table scan per match.
stations_data = []
file_coords = filtered_df[['file_name', 'geospatial_lat_min', 'geospatial_lon_min']]

for station in station_files:
    for name, lat, lon in file_coords.itertuples(index=False, name=None):
        # Substring match of the station name against the file name
        if station in name:
            stations_data.append({'station': station, 'lat': lat, 'lon': lon})

# Naming the columns explicitly keeps the 'station' column present even when
# nothing matched, so the de-duplication below does not raise a KeyError.
station_df = pd.DataFrame(stations_data, columns=['station', 'lat', 'lon'])

# Keep only the first record of each station
df_unique = station_df.drop_duplicates(subset='station', keep='first')
print(df_unique)

# Save to CSV without the index column
df_unique.to_csv('../observations/stations.csv', index=False)

                                 station       lat      lon
0                                6202108  54.01433  6.58385
12                  AkkaertSouthwestBuoy  51.41600  2.81900
24                         Europlatform2  51.99861  3.27639
36                         Europlatform3  51.99861  3.27639
48                            F3platform  54.85389  4.72750
72                 IJmuidenMunitiestort_  52.55000  4.05833
84                                   J61  53.81663  2.95000
108                                K13a_  53.21778  3.22000
120                                K141_  53.26671  3.63327
132                           KeetenBoei  51.60800  3.96600
144                       KwintebankBuoy  51.34900  2.70600
156                                 L91_  53.61380  4.96070
168                  LichteilandGoeree1_  51.92583  3.66972
180  MaeslantkeringZeezijdeNoordMeetpaal  51.96100  4.15900
192   MaeslantkeringZeezijdeZuidMeetpaal  51.95780  4.15450
204                       NieuwpoortBuoy

### Create output txt file with files to download

In [50]:
# Display the combined station-name regex as the cell output (inspection only).
pattern

'6202108|AkkaertSouthwestBuoy|Europlatform2|Europlatform3|F3platform|IJmuidenMunitiestort_|J61|K13a_|K141_|KeetenBoei|KwintebankBuoy|L91_|LichteilandGoeree1_|MaeslantkeringZeezijdeNoordMeetpaal|MaeslantkeringZeezijdeZuidMeetpaal|NieuwpoortBuoy|Nymindegab|Oosterschelde11|OstendEasternPalisadeBuoy|OverloopVanValkenisse|PasVanTerneuzenBoei|Q1_|WaddenEierlandseGat|WesthinderBuoy|ZeebruggeZandopvangkadeBuoy|ZwinBuoy'

In [51]:
# Keep every index row whose file name contains one of the chosen station names
# and the text '2022'.
# NOTE(review): the year is hard-coded here rather than derived from
# start_date / end_date — confirm they are meant to agree.
matches_station = df['file_name'].str.contains(pattern, na=False)
# na=False keeps the mask boolean for missing names; regex=False treats '2022'
# as plain text.
in_2022 = df['file_name'].str.contains('2022', na=False, regex=False)
filtered_df = df[matches_station & in_2022]

print(filtered_df.shape)

# Keep an independent copy of the file names for later use
file_df = filtered_df[['file_name']].copy()

(346, 12)


Write the selected file names to `output.txt`, one per line.

In [52]:
# Overwrite output.txt with the selected file names, one per line.
with open("output.txt", "w") as f:
    f.writelines(f"{filename}\n" for filename in file_df["file_name"])

In [53]:
# Folder that receives the downloaded files
output_directory = './raw-data'

# Text file that lists the files to download
file_list = 'output.txt'

### Download files

Download all files from file_list to output_directory. 

In [48]:
# Download the files named in file_list into output_directory.
# The options are gathered first and then forwarded unchanged to
# copernicusmarine.get; its return value is shown as the cell output.
download_options = dict(
    username=username,
    password=password,
    dataset_id=dataset_id,
    index_parts=False,
    file_list=file_list,
    output_directory=output_directory,
    no_directories=True,
)
copernicusmarine.get(**download_options)


INFO - 2025-09-03T13:02:01Z - Selected dataset version: "202311"
INFO - 2025-09-03T13:02:01Z - Selected dataset part: "latest"
Downloading files: 100%|██████████| 168/168 [01:58<00:00,  1.42it/s]


ResponseGet(files=[FileGet(s3_url='s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202201/NO_TS_MO_IJmuidenMunitiestort_202201.nc', https_url='https://s3.waw3-1.cloudferro.com/mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202201/NO_TS_MO_IJmuidenMunitiestort_202201.nc', file_size=0.4287881851196289, last_modified_datetime='2023-11-18T16:01:17+00:00', etag='"7ac8ba396b186c63d89f5f0d3c41182c"', file_format='.nc', output_directory=WindowsPath('raw-data'), filename='NO_TS_MO_IJmuidenMunitiestort_202201.nc', file_path=WindowsPath('raw-data/NO_TS_MO_IJmuidenMunitiestort_202201.nc'), file_status='DOWNLOADED'), FileGet(s3_url='s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202202/NO_TS_MO_IJmuidenMunitiestort_202202.nc', https_url='https://s3.waw3-1.cloudfe