# Preprocess stations
In this notebook we load the NetCDF (`.nc`) files for the stations listed in `stations.txt` and convert them to CSV files, one per station.

### Load packages

In [1]:
import pandas as pd
import xarray as xr
import os

### Open and read stations in txt file

In [2]:
# Read the station identifiers: one per line, surrounding whitespace removed,
# blank lines ignored.
with open("stations.txt", "r") as stations_txt:
    stripped_entries = (raw_line.strip() for raw_line in stations_txt)
    station_files = [entry for entry in stripped_entries if entry]

# Show the first few entries as a quick sanity check
print("Subset of station files:")
station_files[:5]

Subset of station files:


['6200144', '6200145', '6201045', '6201047', '6201050']

### Open and read file names in txt file

In [3]:
# Collect the full NetCDF file paths listed in output.txt: one per line,
# surrounding whitespace removed, blank lines ignored.
file_names = []
with open("output.txt", "r") as listing:
    for raw_line in listing:
        entry = raw_line.strip()
        if entry:
            file_names.append(entry)

# Show the first few entries as a quick sanity check
print("Subset of file names:")
file_names[:3]

Subset of file names:


['INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202201/NO_TS_MO_6200144_202201.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202202/NO_TS_MO_6200144_202202.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202203/NO_TS_MO_6200144_202203.nc']

### Convert nc files to csv files

In [8]:
# Folder holding the downloaded NetCDF files (looked up by base name only)
obs_fldr = 'raw-data'

# Folder that receives one CSV per station
csv_fldr = "../observations/measurements"

# NetCDF variable name -> column name written to the CSV.
# 'TIME' is required; the wave variables are copied only when present in a file.
var_names = {
    'VHM0': 'significant_wave_height',
    'VTPK': 'peak_wave_period',
    'VMDR': 'mean_wave_direction',
}

# Make sure the output folder exists so to_csv does not fail after all the work is done
os.makedirs(csv_fldr, exist_ok=True)

for station in station_files:
    # Remove trailing underscore if it exists
    station_clean = station.rstrip("_")
    csv_path = f"{csv_fldr}/{station_clean}.csv"

    # Check for an existing CSV *before* reading any NetCDF file, so re-running
    # the cell does not redo the expensive load/convert step for finished stations.
    if os.path.exists(csv_path):
        print(f"Skipped saving {station_clean}.csv (file already exists)")
        continue

    stations_data = []

    for name in file_names:
        # NOTE: plain substring match of the station entry against the listed path
        if station not in name:
            continue

        # Files are stored flat in obs_fldr; skip listed files that were not downloaded
        file_path = os.path.join(obs_fldr, os.path.basename(name))
        if not os.path.exists(file_path):
            continue

        # Open inside a context manager so the file handle is released as soon
        # as the data has been materialised into a dataframe.
        with xr.open_dataset(file_path) as ds:
            raw_df = ds.to_dataframe().reset_index()

        # Select the wanted variables (in a fixed order) and give them readable names
        cols = {'TIME': 'datetime'}
        cols.update({src: dst for src, dst in var_names.items() if src in raw_df.columns})
        df = raw_df[list(cols)].rename(columns=cols).dropna()

        stations_data.append(df)

    # pd.concat raises ValueError on an empty list; report and move on instead
    # of aborting the conversion of all remaining stations.
    if not stations_data:
        print(f"Skipped {station_clean}.csv (no data files found in {obs_fldr})")
        continue

    # Convert to csv
    full_df = pd.concat(stations_data)
    full_df.to_csv(csv_path, index=False)
    print(f"Saved {station_clean}.csv")

Saved 6200144.csv
Saved 6200145.csv
Saved 6201045.csv
Saved 6201047.csv
Saved 6201050.csv
Saved 6201059.csv
Saved A121.csv
Saved AkkaertSouthwestBuoy.csv
Saved Europlatform2.csv
Saved Europlatform3.csv
Saved F3platform.csv
Saved IJmuidenMunitiestort.csv
Saved J61.csv
Saved K13a.csv
Saved K141.csv
Saved KeetenBoei.csv
Saved KwintebankBuoy.csv
Saved L91.csv
Saved LichteilandGoeree1.csv
Saved MaeslantkeringZeezijdeNoordMeetpaal.csv
Saved MaeslantkeringZeezijdeZuidMeetpaal.csv
Saved NieuwpoortBuoy.csv
Saved Nymindegab.csv
Saved Oosterschelde11.csv
Saved OstendEasternPalisadeBuoy.csv
Saved OverloopVanValkenisse.csv
Saved PasVanTerneuzenBoei.csv
Saved Q1.csv
Saved WaddenEierlandseGat.csv
Saved WesthinderBuoy.csv
Saved ZeebruggeZandopvangkadeBuoy.csv
Saved ZwinBuoy.csv
