# Preprocess stations
In this noteook we load the nc files for the stations in stations.txt and convert to csv files.

### Load packages

In [1]:
import pandas as pd
import xarray as xr
import numpy as np
import time
import netCDF4 as nc
import os

### Open and read stations in txt file

In [2]:
with open("stations.txt", "r") as f:
    station_files = [line.strip() for line in f if line.strip()]

station_files

['6200144',
 '6200145',
 '6201045',
 '6201047',
 '6201050',
 '6201059',
 'A121_',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F3platform',
 'IJmuidenMunitiestort_',
 'J61',
 'K13a_',
 'K141_',
 'KeetenBoei',
 'KwintebankBuoy',
 'L91_',
 'LichteilandGoeree1_',
 'MaeslantkeringZeezijdeNoordMeetpaal',
 'MaeslantkeringZeezijdeZuidMeetpaal',
 'NieuwpoortBuoy',
 'Nymindegab',
 'Oosterschelde11',
 'OstendEasternPalisadeBuoy',
 'OverloopVanValkenisse',
 'PasVanTerneuzenBoei',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy',
 'ZeebruggeZandopvangkadeBuoy',
 'ZwinBuoy']

In [3]:
# Get full file names from output.txt
with open("output.txt", "r") as f:
    file_names = [line.strip() for line in f if line.strip()]

file_names

['INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202201/NO_TS_MO_6200144_202201.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202202/NO_TS_MO_6200144_202202.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202203/NO_TS_MO_6200144_202203.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202204/NO_TS_MO_6200144_202204.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202205/NO_TS_MO_6200144_202205.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202206/NO_TS_MO_6200144_202206.nc',
 'INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/monthly/MO/202207/NO_TS_MO_6200144_202207.nc',
 'INSITU_GLO_

### Convert nc files to csv files

In [4]:
station_files

['6200144',
 '6200145',
 '6201045',
 '6201047',
 '6201050',
 '6201059',
 'A121_',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F3platform',
 'IJmuidenMunitiestort_',
 'J61',
 'K13a_',
 'K141_',
 'KeetenBoei',
 'KwintebankBuoy',
 'L91_',
 'LichteilandGoeree1_',
 'MaeslantkeringZeezijdeNoordMeetpaal',
 'MaeslantkeringZeezijdeZuidMeetpaal',
 'NieuwpoortBuoy',
 'Nymindegab',
 'Oosterschelde11',
 'OstendEasternPalisadeBuoy',
 'OverloopVanValkenisse',
 'PasVanTerneuzenBoei',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy',
 'ZeebruggeZandopvangkadeBuoy',
 'ZwinBuoy']

In [5]:
# Define observation folder
obs_fldr = 'raw-data'

for station in station_files:
    stations_data = []

    for name in file_names:
        if station in name:
            # Load the NetCDF file
            basename = os.path.basename(name)
            ds = xr.open_dataset(os.path.join(obs_fldr, basename))

            # Convert to dataframe
            df = ds.to_dataframe().reset_index()

            # Extract the wanted variables
            cols = ['TIME']

            if 'VHM0' in df.columns:
                cols.append('VHM0')

            if 'VTZA' in df.columns:
                cols.append('VTZA')

            if 'VMDR' in df.columns:
                cols.append('VMDR')
            
            if 'VTPK' in df.columns:
                cols.append('VTPK')
                
            df = df[cols].copy()
            df = df.dropna()

            stations_data.append(df)

KeyboardInterrupt: 

In [8]:
# Define observation folder
obs_fldr = 'raw-data'

for station in station_files:
    stations_data = []

    for name in file_names:
        if station in name:
            # Load the NetCDF file
            basename = os.path.basename(name)
            ds = xr.open_dataset(os.path.join(obs_fldr, basename))

            # Convert to dataframe
            df = ds.to_dataframe().reset_index()

            # Extract the wanted variables
            cols = ['TIME']

            if 'VHM0' in df.columns:
                cols.append('VHM0')

            if 'VTZA' in df.columns:
                cols.append('VTZA')

            if 'VMDR' in df.columns:
                cols.append('VMDR')
            
            if 'VTPK' in df.columns:
                cols.append('VTPK')
                
            df = df[cols].copy()
            df = df.dropna()

            stations_data.append(df)

    # Convert to csv
    full_df = pd.concat(stations_data)
    full_df.to_csv(f"../observations/{station}.csv", index=False)
    print(f"Saved {station}.csv")


Saved 6200145.csv


ValueError: No objects to concatenate