# Preprocess stations
In this notebook we load the NetCDF (`.nc`) files for the stations listed in `stations.txt` and convert them to CSV files.

### Load packages

In [49]:
import pandas as pd
import xarray as xr
import numpy as np
import time
import netCDF4 as nc
import os

### Open and read stations in txt file

In [56]:
# Read the station identifiers from stations.txt: surrounding whitespace is
# stripped from every line and lines that end up empty are skipped.
with open("stations.txt", "r") as stations_txt:
    stripped_lines = (raw_line.strip() for raw_line in stations_txt)
    station_files = [entry for entry in stripped_lines if entry]

station_files

['6202108',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F3platform',
 'IJmuidenMunitiestort_',
 'J61',
 'K13a_',
 'K141_',
 'KeetenBoei',
 'KwintebankBuoy',
 'L91_',
 'LichteilandGoeree1_',
 'MaeslantkeringZeezijdeNoordMeetpaal',
 'MaeslantkeringZeezijdeZuidMeetpaal',
 'NieuwpoortBuoy',
 'Nymindegab',
 'Oosterschelde11',
 'OstendEasternPalisadeBuoy',
 'OverloopVanValkenisse',
 'PasVanTerneuzenBoei',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy',
 'ZeebruggeZandopvangkadeBuoy',
 'ZwinBuoy']

In [57]:
# Collect the full file names listed in output.txt, one per non-blank line,
# with surrounding whitespace removed.
with open("output.txt", "r") as listing:
    trimmed = (raw_line.strip() for raw_line in listing)
    file_names = [entry for entry in trimmed if entry]

#file_names

### Convert nc files to csv files

In [58]:
# Display the station names read from stations.txt
station_files

['6202108',
 'AkkaertSouthwestBuoy',
 'Europlatform2',
 'Europlatform3',
 'F3platform',
 'IJmuidenMunitiestort_',
 'J61',
 'K13a_',
 'K141_',
 'KeetenBoei',
 'KwintebankBuoy',
 'L91_',
 'LichteilandGoeree1_',
 'MaeslantkeringZeezijdeNoordMeetpaal',
 'MaeslantkeringZeezijdeZuidMeetpaal',
 'NieuwpoortBuoy',
 'Nymindegab',
 'Oosterschelde11',
 'OstendEasternPalisadeBuoy',
 'OverloopVanValkenisse',
 'PasVanTerneuzenBoei',
 'Q1_',
 'WaddenEierlandseGat',
 'WesthinderBuoy',
 'ZeebruggeZandopvangkadeBuoy',
 'ZwinBuoy']

In [59]:
# Folder holding the raw NetCDF observation files, and the folder that
# receives one CSV file per station.
obs_fldr = 'raw-data'
out_fldr = '../observations'

# Variables exported when a file provides them; TIME is always kept.
# The order here is the column order in the CSV.
wave_vars = ['VHM0', 'VTZA', 'VMDR', 'VTPK']

# Create the output folder up front so to_csv cannot fail on a missing directory.
os.makedirs(out_fldr, exist_ok=True)

for station in station_files:
    stations_data = []

    for name in file_names:
        # A file belongs to a station when the station name is a substring of
        # the file name.
        # NOTE(review): a station name that is a prefix of another station's
        # name would also match that station's files - confirm the names in
        # stations.txt are unambiguous.
        if station not in name:
            continue

        # Only the base name from output.txt is used; the file is read from obs_fldr.
        basename = os.path.basename(name)

        # The context manager closes the NetCDF file handle as soon as the
        # data has been materialised into a DataFrame.
        with xr.open_dataset(os.path.join(obs_fldr, basename)) as ds:
            df = ds.to_dataframe().reset_index()

        # Keep TIME plus whichever wave variables this file contains, then
        # drop every row that has a missing value in any kept column.
        cols = ['TIME'] + [var for var in wave_vars if var in df.columns]
        stations_data.append(df[cols].dropna())

    if not stations_data:
        # pd.concat raises ValueError on an empty list; report the station
        # and carry on with the others instead of aborting the whole run.
        print(f"Skipped {station}: no matching file names in output.txt")
        continue

    # Stack the frames of all files for this station and write a single CSV.
    full_df = pd.concat(stations_data)
    full_df.to_csv(f"{out_fldr}/{station}.csv", index=False)
    print(f"Saved {station}.csv")


Saved 6202108.csv
Saved AkkaertSouthwestBuoy.csv
Saved Europlatform2.csv
Saved Europlatform3.csv
Saved F3platform.csv
Saved IJmuidenMunitiestort_.csv
Saved J61.csv
Saved K13a_.csv
Saved K141_.csv
Saved KeetenBoei.csv
Saved KwintebankBuoy.csv
Saved L91_.csv
Saved LichteilandGoeree1_.csv
Saved MaeslantkeringZeezijdeNoordMeetpaal.csv
Saved MaeslantkeringZeezijdeZuidMeetpaal.csv
Saved NieuwpoortBuoy.csv
Saved Nymindegab.csv
Saved Oosterschelde11.csv
Saved OstendEasternPalisadeBuoy.csv
Saved OverloopVanValkenisse.csv
Saved PasVanTerneuzenBoei.csv
Saved Q1_.csv
Saved WaddenEierlandseGat.csv
Saved WesthinderBuoy.csv
Saved ZeebruggeZandopvangkadeBuoy.csv
Saved ZwinBuoy.csv
