In [4]:
import pandas as pd
import xarray as xr

# Build an xarray Dataset of SWE time series (time x station) from the NH-SWE
# matrix files, restricted to the stations listed in the Swedish metadata file.

# Input locations. The base directory is defined once so the notebook can be
# pointed at another copy of the data by editing a single line.
DATA_DIR = 'N:/gebhyd/3_Hyv/Diplomanden/2_Running/L_Nuesch/data_sweden'
MATRIX_DIR = DATA_DIR + '/NH_SWE_dataset_matrix_files'

metadata_path = DATA_DIR + '/NH_SWE_METADATA_SWEDEN.csv'
swe_matrix_path = MATRIX_DIR + '/NH_SWE_matrix_SWE_mm.csv'
nhsweid_vector_path = MATRIX_DIR + '/NH_SWE_matrix_NHSWEID_vector.csv'
date_vector_path = MATRIX_DIR + '/NH_SWE_matrix_Date_vector.csv'

# Step 1: Load the metadata (semicolon-delimited), keep only the columns used
# below, and index it by NHSWEID so stations can be looked up by ID.
metadata = (
    pd.read_csv(metadata_path, delimiter=';')[['NHSWEID', 'LAT', 'LON']]
    .set_index('NHSWEID')
)
# A duplicated ID would make the .loc lookup below return extra rows and
# silently break the one-row-per-station assumption.
assert metadata.index.is_unique, "NHSWEID values in the metadata file are not unique"

# Step 2: Load the SWE matrix (no header row; one column per station).
swe_matrix = pd.read_csv(swe_matrix_path, header=None)

# Load the ID vector that labels the SWE matrix columns.
# `read_csv(..., squeeze=True)` was removed in pandas 2.0; squeezing the
# single-column frame afterwards is the supported equivalent.
nhsweid_vector = pd.read_csv(nhsweid_vector_path, header=None).squeeze("columns")
assert swe_matrix.shape[1] == len(nhsweid_vector), (
    "SWE matrix column count does not match the NHSWEID vector length"
)

# Boolean mask over the matrix columns: True where the station is in the metadata.
relevant_ids = nhsweid_vector.isin(metadata.index)
# Pass the mask as a plain array so selection is positional and does not rely
# on the Series index happening to equal the matrix column labels.
filtered_swe_matrix = swe_matrix.loc[:, relevant_ids.values]

# Reorder the metadata to follow the station order of the matrix columns.
filtered_metadata = metadata.loc[nhsweid_vector[relevant_ids].values]

# Step 3: Load the date vector (one parsed date per matrix row).
date_vector = pd.read_csv(date_vector_path, header=None, parse_dates=[0]).squeeze("columns")
assert swe_matrix.shape[0] == len(date_vector), (
    "SWE matrix row count does not match the date vector length"
)

# Step 4: Create the xarray DataArray with time and station as dimensions.
swe_data = xr.DataArray(
    data=filtered_swe_matrix.values,
    dims=["time", "Station_ID"],
    coords={
        "time": date_vector.values,
        "Station_ID": filtered_metadata.index.values,
    },
)

# Attach longitude and latitude as non-index coordinates along the station dimension.
swe_data = swe_data.assign_coords(
    lon=("Station_ID", filtered_metadata['LON'].values),
    lat=("Station_ID", filtered_metadata['LAT'].values),
)

# Step 5: Wrap the DataArray in a Dataset under the variable name "swe".
swe_dataset = xr.Dataset({"swe": swe_data})

# Verify the structure
print(swe_dataset)


<xarray.Dataset>
Dimensions:     (Station_ID: 914, time: 26663)
Coordinates:
  * time        (time) datetime64[ns] 1949-09-01 1949-09-02 ... 2022-08-31
  * Station_ID  (Station_ID) int64 10001 10002 10003 10004 ... 11469 11470 11471
    lon         (Station_ID) float64 14.8 15.62 17.17 15.53 ... 17.9 11.7 14.22
    lat         (Station_ID) float64 56.87 60.62 65.07 58.4 ... 60.2 58.98 58.58
Data variables:
    swe         (time, Station_ID) float64 0.0 0.0 0.0 0.0 ... nan nan nan 0.0


In [6]:
# Rename the station dimension/coordinate to lower case for the exported file.
# The guard keeps this cell idempotent: once the rename has been applied,
# "Station_ID" no longer exists and an unguarded second run would raise.
if "Station_ID" in swe_dataset.dims:
    swe_dataset = swe_dataset.rename({"Station_ID": "station_id"})

# Write the dataset to NetCDF (path is relative to the notebook's working directory).
swe_dataset.to_netcdf('../CAMELS_SW/input_data/SWE_SW.nc')