# Compiling metadata from original WOA23
**Author:** Denisse Fierro Arcos  
**Date:** 2024-09-05  

This script collects the metadata stored in the World Ocean Atlas 2023 (WOA23) files downloaded from NOAA. The metadata is then compiled into a data frame, which is saved to disk for future reference.

## Loading libraries

In [None]:
import netCDF4
import os
from glob import glob
import pandas as pd

## Defining basic variables

In [None]:
#Define attributes that should be ignored
not_att = ['_FillValue', 'missing_value', 'coordinates', 'grid_mapping', 'cell_methods']

#Getting list of WOA files - Only one averaging period is selected because
#all periods share the same metadata. glob returns files in arbitrary order,
#so the list is sorted to keep results reproducible across runs
list_files = sorted(glob('/g/data/vf71/WOA_data/global/*/*00*.nc'))

#Create empty dictionary to store metadata
woa_dict = {}

## Extracting metadata from WOA files

In [None]:
#Get metadata for WOA files
for f in list_files:
    #Context manager guarantees the file is closed, even if an error occurs
    with netCDF4.Dataset(f, 'r') as ds:
        #Only two variables are used in the shiny app. Unpacking into a
        #one-item list raises ValueError unless exactly one of them is found
        [var] = [i for i in ds.variables if i in ('t_an', 's_an')]
        nc_var = ds.variables[var]
        #Attributes must be read while the file is still open
        woa_dict[var] = {a: nc_var.getncattr(a) for a in nc_var.ncattrs()
                         if a not in not_att}

## Creating a data frame from the dictionary and saving it to disk

In [None]:
#Arrange metadata so each row is a variable and each column an attribute
woa_df = pd.DataFrame(woa_dict).T
#Move variable names from the index into a 'short_name' column
woa_df = woa_df.reset_index(names = 'short_name')

#Path of the csv file that will hold the metadata
f_out = '/g/data/vf71/WOA_data/woa_var_keys.csv'
#Write data frame to disk without row labels
woa_df.to_csv(f_out, index = False)