# Compiling metadata from original GFDL-MOM6-COBALT2 outputs
**Author:** Denisse Fierro Arcos  
**Date:** 2024-09-05  

This notebook collects the variable metadata (attributes) stored in the GFDL-MOM6-COBALT2 output files. The metadata is then compiled into a data frame and saved to disk as a CSV file for future reference.

## Loading libraries

In [1]:
import netCDF4
import os
from glob import glob
import pandas as pd

## Defining basic variables

In [3]:
#Names of variables that must not be treated as the data variable of a file
not_var = ['time', 'lon', 'lat', 'lev', 'lev2']
#Variable attributes that are left out of the metadata table
not_att = ['_FillValue', 'missing_value', 'coordinates', 'grid_mapping', 'cell_methods']

#Define base directory for GFDL files
base_dir = '/g/data/vf71/fishmip_inputs/ISIMIP3a/global_inputs/obsclim/025deg'

#Get a list of all netCDF files stored directly inside the base directory.
#glob returns paths in arbitrary order, so they are sorted to keep the
#processing order (and the row order of the resulting table) reproducible.
#NOTE(review): the pattern keeps every '*.nc' file in the folder; if depth files
#are stored here they are not filtered out by this line - confirm
list_files = sorted(glob(os.path.join(base_dir, '*.nc')))

#Create empty dictionary to store metadata
var_dict = {}

## Extracting metadata from GFDL-MOM6-COBALT2 files

In [8]:
#Get metadata for all GFDL files
for f in list_files:
    #Open the file in a context manager so it is closed as soon as its
    #metadata has been read (otherwise every file handle stays open)
    with netCDF4.Dataset(f, 'r') as ds:
        #Keep only variables that are not in the list of ignored names
        data_vars = [i for i in ds.variables if i not in not_var]
        #Exactly one data variable is expected per file; report the file name
        #and what was found if that is not the case
        if len(data_vars) != 1:
            raise ValueError(
                f'Expected exactly one data variable in {f}, found: {data_vars}')
        [var] = data_vars
        nc_var = ds.variables[var]
        #Store every attribute of the variable, except the ignored ones
        var_dict[var] = {a: nc_var.getncattr(a) for a in nc_var.ncattrs()
                         if a not in not_att}

## Creating a data frame from the dictionary and saving it to disk

In [9]:
#Path to the CSV file where the metadata table will be written
f_out = '/g/data/vf71/fishmip_inputs/ISIMIP3a/global_inputs/obsclim/gfdl_var_keys.csv'

#Build a table with one row per variable and one column per attribute
var_df = pd.DataFrame(var_dict).T
#Move variable names from the index into a column called 'short_name'
var_df = var_df.reset_index(names = 'short_name')

#Write table to disk without row labels
var_df.to_csv(f_out, index = False)