#  Sample mapfile generation for esgpublish operation

## Set constants
- Filenames should be paths relative to the location of this Notebook file

In [5]:
# Input: filename of the metadata listing.
METAFILE = "../../mapfiles/test_meta2.txt"

# Project identifier; also the path component where the dataset id starts.
FIRST_ID = "sampleSEC"

# Output: filename of the generated mapfile.
OUTFILE = "../../mapfiles/test-file3.map"

## Run this bash code in a terminal
- easiest to run in same directory as the Notebook
- wget is required
- scans a directory tree for .nc files
- generates the "metafile" 

## Open the input file

In [4]:
# Read the metadata file in one go: ``arr`` ends up as a one-element list
# whose single entry is the complete text of METAFILE.
arr = list()
with open(METAFILE) as f:
    arr += [f.read()]

## Iterate through input data and write out mapfile

In [6]:
# Convert every metadata record into one mapfile line of the form
#   dataset_id#version | path | size | mod_time=... | checksum=... | checksum_type=SHA256
with open(OUTFILE, "w") as f:

    for chunk in arr:
        # An entry of ``arr`` may hold several newline-separated records, so
        # each line is processed on its own; splitting the whole entry on
        # whitespace would only ever expose the first record.
        for line in chunk.splitlines():
            parts = line.split()
            if not parts:
                continue  # blank line: nothing to publish

            # Whitespace-separated record layout: checksum, path, timestamp, size.
            checksum = parts[0]
            path = parts[1]
            ts = parts[2]  # timestamp
            sz = parts[3]  # size

            # The dataset id is built from the path components starting at
            # FIRST_ID and stopping before the directory that directly
            # contains the file; that directory supplies the version.
            pp2 = path.split('/')
            idx = pp2.index(FIRST_ID)  # ValueError if FIRST_ID is not in the path
            last = len(pp2) - 2
            # [1:] drops the first character of the version directory name
            # (presumably a leading "v", e.g. "v20200101" -- TODO confirm).
            dset_id = ".".join(pp2[idx:last]) + "#" + pp2[last][1:]

            out_arr = [
                dset_id,
                path,
                sz,
                "mod_time=" + ts,
                "checksum=" + checksum,
                "checksum_type=SHA256",
            ]

            # One record per line, so multiple records do not run together.
            f.write(" | ".join(out_arr) + "\n")

## esgpublish operation

- See https://esg-publisher.readthedocs.org/
- Ensure you have followed the steps to install and configure the publisher to create an esg.yaml file
- Assuming you have saved your config file to the current directory as esg.yaml and you have the mapfile `test-file.map`, you should run in a terminal:

esgpublish --map test-file.map --config esg.yaml --no-auth  # No auth used in esgf-docker index install

## Running test publishing using the esgcet module directly

The cells below handle a "generic" publishing workflow for testing purposes.  These are not intended for long production publication runs.

### 1.)  Import statements

In [None]:
from esgcet.generic_netcdf import GenericPublisher

### 2.)  Configure the publisher

In [None]:
# Settings handed to the publisher, collected in a single dict literal.
argdict = {
    "fullmap": OUTFILE,  # see cells above...
    "silent": False,
    "verbose": False,
    "cert": "",  # must be set for restricted ESGF1 publishing relying on certs
    "index_node": "esgf-fedtest.llnl.gov",
    "data_node": "fake-test-datanode.llnl.gov",
    "data_roots": {"/p/user_pub/PCMDIobs": "test4obs"},
    "globus": "FAKE",
    "dtn": 'none',
    "replica": False,
    "proj": FIRST_ID,
    "json_file": None,
    "auth": False,
    "user_project_config": {},
    "test": True,
    "verify": True,
    "mountpoints": None,
    "autoc_command": None,
    "enable_archive": False,
}

### 3.) Create a publisher and run the workflow

In [None]:
# Build the publisher from the settings dict assembled in the previous cell.
publisher = GenericPublisher(argdict)

# Run the publisher's workflow.
publisher.workflow()