### Write and Download params
This cell is writing a small JSON that the workflow can read

In [10]:
import json
from pathlib import Path
import sys

repo_root = Path.cwd().parents[0]
params_path = repo_root / "metadata" / "zenodo_download.json"
params_path.parent.mkdir(parents=True, exist_ok=True)

doi = "10.5281/zenodo.16256961"
dest_dir = "data/raw"   # << download should land here

params = {"doi": doi, "dest_dir": dest_dir}
params_path.write_text(json.dumps(params, indent=2), encoding="utf-8")

print("✅ Wrote:", params_path.relative_to(repo_root))
print(params)

✅ Wrote: metadata/zenodo_download.json
{'doi': '10.5281/zenodo.16256961', 'dest_dir': 'data/raw'}


### Generate the workflow from template into .github/workflows/

This assumes the an existing `helper/zenodo-download-template.yml` with `__DOI__` placeholder.

In [11]:
repo_root = Path.cwd().parents[0]

tpl_path = repo_root / "helper" / "zenodo-download-template.yml"
params_path = repo_root / "metadata" / "zenodo_download.json"
out_dir = repo_root / ".github" / "workflows"
out_dir.mkdir(parents=True, exist_ok=True)

assert tpl_path.exists(), f"Template not found: {tpl_path}"
assert params_path.exists(), f"Params not found: {params_path}"

params = json.loads(params_path.read_text(encoding="utf-8"))
doi = params["doi"].strip()
dest_dir = params.get("dest_dir", "data/raw").strip() or "data/raw"

tpl = tpl_path.read_text(encoding="utf-8")

filled = (tpl
          .replace("__DOI__", doi)
          .replace("notebooks/zenodo_downloads", dest_dir)  # in case template has this default
         )

def slug_from_doi(d):
    m = re.search(r'(zenodo\.\d+)$', d)
    base = m.group(1) if m else d
    # simple slug
    return re.sub(r'[^a-zA-Z0-9]+', '-', base).strip('-').lower()

slug = slug_from_doi(doi)
out_path = out_dir / f"{slug}-zenodo-download.yml"

out_path.write_text(filled, encoding="utf-8")
print(f"✅ Wrote workflow: {out_path.relative_to(repo_root)}")

✅ Wrote workflow: .github/workflows/zenodo-16256961-zenodo-download.yml


### Commit the workflow and the params so that the Action can run

This uses git directly from your notebook; if you prefer therminal, do it there.

In [12]:
def run_cmd(cmd, check=True):
    print("$", cmd)
    return subprocess.run(shlex.split(cmd), cwd=repo_root, check=check)

run_cmd("git add .github/workflows metadata/zenodo_download.json")
run_cmd('git commit -m "ci: add zenodo download workflow + params"', check=False)
run_cmd("git push origin main", check=False)
print("✅ Pushed workflow + params")

$ git add .github/workflows metadata/zenodo_download.json
$ git commit -m "ci: add zenodo download workflow + params"
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	modified:   notebooks/Download_and_Store.ipynb
	modified:   notebooks/Train_and_Optimize_Model.ipynb

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	metadata/runs/
	notebooks/01_download_store.ipynb
	notebooks/smoke test.ipynb

no changes added to commit (use "git add" and/or "git commit -a")
$ git push origin main
✅ Pushed workflow + params


Everything up-to-date


### Trigger the download workflow via a tag, then pull the result

The GitHub runner downloads the file and commits it into `data/raw/`.

In [13]:
def run_cmd(cmd, check=True):
    print("$", cmd)
    return subprocess.run(shlex.split(cmd), cwd=repo_root, check=check)

# Make sure we're up to date
run_cmd("git pull --rebase origin main", check=False)

tag = "zenodo-dl-" + datetime.utcnow().strftime("%Y%m%d-%H%M%S")
existing = subprocess.run(shlex.split("git tag"), cwd=repo_root, capture_output=True, text=True).stdout.splitlines()
if tag in existing:
    tag = tag + "-x"

run_cmd(f"git tag {tag}")
run_cmd("git push origin --tags")

print(f"✅ Triggered Zenodo download workflow with tag: {tag}")
print("➡️  Watch it in GitHub → Actions → Zenodo Download")
print("➡️  When it finishes, re-run the next cell to pull the downloaded file.")

$ git pull --rebase origin main
$ git tag zenodo-dl-20260127-081306
$ git push origin --tags


error: cannot pull with rebase: You have unstaged changes.
error: please commit or stash them.


✅ Triggered Zenodo download workflow with tag: zenodo-dl-20260127-081306
➡️  Watch it in GitHub → Actions → Zenodo Download
➡️  When it finishes, re-run the next cell to pull the downloaded file.


To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
 * [new tag]         zenodo-dl-20260127-081306 -> zenodo-dl-20260127-081306


### Pull the downloaded file ###

<div class="alert alert-block alert-info">
<b>Attention:</b>Only run after the Action succeeds.
</div>

In [16]:
def run_cmd(cmd, check=True):
    print("$", cmd)
    return subprocess.run(shlex.split(cmd), cwd=repo_root, check=check)

run_cmd("git pull --rebase origin main", check=False)

raw_path = repo_root / "data" / "raw" / "BioFairNet_Pilot1_Testrun.csv"
print("Exists?", raw_path.exists(), raw_path)


$ git pull --rebase origin main
Already up to date.
Current branch main is up to date.
Exists? True /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/data/raw/BioFairNet_Pilot1_Testrun.csv


From github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype
 * branch            main       -> FETCH_HEAD
