### Imports and Helpers

In [13]:
import json, shlex, subprocess, re
from pathlib import Path

def run(cmd, check=True):
    """Run a shell command with pretty print."""
    print("$", cmd)
    return subprocess.run(shlex.split(cmd), check=check)

def find_repo_root(start: Path = None) -> Path:
    p = start or Path.cwd()
    for parent in [p, *p.resolve().parents]:
        if (parent / ".git").exists():
            return parent
    return Path.cwd()

def slugify(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    s = re.sub(r"-+", "-", s).strip("-")
    return s[:60]

def run_cmd(cmd, check=True):
    """Run a shell command with pretty print."""
    print("$", cmd)
    return subprocess.run(shlex.split(cmd), check=check)

In [3]:
repo_root = find_repo_root()

# Ask user
doi = input("Enter Zenodo DOI (e.g., 10.5281/zenodo.16256961): ").strip()
if not doi:
    raise SystemExit("DOI is required.")


Enter Zenodo DOI (e.g., 10.5281/zenodo.16256961):  10.5281/zenodo.16256961


In [4]:
# Where to put downloaded files (in the repo)
default_dest = "data/raw"
dest_dir = input(f"Destination directory at repo root [{default_dest}]: ").strip() or default_dest

Destination directory at repo root [data/raw]:  


In [6]:
# Save config JSON for the workflow to read
metadata_dir = repo_root / "metadata"
metadata_dir.mkdir(parents=True, exist_ok=True)
cfg_path = metadata_dir / "zenodo_download.json"
cfg_path.write_text(json.dumps({"doi": doi, "dest_dir": dest_dir}, indent=2), encoding="utf-8")

print("✅ Saved config:", cfg_path.relative_to(repo_root))
print(cfg_path.read_text())

✅ Saved config: metadata/zenodo_download.json
{
  "doi": "10.5281/zenodo.16256961",
  "dest_dir": "data/raw"
}


In [10]:
repo_root = find_repo_root()

tpl_path = repo_root / "helper" / "zenodo-download-template.yml"   # your template location
params_path = repo_root / "metadata" / "zenodo_download.json"      # DOI + dest_dir
out_dir = repo_root / ".github" / "workflows"
out_dir.mkdir(parents=True, exist_ok=True)

assert tpl_path.exists(), f"Template not found: {tpl_path}"
assert params_path.exists(), f"Params not found: {params_path} (run the DOI cell to create it)"

params = json.loads(params_path.read_text(encoding="utf-8"))
doi = params.get("doi", "").strip()
dest_dir = params.get("dest_dir", "metadata").strip() or "notebooks/zenodo_downloads"
assert doi, "DOI missing in metadata/zenodo_download.json"

# Read template with placeholders __DOI__ and default dest_dir inside template (if any)
tpl = tpl_path.read_text(encoding="utf-8")

filled = (tpl
          .replace("__DOI__", doi)
          # Optional: if your template had a placeholder for dest_dir defaults, add it here:
          .replace("notebooks/zenodo_downloads", dest_dir)
          )

# Build filename from DOI slug
# e.g., 10.5281/zenodo.1234567 -> slug "zenodo-1234567"
def slug_from_doi(d):
    # Keep 'zenodo.<id>' tail if present; otherwise slugify the whole doi
    m = re.search(r'(zenodo\.\d+)$', d)
    base = m.group(1) if m else d
    return slugify(base)

slug = slug_from_doi(doi)
out_path = out_dir / f"{slug}-zenodo-download.yml"

# Safety check
if out_path.exists():
    resp = input(f"⚠️ {out_path.name} exists. Overwrite? [y/N]: ").strip().lower()
    if resp not in ("y","yes"):
        print("❌ Aborted. Existing workflow left unchanged.")
        sys.exit(0)

out_path.write_text(filled, encoding="utf-8")
print(f"✅ Wrote workflow to: {out_path.relative_to(repo_root)}")
print("Next: commit & push so GitHub Actions can detect it.")

✅ Wrote workflow to: .github/workflows/zenodo-16256961-zenodo-download.yml
Next: commit & push so GitHub Actions can detect it.


In [11]:
def run(cmd):
    print("$", cmd)
    subprocess.run(shlex.split(cmd), check=True)

run("git add .github/workflows")
run('git commit -m "Add Zenodo download workflow generated from template"')
run("git push origin main")

print("✅ Pushed. Go to GitHub → Actions and you should see “Zenodo Download”.")
print("You can trigger it via Actions → Run workflow, or by pushing a tag like: zenodo-dl-<something>.")

$ git add .github/workflows


fatal: pathspec '.github/workflows' did not match any files


CalledProcessError: Command '['git', 'add', '.github/workflows']' returned non-zero exit status 128.

In [None]:
keep = "greeninformationfactory-biofairnet-pilot1-testrun-zenodo-upload.yml"  # change to the one you want to keep
wf_dir = Path(".github/workflows")
assert wf_dir.exists(), "No .github/workflows directory found."

to_delete = [p for p in wf_dir.glob("*.yml") if p.name != keep]
if not to_delete:
    print("Nothing to delete.")
else:
    for p in to_delete:
        print("Deleting", p)
        p.unlink()

    subprocess.run(shlex.split("git add -A"), check=True)
    subprocess.run(shlex.split('git commit -m "Clean workflows: keep only %s"' % keep), check=True)
    subprocess.run(shlex.split("git push origin main"), check=True)
    print("✅ Cleaned and pushed.")

In [8]:
# Commit this config so the workflow can read it on the runner
run("git add ../metadata/zenodo_download.json")
run('git commit -m "Add zenodo_download.json (DOI + destination for Zenodo download workflow)"', check=False)
run("git push origin main", check=False)

$ git add ../metadata/zenodo_download.json
$ git commit -m "Add zenodo_download.json (DOI + destination for Zenodo download workflow)"
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	modified:   Download_and_Store.ipynb

no changes added to commit (use "git add" and/or "git commit -a")
$ git push origin main


Everything up-to-date


CompletedProcess(args=['git', 'push', 'origin', 'main'], returncode=0)

In [14]:
# Pull latest (avoid fast-forward issues)
run_cmd("git pull --rebase origin main", check=False)

tag = "zenodo-dl-" + slugify(doi)
# If tag exists already, append a counter suffix
existing = subprocess.run(shlex.split("git tag"), capture_output=True, text=True).stdout.splitlines()
if tag in existing:
    from datetime import datetime
    tag = f"{tag}-{datetime.utcnow().strftime('%H%M%S')}"

run(f"git tag {tag}")
run("git push origin --tags")
print(f"✅ Triggered workflow with tag: {tag}")
print("➡️  Go to GitHub → Actions → watch the 'Zenodo Download' run. Files will be committed to the repo in your chosen folder.")

$ git pull --rebase origin main
$ git tag zenodo-dl-10-5281-zenodo-16256961-130232
$ git push origin --tags


error: cannot pull with rebase: You have unstaged changes.
error: please commit or stash them.


✅ Triggered workflow with tag: zenodo-dl-10-5281-zenodo-16256961-130232
➡️  Go to GitHub → Actions → watch the 'Zenodo Download' run. Files will be committed to the repo in your chosen folder.


To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
 * [new tag]         zenodo-dl-10-5281-zenodo-16256961-130232 -> zenodo-dl-10-5281-zenodo-16256961-130232
