In [9]:
import os, json, glob, requests
from dotenv import load_dotenv
from pathlib import Path
import re

# Configure Endpoints & Token 
### (Toggle on Sandbox, to try and test)

In [2]:
load_dotenv("../src/ZENODO_TOKEN.env")

USE_SANDBOX = True #True ist for testing, set to false for actual uploading
BASE = "https://sandbox.zenodo.org" if USE_SANDBOX else "https://zenodo.org"
API  = f"{BASE}/api"
UI   = BASE

TOKEN = os.getenv("ZENODO_TOKEN")
assert TOKEN, "No ZENODO_TOKEN found. Put it in a .env file or set the env var."

print("‚úÖ Token loaded with length:", len(TOKEN))

HEADERS_JSON = {"Content-Type": "application/json",
                "Authorization": f"Bearer {TOKEN}"}
PARAMS = {"access_token": TOKEN}  # sometimes used for PUT uploads

‚úÖ Token loaded with length: 60


# Create the deposition on Zenodo

In [3]:
import os, shutil, pathlib, subprocess

root = pathlib.Path.cwd()  # project root if you launched Jupyter from there
payload = root / "release_payload"
payload.mkdir(exist_ok=True, parents=True)

## Define what files to upload (Used and processed training data + model + plots)

### Collect everything to upload and within the **release_payload** directory

In [4]:
# Collect artifacts (adjust if some are optional)
sources = [
    root / "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_in.csv",
    root / "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_out.csv",
    root / "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_in.csv",
    root / "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_out.csv",
    root / "../data/results/lr_evaluation.csv",
    root / "../models/final_model.pkl",              
    root / "../data/results/lr_evaluation.png",       # optional
]

In [5]:
for src in sources:
    if src.exists():
        shutil.copy2(src, payload / src.name)

print("Payload contains:")
print("\n".join(p.name for p in payload.iterdir()))

Payload contains:
BioFairNet_Pilot1_Testrun_Test_out.csv
BioFairNet_Pilot1_Testrun_Train_out.csv
lr_evaluation.csv
BioFairNet_Pilot1_Testrun_Train_in.csv
BioFairNet_Pilot1_Testrun_Test_in.csv


In [10]:
zenodo_params = {
    "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun",
    "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
    "community": "biofairnet",
    "creator": "Tobias Rosnitschek",
    "affiliation": "University of Bayreuth",
    "orcid": "0000-0002-4876-2536",
    "keywords": ["FAIR", "machine learning", "circular economy"],
    "license": "MIT"
}

In [14]:
# Save so later cells (e.g., workflow writer / trigger) can reuse without re-prompting
config_path = Path("../workflows/zenodo_params.json")
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(zenodo_params, indent=2), encoding="utf-8")

print("\n‚úÖ Parameters captured and saved.")
print(f"üìÑ Saved to: {config_path.resolve()}\n")
print(json.dumps(zenodo_params, indent=2))


‚úÖ Parameters captured and saved.
üìÑ Saved to: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/workflows/zenodo_params.json

{
  "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun",
  "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
  "community": "biofairnet",
  "creator": "Tobias Rosnitschek",
  "affiliation": "University of Bayreuth",
  "orcid": "0000-0002-4876-2536",
  "keywords": [
    "FAIR",
    "machine learning",
    "circular economy"
  ],
  "license": "MIT"
}


In [17]:
params_path = Path("../workflows/zenodo_params.json")
if "zenodo_params" not in globals():
    assert params_path.exists(), "No zenodo_params in memory; and .github/zenodo_params.json not found."
    zenodo_params = json.loads(params_path.read_text(encoding="utf-8"))

title       = zenodo_params["title"]
description = zenodo_params["description"]
community   = zenodo_params["community"]
creator     = zenodo_params["creator"]
affiliation = zenodo_params["affiliation"]
orcid       = zenodo_params.get("orcid","")
keywords    = zenodo_params.get("keywords", [])
license_id  = zenodo_params.get("license","MIT")

# ensure at least 3 keywords slots
kw1 = keywords[0] if len(keywords) > 0 else ""
kw2 = keywords[1] if len(keywords) > 1 else ""
kw3 = keywords[2] if len(keywords) > 2 else ""

# slugify title for filename
def slugify(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    s = re.sub(r"-+", "-", s).strip("-")
    return s[:60]  # keep it reasonable
slug = slugify(title)

wf_dir = Path("../workflows")
tpl_path = wf_dir / "zenodo-upload-template.yml"
out_path = wf_dir / f"{slug}-zenodo-upload.yml"

assert tpl_path.exists(), f"Template not found: {tpl_path}. Run the template cell first."

# safety check
if out_path.exists():
    ans = input(f"‚ö†Ô∏è {out_path.name} exists. Overwrite? [y/N]: ").strip().lower()
    if ans not in ("y","yes"):
        print("‚ùå Aborted. Existing workflow left unchanged.")
        raise SystemExit

# copy & replace
content = tpl_path.read_text(encoding="utf-8")
content = (content
    .replace("__TITLE__", title)
    .replace("__DESCRIPTION__", description)
    .replace("__COMMUNITY__", community)
    .replace("__CREATOR__", creator)
    .replace("__AFFILIATION__", affiliation)
    .replace("__ORCID__", orcid)
    .replace("__KW1__", kw1)
    .replace("__KW2__", kw2)
    .replace("__KW3__", kw3)
    .replace("__LICENSE__", license_id)
)

out_path.write_text(content, encoding="utf-8")
print(f"‚úÖ Workflow created: {out_path}")
print("‚ÑπÔ∏è Reminder: if you want these defaults when running from GitHub ‚Üí Actions, keep this file committed.")

‚úÖ Workflow created: ../workflows/greeninformationfactory-biofairnet-pilot1-testrun-zenodo-upload.yml
‚ÑπÔ∏è Reminder: if you want these defaults when running from GitHub ‚Üí Actions, keep this file committed.


In [18]:
import subprocess, shlex, pathlib

# ensure release_payload exists and has files before forcing add
payload = pathlib.Path("release_payload")
if not payload.exists() or not any(payload.iterdir()):
    print("‚ö†Ô∏è release_payload is empty or missing. Create/populate it before pushing.")
else:
    cmds = [
        "git add -f release_payload",
        f"git add {out_path.as_posix()}",
        'git commit -m "Add Zenodo upload workflow generated from template"',
        "git push origin main",
    ]
    for cmd in cmds:
        print("$", cmd)
        subprocess.run(shlex.split(cmd), check=True)
    print("‚úÖ Pushed workflow and payload.")
    print("Next: Trigger via GitHub ‚Üí Actions ‚Üí Run workflow (pick this file), or push a tag like zenodo-YYYYMMDD-HHMM.")

$ git add -f release_payload
$ git add ../workflows/greeninformationfactory-biofairnet-pilot1-testrun-zenodo-upload.yml
$ git commit -m "Add Zenodo upload workflow generated from template"
$ git push origin main
[main dc2bdf5] Add Zenodo upload workflow generated from template
 Committer: Tobi-Wan-Kenob1 <097e80f6-6687-4e65-aab6-9abf7b887006@4146c987a9a6>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 1 file changed, 106 insertions(+)
 create mode 100644 workflows/greeninformationfactory-biofairnet-pilot1-testrun-zenodo-upload.yml


To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
 ! [rejected]        main -> main (fetch first)
error: failed to push some refs to 'git@github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git'
hint: Updates were rejected because the remote contains work that you do
hint: not have locally. This is usually caused by another repository pushing
hint: to the same ref. You may want to first integrate the remote changes
hint: (e.g., 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.


CalledProcessError: Command '['git', 'push', 'origin', 'main']' returned non-zero exit status 1.