# Create the deposition on Zenodo

In [37]:
import os, json, glob, requests
from dotenv import load_dotenv
from pathlib import Path
import re, sys

In [38]:
repo_root = Path(__file__).resolve().parents[1] if "__file__" in globals() else Path.cwd().parents[0]
sys.path.append(str(repo_root))
from helper.upload_collector import prepare_release_payload

In [39]:
import os, shutil, pathlib, subprocess

root = pathlib.Path.cwd()  # project root if you launched Jupyter from there
payload = root / "release_payload"
payload.mkdir(exist_ok=True, parents=True)

## Define what files to upload (Used and processed training data + model + plots)

### Collect everything to upload and within the **release_payload** directory

Enter the path to all files, you want to include in your upload.
They are stored by default at *data/processed/*.

In [59]:
# Collect artifacts (adjust if some are optional)
Files = [
    "Train/BioFairNet_Pilot1_Testrun_Train_in.csv",
    "Train/BioFairNet_Pilot1_Testrun_Train_out.csv",
    "Test/BioFairNet_Pilot1_Testrun_Test_in.csv",
    "Test/BioFairNet_Pilot1_Testrun_Test_out.csv"
        ]
Results = [
    "lr_evaluation.csv",
    "lr_predictions_plot.png"
]

Models = [
    "all_modelsTestrun_Pilot1.pkl"
]

In [60]:
summary = prepare_release_payload(Files, Results, Models)
# Or enforce everything present:
# summary = prepare_release_payload(Files, Results, Models, require_all=True)

print("Copied:", [p.name for p in summary["copied"]])
print("Missing:", [str(p) for p in summary["missing"]])

📦 Payload directory: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/notebooks/release_payload
✅ Copied:
  - BioFairNet_Pilot1_Testrun_Train_in.csv
  - BioFairNet_Pilot1_Testrun_Train_out.csv
  - BioFairNet_Pilot1_Testrun_Test_in.csv
  - BioFairNet_Pilot1_Testrun_Test_out.csv
  - lr_evaluation.csv
  - lr_predictions_plot.png

⚠️ Missing (not found on disk):
  - /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/models/all_modelsTestrun_Pilot1.pkl

Tip: check filenames/locations. For example, results might live in 'figures/' or you may have a typo like 'Text/' vs 'Test/'.
Copied: ['BioFairNet_Pilot1_Testrun_Train_in.csv', 'BioFairNet_Pilot1_Testrun_Train_out.csv', 'BioFairNet_Pilot1_Testrun_Test_in.csv', 'BioFairNet_Pilot1_Testrun_Test_out.csv', 'lr_evaluation.csv', 'lr_predictions_plot.png']
Missing: ['/home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/models/all_modelsTestrun_Pilot1.pkl']


# Zenodo Metadata

Please enter all relevant metadata for your upload here:

In [78]:
zenodo_params = {
    "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun_Adapted",
    "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
    "community": "biofairnet",
    "creator": "Tobias Rosnitschek",
    "affiliation": "University of Bayreuth",
    "orcid": "0000-0002-4876-2536",
    "keywords": ["FAIR", "machine learning", "circular economy","sustainability"],
    "license": "MIT",
    "use_sandbox": "true"
}

In [79]:
# Save so later cells (e.g., workflow writer / trigger) can reuse without re-prompting
config_path = Path("../metadata/zenodo_params.json")
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(zenodo_params, indent=2), encoding="utf-8")

print("\n✅ Parameters captured and saved.")
print(f"📄 Saved to: {config_path.resolve()}\n")
print(json.dumps(zenodo_params, indent=2))


✅ Parameters captured and saved.
📄 Saved to: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/metadata/zenodo_params.json

{
  "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun_Adapted",
  "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
  "community": "biofairnet",
  "creator": "Tobias Rosnitschek",
  "affiliation": "University of Bayreuth",
  "orcid": "0000-0002-4876-2536",
  "keywords": [
    "FAIR",
    "machine learning",
    "circular economy",
    "sustainability"
  ],
  "license": "MIT",
  "use_sandbox": "true"
}


In [80]:
def find_repo_root(start: Path = None) -> Path:
    p = start or Path.cwd()
    for parent in [p, *p.resolve().parents]:
        if (parent / ".git").exists():
            return parent
    return Path.cwd()

def slugify(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    s = re.sub(r"-+", "-", s).strip("-")
    return s[:60]  # keep filename manageable

repo_root = find_repo_root()
params_path = repo_root / "metadata" / "zenodo_params.json"
tpl_path    = repo_root / "helper" / "zenodo-upload-template.yml"
out_dir     = repo_root / ".github" / "workflows"
out_dir.mkdir(parents=True, exist_ok=True)

# Load params
assert params_path.exists(), f"Params file not found: {params_path}"
zenodo_params = json.loads(params_path.read_text(encoding="utf-8"))

# Load template
assert tpl_path.exists(), f"Template not found: {tpl_path}"
tpl = tpl_path.read_text(encoding="utf-8")

# Fill placeholders
kw = (zenodo_params.get("keywords") or []) + ["", "", ""]
filled = (tpl.replace("__TITLE__",        zenodo_params["title"])
            .replace("__DESCRIPTION__",   zenodo_params["description"])
            .replace("__COMMUNITY__",     zenodo_params["community"])
            .replace("__CREATOR__",       zenodo_params["creator"])
            .replace("__AFFILIATION__",   zenodo_params["affiliation"])
            .replace("__ORCID__",         zenodo_params["orcid"])
            .replace("__KW1__",           kw[0])
            .replace("__KW2__",           kw[1])
            .replace("__KW3__",           kw[2])
            .replace("__LICENSE__",       zenodo_params["license"])
         )

# Write output
slug = slugify(zenodo_params["title"])
out_path = out_dir / f"{slug}-zenodo-upload.yml"

if out_path.exists():
    resp = input(f"⚠️ {out_path.name} exists. Overwrite? [y/N]: ").strip().lower()
    if resp not in ("y", "yes"):
        print("❌ Aborted. Existing workflow left unchanged.")
        sys.exit(0)

out_path.write_text(filled, encoding="utf-8")
print(f"✅ Wrote workflow to: {out_path}")
print("ℹ️ Reminder: if you want these defaults when running from GitHub → Actions, keep this file committed.")

⚠️ greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml exists. Overwrite? [y/N]:  y


✅ Wrote workflow to: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/.github/workflows/greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml
ℹ️ Reminder: if you want these defaults when running from GitHub → Actions, keep this file committed.


In [81]:
import subprocess, shlex, pathlib

# ensure release_payload exists and has files before forcing add
payload = pathlib.Path("release_payload")
if not payload.exists() or not any(payload.iterdir()):
    print("⚠️ release_payload is empty or missing. Create/populate it before pushing.")
else:
    cmds = [
        "git add -f release_payload",
        f"git add {out_path.as_posix()}",
        'git commit -m "Add Zenodo upload workflow generated from template"',
        "git push origin main",
    ]
    for cmd in cmds:
        print("$", cmd)
        subprocess.run(shlex.split(cmd), check=True)
    print("✅ Pushed workflow and payload.")
    print("Next: Trigger via GitHub → Actions → Run workflow (pick this file), or push a tag like zenodo-YYYYMMDD-HHMM.")

$ git add -f release_payload
$ git add /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/.github/workflows/greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml
$ git commit -m "Add Zenodo upload workflow generated from template"
[main 7f43c12] Add Zenodo upload workflow generated from template
 Committer: Tobi-Wan-Kenob1 <097e80f6-6687-4e65-aab6-9abf7b887006@4146c987a9a6>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 1 file changed, 80 insertions(+), 18 deletions(-)
$ git push origin main
✅ Pushed workflow and payload.
Next: Trigger via GitHub → Actions → Run workflow (pick this file), or 

To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
   9b3e11a..7f43c12  main -> main


In [82]:
import datetime

tag = "zenodo-ul-"+datetime.datetime.now().strftime("%Ym%d-%H%M")
subprocess.run(shlex.split(f"git tag {tag}"), check = True)

CompletedProcess(args=['git', 'tag', 'zenodo-ul-2025m22-1132'], returncode=0)

In [83]:
subprocess.run(shlex.split("git push origin --tags"), check = True)
print("Pushed tag:",tag)

Pushed tag: zenodo-ul-2025m22-1132


To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
 * [new tag]         zenodo-ul-2025m22-1132 -> zenodo-ul-2025m22-1132
