In [23]:
import os, json, glob, requests
from dotenv import load_dotenv
from pathlib import Path
import re, sys

# Create the deposition on Zenodo

In [3]:
import os, shutil, pathlib, subprocess

root = pathlib.Path.cwd()  # project root if you launched Jupyter from there
payload = root / "release_payload"
payload.mkdir(exist_ok=True, parents=True)

## Define what files to upload (Used and processed training data + model + plots)

### Collect everything to upload and within the **release_payload** directory

In [4]:
# Collect artifacts (adjust if some are optional)
sources = [
    root / "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_in.csv",
    root / "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_out.csv",
    root / "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_in.csv",
    root / "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_out.csv",
    root / "../data/results/lr_evaluation.csv",
    root / "../models/final_model.pkl",              
    root / "../data/results/lr_evaluation.png",       # optional
]

In [36]:
def find_repo_root(start: Path = None) -> Path:
    p = start or Path.cwd()
    for parent in [p, *p.resolve().parents]:
        if (parent / ".git").exists():
            return parent
    return Path.cwd()

repo_root = find_repo_root()
payload = repo_root / "notebooks" / "release_payload"
payload.mkdir(parents=True, exist_ok=True)

# Define sources RELATIVE TO REPO ROOT (no '../')
rel_sources = [
    "data/processed/Train/BioFairNet_Pilot1_Testrun_Train_in.csv",
    "data/processed/Train/BioFairNet_Pilot1_Testrun_Train_out.csv",
    "data/processed/Test/BioFairNet_Pilot1_Testrun_Test_in.csv",
    "data/processed/Test/BioFairNet_Pilot1_Testrun_Test_out.csv",
    "data/results/lr_evaluation.csv",
    "data/results/lr_predictions_plot.png",
    "notebooks/models/all_modelsTestrun_Pilot1.pkl"        
]
sources = [repo_root / p for p in rel_sources]

missing = []
copied = []
for src in sources:
    if src.exists():
        shutil.copy2(src, payload / src.name)
        copied.append(src.name)
    else:
        missing.append(src.as_posix())

print("✅ Payload now contains:")
for p in sorted([f.name for f in payload.iterdir() if f.is_file()]):
    print("  -", p)

if missing:
    print("\n⚠️ These files were NOT found and were skipped:")
    for m in missing:
        print("  -", m)
    print("\nTip: verify the filenames/locations. For example, your figure might be "
          "`figures/model_performance.png` not `data/results/lr_evaluation.png`, "
          "and `models/final_model.pkl` must be saved before copying.")

✅ Payload now contains:
  - BioFairNet_Pilot1_Testrun_Test_in.csv
  - BioFairNet_Pilot1_Testrun_Test_out.csv
  - BioFairNet_Pilot1_Testrun_Train_in.csv
  - BioFairNet_Pilot1_Testrun_Train_out.csv
  - all_modelsTestrun_Pilot1.pkl
  - lr_evaluation.csv
  - lr_predictions_plot.png


In [40]:
zenodo_params = {
    "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun_Adapted",
    "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
    "community": "biofairnet",
    "creator": "Tobias Rosnitschek",
    "affiliation": "University of Bayreuth",
    "orcid": "0000-0002-4876-2536",
    "keywords": ["FAIR", "machine learning", "circular economy"],
    "license": "MIT"
}

In [41]:
# Save so later cells (e.g., workflow writer / trigger) can reuse without re-prompting
config_path = Path("../metadata/zenodo_params.json")
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(zenodo_params, indent=2), encoding="utf-8")

print("\n✅ Parameters captured and saved.")
print(f"📄 Saved to: {config_path.resolve()}\n")
print(json.dumps(zenodo_params, indent=2))


✅ Parameters captured and saved.
📄 Saved to: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/metadata/zenodo_params.json

{
  "title": "GreenInformationFactory - BioFairNet_Pilot1_Testrun_Adapted",
  "description": "Train/test splits, trained model, and evaluation figure generated by the GreenInformationFactory pipeline. Raw data: 10.5281/zenodo.16256961.",
  "community": "biofairnet",
  "creator": "Tobias Rosnitschek",
  "affiliation": "University of Bayreuth",
  "orcid": "0000-0002-4876-2536",
  "keywords": [
    "FAIR",
    "machine learning",
    "circular economy"
  ],
  "license": "MIT"
}


In [42]:
def find_repo_root(start: Path = None) -> Path:
    p = start or Path.cwd()
    for parent in [p, *p.resolve().parents]:
        if (parent / ".git").exists():
            return parent
    return Path.cwd()

def slugify(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    s = re.sub(r"-+", "-", s).strip("-")
    return s[:60]  # keep filename manageable

repo_root = find_repo_root()
params_path = repo_root / "metadata" / "zenodo_params.json"
tpl_path    = repo_root / "helper" / "zenodo-upload-template.yml"
out_dir     = repo_root / ".github" / "workflows"
out_dir.mkdir(parents=True, exist_ok=True)

# Load params
assert params_path.exists(), f"Params file not found: {params_path}"
zenodo_params = json.loads(params_path.read_text(encoding="utf-8"))

# Load template
assert tpl_path.exists(), f"Template not found: {tpl_path}"
tpl = tpl_path.read_text(encoding="utf-8")

# Fill placeholders
kw = (zenodo_params.get("keywords") or []) + ["", "", ""]
filled = (tpl.replace("__TITLE__",        zenodo_params["title"])
            .replace("__DESCRIPTION__",   zenodo_params["description"])
            .replace("__COMMUNITY__",     zenodo_params["community"])
            .replace("__CREATOR__",       zenodo_params["creator"])
            .replace("__AFFILIATION__",   zenodo_params["affiliation"])
            .replace("__ORCID__",         zenodo_params.get("orcid",""))
            .replace("__KW1__",           kw[0])
            .replace("__KW2__",           kw[1])
            .replace("__KW3__",           kw[2])
            .replace("__LICENSE__",       zenodo_params["license"])
         )

# Write output
slug = slugify(zenodo_params["title"])
out_path = out_dir / f"{slug}-zenodo-upload.yml"

if out_path.exists():
    resp = input(f"⚠️ {out_path.name} exists. Overwrite? [y/N]: ").strip().lower()
    if resp not in ("y", "yes"):
        print("❌ Aborted. Existing workflow left unchanged.")
        sys.exit(0)

out_path.write_text(filled, encoding="utf-8")
print(f"✅ Wrote workflow to: {out_path}")
print("ℹ️ Reminder: if you want these defaults when running from GitHub → Actions, keep this file committed.")

✅ Wrote workflow to: /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/.github/workflows/greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml
ℹ️ Reminder: if you want these defaults when running from GitHub → Actions, keep this file committed.


In [None]:
from pathlib import Path
import subprocess, shlex

keep = "greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml"  # change to the one you want to keep
wf_dir = Path(".github/workflows")
assert wf_dir.exists(), "No .github/workflows directory found."

to_delete = [p for p in wf_dir.glob("*.yml") if p.name != keep]
if not to_delete:
    print("Nothing to delete.")
else:
    for p in to_delete:
        print("Deleting", p)
        p.unlink()

    subprocess.run(shlex.split("git add -A"), check=True)
    subprocess.run(shlex.split('git commit -m "Clean workflows: keep only %s"' % keep), check=True)
    subprocess.run(shlex.split("git push origin main"), check=True)
    print("✅ Cleaned and pushed.")

In [48]:
import subprocess, shlex, pathlib

# ensure release_payload exists and has files before forcing add
payload = pathlib.Path("release_payload")
if not payload.exists() or not any(payload.iterdir()):
    print("⚠️ release_payload is empty or missing. Create/populate it before pushing.")
else:
    cmds = [
        "git add -f release_payload",
        f"git add {out_path.as_posix()}",
        'git commit -m "Add Zenodo upload workflow generated from template"',
        "git push origin main",
    ]
    for cmd in cmds:
        print("$", cmd)
        subprocess.run(shlex.split(cmd), check=True)
    print("✅ Pushed workflow and payload.")
    print("Next: Trigger via GitHub → Actions → Run workflow (pick this file), or push a tag like zenodo-YYYYMMDD-HHMM.")

$ git add -f release_payload
$ git add /home/097e80f6-6687-4e65-aab6-9abf7b887006/GreenInformationFactory_Prototype/.github/workflows/greeninformationfactory-biofairnet-pilot1-testrun-adapted-zenodo-upload.yml
$ git commit -m "Add Zenodo upload workflow generated from template"
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	modified:   ../helper/zenodo-upload-template.yml
	modified:   ../metadata/zenodo_params.json
	modified:   Train_and_Optimize_Model-Copy1.ipynb
	modified:   Upload_Publish.ipynb

no changes added to commit (use "git add" and/or "git commit -a")


CalledProcessError: Command '['git', 'commit', '-m', 'Add Zenodo upload workflow generated from template']' returned non-zero exit status 1.

In [50]:
import datetime

tag = "zenodo-"+datetime.datetime.now().strftime("%Ym%d-%H%M")
subprocess.run(shlex.split(f"git tag {tag}"), check = True)

CompletedProcess(args=['git', 'tag', 'zenodo-2025m19-1219'], returncode=0)

In [51]:
subprocess.run(shlex.split("git push origin --tags"), check = True)
print("Pushed tag:",tag)

Pushed tag: zenodo-2025m19-1219


To github.com:Tobi-Wan-Kenob1/GreenInformationFactory_Prototype.git
 * [new tag]         zenodo-2025m19-1219 -> zenodo-2025m19-1219
