In [1]:
# Purpose: Split the BioFairNet pilot dataset into train/test sets and save inputs/targets

import os
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
# Location of the raw pilot export, relative to the notebook's folder.
raw_path = "../data/raw/BioFairNet_Pilot1_Testrun.csv"
# The export is semicolon-delimited, so the separator is given explicitly.
df = pd.read_csv(filepath_or_buffer=raw_path, sep=";")

In [6]:
# Preview the first five rows to sanity-check column names and parsed values.
df.head(n=5)

Unnamed: 0,Time (min),Temperature (°C),Stiring,Pressure (bar)
0,14:12:16,1148,1,7
1,14:12:17,1149,1,7
2,14:12:18,1149,1,7
3,14:12:19,1149,1,7
4,14:12:20,1149,1,7


In [7]:
# Model inputs and the single target, both kept as DataFrames (list-of-columns selection).
# NOTE(review): in the preview above "Time (min)" holds clock strings such as
# 14:12:16 rather than minutes — confirm it is meant to be used as a raw feature.
feature_columns = ["Time (min)", "Temperature (°C)", "Stiring"]
target_columns = ["Pressure (bar)"]

X = df[feature_columns]
y = df[target_columns]

In [8]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
# NOTE(review): the rows look time-ordered in the preview; a shuffled split mixes
# neighbouring timestamps across train and test — confirm this is intended.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [9]:
# One folder per split; each receives an inputs ("_in") and a targets ("_out") CSV.
train_dir = "../data/processed/Train"
test_dir = "../data/processed/Test"
for split_dir in (train_dir, test_dir):
    # exist_ok=True lets the cell be re-run without failing on existing folders.
    os.makedirs(split_dir, exist_ok=True)

# (frame, destination folder, file name), listed in the order the files are written.
exports = (
    (X_train, train_dir, "BioFairNet_Pilot1_Testrun_Train_in.csv"),
    (y_train, train_dir, "BioFairNet_Pilot1_Testrun_Train_out.csv"),
    (X_test, test_dir, "BioFairNet_Pilot1_Testrun_Test_in.csv"),
    (y_test, test_dir, "BioFairNet_Pilot1_Testrun_Test_out.csv"),
)
for frame, folder, file_name in exports:
    # index=False keeps the DataFrame row index out of the CSV.
    frame.to_csv(os.path.join(folder, file_name), index=False)

print("✅ Done: Train/Test split and files saved.")

✅ Done: Train/Test split and files saved.


In [10]:
import requests
import os

# Endpoint for creating and listing depositions.
ZENODO_API_URL = "https://zenodo.org/api/deposit/depositions"

# The access token is a credential: read it from the environment instead of
# writing it into the notebook, where it would end up in version control and in
# saved cell outputs (request URLs containing it are echoed in error messages).
ACCESS_TOKEN = os.environ.get("ZENODO_ACCESS_TOKEN", "")
if not ACCESS_TOKEN:
    print("⚠️ ZENODO_ACCESS_TOKEN is not set; authenticated Zenodo requests will be rejected.")

In [12]:
# Credentials are passed as the `access_token` query-string parameter.
params = dict(access_token=ACCESS_TOKEN)
# Requests that carry a body declare it as JSON.
headers = {"Content-Type": "application/json"}

In [13]:
# Step 1: Create new deposition
# POST an empty JSON body; the response carries the deposition id and the
# bucket link read below.
# A timeout is set so an unreachable host or proxy fails the cell instead of
# blocking the kernel indefinitely.
r = requests.post(ZENODO_API_URL, params=params, json={}, headers=headers, timeout=60)
r.raise_for_status()

# Parse the response body once and read both fields from the same object.
deposition = r.json()
deposition_id = deposition['id']
bucket_url = deposition["links"]["bucket"]

ProxyError: HTTPSConnectionPool(host='zenodo.org', port=443): Max retries exceeded with url: /api/deposit/depositions?access_token=biofairnet (Caused by ProxyError('Unable to connect to proxy', OSError('Tunnel connection failed: 403 Forbidden')))

In [14]:
# List depositions using the access token (authenticated request).
# The token is read from the environment rather than hardcoded in the notebook.
ACCESS_TOKEN = os.environ.get("ZENODO_ACCESS_TOKEN", "")
r = requests.get('https://zenodo.org/api/deposit/depositions',
                 params={'access_token': ACCESS_TOKEN},
                 timeout=30)
# A bare expression in the middle of a cell is not displayed, so print it.
print(r.status_code)  # expect 200 when the token is valid
r.json()

ProxyError: HTTPSConnectionPool(host='zenodo.org', port=443): Max retries exceeded with url: /api/deposit/depositions?access_token=biofairnet (Caused by ProxyError('Unable to connect to proxy', OSError('Tunnel connection failed: 403 Forbidden')))

In [16]:
import requests
# Same endpoint without a token, to check that authentication is enforced.
# The timeout keeps an unreachable host or proxy from blocking the kernel.
r = requests.get("https://zenodo.org/api/deposit/depositions", timeout=30)
# A bare expression in the middle of a cell is not displayed, so print it.
print(r.status_code)  # expect 401 without credentials
r.json()

ProxyError: HTTPSConnectionPool(host='zenodo.org', port=443): Max retries exceeded with url: /api/deposit/depositions (Caused by ProxyError('Unable to connect to proxy', OSError('Tunnel connection failed: 403 Forbidden')))

import requests
import os

# End-to-end upload: create a deposition, upload the four split files, attach
# metadata, then publish.
ZENODO_API_URL = "https://zenodo.org/api/deposit/depositions"

# The token is a credential: take it from the environment so it is never stored
# in the notebook, and stop before any request is made if it is missing.
ACCESS_TOKEN = os.environ.get("ZENODO_ACCESS_TOKEN", "")
if not ACCESS_TOKEN:
    raise RuntimeError(
        "ZENODO_ACCESS_TOKEN is not set; export a Zenodo access token before running this cell."
    )

# Seconds to wait on each request; without a timeout `requests` can block forever.
REQUEST_TIMEOUT = 60

headers = {"Content-Type": "application/json"}
# NOTE(review): the token travels in the query string and therefore appears in
# request URLs echoed by error messages. Zenodo's API documentation also
# describes an `Authorization: Bearer` header — consider switching.
params = {'access_token': ACCESS_TOKEN}

files_to_upload = [
    "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_in.csv",
    "../data/processed/Train/BioFairNet_Pilot1_Testrun_Train_out.csv",
    "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_in.csv",
    "../data/processed/Test/BioFairNet_Pilot1_Testrun_Test_out.csv"
]

# Fail fast: check every local file before touching the server, so a missing
# file cannot leave a half-filled draft deposition behind.
missing_files = [path for path in files_to_upload if not os.path.isfile(path)]
if missing_files:
    raise FileNotFoundError(f"Cannot upload, missing file(s): {missing_files}")

# Step 1: Create new deposition
r = requests.post(ZENODO_API_URL, params=params, json={}, headers=headers,
                  timeout=REQUEST_TIMEOUT)
r.raise_for_status()
deposition = r.json()  # parse the body once, read both fields from it
deposition_id = deposition['id']
bucket_url = deposition["links"]["bucket"]

# Step 2: Upload files
for filepath in files_to_upload:
    filename = os.path.basename(filepath)
    # Passing the open binary handle as `data` streams the file from disk.
    with open(filepath, "rb") as fp:
        print(f"Uploading {filename}...")
        r = requests.put(f"{bucket_url}/{filename}", data=fp, params=params,
                         timeout=REQUEST_TIMEOUT)
        r.raise_for_status()

# Step 3: Add metadata
metadata = {
    'metadata': {
        'title': 'GreenInformationFactory – Train/Test Split Files',
        'upload_type': 'dataset',
        'description': 'Train/test data extracted from BioFairNet Pilot1 dataset for AI model training.',
        'creators': [{'name': 'Rosnitschek, Tobias'}],
        'communities': [{'identifier': 'biofairnet'}],
    }
}
r = requests.put(f"{ZENODO_API_URL}/{deposition_id}", params=params, json=metadata,
                 headers=headers, timeout=REQUEST_TIMEOUT)
r.raise_for_status()

# Step 4: Publish
# NOTE(review): publishing on Zenodo is presumably permanent and every run of
# this cell creates a new record — confirm this should run unattended.
r = requests.post(f"{ZENODO_API_URL}/{deposition_id}/actions/publish", params=params,
                  timeout=REQUEST_TIMEOUT)
r.raise_for_status()
print("✅ Upload and publish complete!")