## Setup

In [2]:
import os
import requests
import json
from pathlib import Path

import sys
sys.path.append('../')
from modules.helpers import get_access_token, load_eoepca_state, test_cell, test_results

Load `eoepca state` environment

In [3]:
load_eoepca_state()

In [None]:
platform_domain = os.environ.get("INGRESS_HOST")
use_oidc = os.environ.get("USE_CERT_MANAGER", "yes") == "yes"
project_name = "mlops-test-project"
gitlab_domain = f'https://gitlab.{platform_domain}'
sharinghub_domain = f'https://sharinghub.{platform_domain}'
mlflow_base_url = f'{sharinghub_domain}/mlflow'

GitLab URL: https://gitlab.notebook-test.develop.eoepca.org
SharingHub URL: https://sharinghub.notebook-test.develop.eoepca.org
MLflow URL: https://sharinghub.notebook-test.develop.eoepca.org/mlflow


In [5]:
services = {
    "GitLab": gitlab_domain,
    "SharingHub": sharinghub_domain,
    "MLflow": mlflow_base_url
}

for service, url in services.items():
    response = requests.get(url)
    print(f"{service} ({url}): {response.status_code}")

GitLab (https://gitlab.notebook-test.develop.eoepca.org): 200
SharingHub (https://sharinghub.notebook-test.develop.eoepca.org): 200
MLflow (https://sharinghub.notebook-test.develop.eoepca.org/mlflow): 401


## GitLab Project Validation

In [None]:
project_api_url = f"{gitlab_domain}/api/v4/projects/1"

project_response = requests.get(project_api_url)
if project_response.status_code == 200:
    project_data = project_response.json()
    print(f"✅ GitLab Project '{project_name}' exists.")
else:
    print(f"❌ GitLab Project '{project_name}' not found or inaccessible.")

✅ GitLab Project 'mlops-test-project' exists.


## SharingHub Project Validation

In [19]:
sharinghub_project_url = f"{sharinghub_domain}/api/stac/collections/ai-model/items/root/{project_name}"
print(f"SharingHub Project URL: {sharinghub_project_url}")
sharinghub_response = requests.get(sharinghub_project_url)

if sharinghub_response.status_code == 200:
    print(f"✅ Project '{project_name}' is discoverable in SharingHub.")
    print(json.dumps(sharinghub_response.json(), indent=2))
else:
    print(f"❌ Project '{project_name}' not found in SharingHub.")

SharingHub Project URL: https://sharinghub.notebook-test.develop.eoepca.org/api/stac/collections/ai-model/items/root/mlops-test-project
✅ Project 'mlops-test-project' is discoverable in SharingHub.
{
  "stac_version": "1.0.0",
  "stac_extensions": [],
  "type": "Feature",
  "id": "root/mlops-test-project",
  "geometry": null,
  "collection": "ai-model",
  "properties": {
    "title": "mlops-test-project",
    "description": "# mlops-test-project\n\n## Getting started\n\nTo make it easy for you to get started with GitLab, here's a list of recommended next steps.\n\nAlready a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!\n\n## Add your files\n\n- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files\n- [ ] [Add files using the command line](https://docs.gitlab.com/ee

## MLflow Project Setup Validation

In [9]:
!pip install mlflow scikit-learn

Collecting mlflow
  Downloading mlflow-2.21.0-py3-none-any.whl.metadata (30 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting mlflow-skinny==2.21.0 (from mlflow)
  Downloading mlflow_skinny-2.21.0-py3-none-any.whl.metadata (31 kB)
Collecting Flask<4 (from mlflow)
  Downloading flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting markdown<4,>=3.3 (from mlflow)
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting pyarrow<20,>=4.0.0 (from mlflow)
  Downloading pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting scipy<2 (from mlflow)
 

### Retrieve a GitLab Token

Navigate to `https://gitlab.${INGRESS_HOST}/root/mlops-test-project/-/settings/access_tokens`

In [None]:
os.environ["MLFLOW_TRACKING_URI"] = f'https://sharinghub.{platform_domain}/mlflow/root/mlops-test-project/tracking/'
os.environ["MLFLOW_TRACKING_TOKEN"] = 'glpat-XXXXXXXXXXX'

import mlflow
import mlflow.sklearn

try:
    client = mlflow.tracking.MlflowClient(tracking_uri=os.environ["MLFLOW_TRACKING_URI"])
    experiments = client.search_experiments()
    print(f"✅ Connected to MLflow. Available experiments:")
    for exp in experiments:
        print(f"- {exp.name} (ID: {exp.experiment_id})")
except Exception as e:
    print(f"❌ MLflow connection failed: {e}")

✅ Connected to MLflow. Available experiments:
- example (1) (ID: 1)


## Running a Sample MLflow Experiment

In [37]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn


experiment_name = "example (1)"
mlflow.set_experiment(experiment_name)
mlflow.autolog()

data_path = "wine-quality.csv"
data = pd.read_csv(data_path)

data = data.dropna()
X = data.drop("quality", axis=1)
y = data["quality"]

# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Start an MLflow run
with mlflow.start_run():
    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("random_state", 42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    mlflow.sklearn.log_model(model, "model")

    print("Model training complete.")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")


2025/03/24 16:34:53 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Model training complete.
Mean Squared Error: 0.5690247717229262
R^2 Score: 0.2652750042179145
🏃 View run thoughtful-ant-796 at: https://sharinghub.notebook-test.develop.eoepca.org/mlflow/root/mlops-test-project/tracking/#/experiments/1/runs/7582887ee18a4c11b6ebc4656f35ec59
🧪 View experiment at: https://sharinghub.notebook-test.develop.eoepca.org/mlflow/root/mlops-test-project/tracking/#/experiments/1


## Validate Experiment Logging in MLflow

In [None]:
run_id = 'XXXXXXXXXXXXXXXXXXXXXXX'

run_info = client.get_run(run_id) # run.info.run_id
if run_info:
    print(f"✅ Run details from MLflow:")
    print(json.dumps(run_info.data.to_dictionary(), indent=2))
else:
    print("❌ Failed to retrieve run details from MLflow.")

✅ Run details from MLflow:
{
  "metrics": {
    "training_mean_squared_error": 0.5628259699588581,
    "training_mean_absolute_error": 0.5826467714644642,
    "training_r2_score": 0.2843126626389435,
    "training_root_mean_squared_error": 0.7502172818316425,
    "training_score": 0.2843126626389435,
    "mse": 0.5690247717229262,
    "r2": 0.2652750042179145
  },
  "params": {
    "model_type": "LinearRegression",
    "test_size": "0.2",
    "random_state": "42",
    "copy_X": "True",
    "fit_intercept": "True",
    "n_jobs": "None",
    "positive": "False"
  },
  "tags": {
    "mlflow.user": "eouser",
    "mlflow.source.name": "/home/eouser/code/deployment-guide/venv/lib/python3.10/site-packages/ipykernel_launcher.py",
    "mlflow.source.type": "LOCAL",
    "mlflow.runName": "merciful-crow-830",
    "estimator_name": "LinearRegression",
    "estimator_class": "sklearn.linear_model._base.LinearRegression",
    "mlflow.log-model.history": "[{\"run_id\": \"549a655f1bdc43b990fe4f39debcf

## Validate Artifact Storage in S3 (MinIO)

In [30]:
!pip install minio

Collecting minio
  Downloading minio-7.2.15-py3-none-any.whl (95 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.1/95.1 KB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting pycryptodome
  Downloading pycryptodome-3.22.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: pycryptodome, minio
Successfully installed minio-7.2.15 pycryptodome-3.22.0


In [31]:
from minio import Minio
from minio.error import S3Error

minio_endpoint = os.environ.get("S3_ENDPOINT").replace('https://', '').replace('http://', '')
minio_access_key = os.environ.get("S3_ACCESS_KEY")
minio_secret_key = os.environ.get("S3_SECRET_KEY")
bucket_mlflow = os.environ.get("S3_BUCKET_MLFLOW")

minio_client = Minio(minio_endpoint,
                     access_key=minio_access_key,
                     secret_key=minio_secret_key,
                     secure=True)

try:
    artifacts = minio_client.list_objects(bucket_mlflow, recursive=True)
    artifacts_list = [obj.object_name for obj in artifacts]
    if artifacts_list:
        print(f"✅ Artifacts stored in bucket '{bucket_mlflow}':")
        for artifact in artifacts_list:
            print(f"- {artifact}")
    else:
        print(f"⚠️ No artifacts found in bucket '{bucket_mlflow}'.")
except S3Error as exc:
    print(f"❌ MinIO access error: {exc}")

✅ Artifacts stored in bucket 'mlopbb-mlflow-sharinghub':
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/estimator.html
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/model/MLmodel
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/model/conda.yaml
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/model/model.pkl
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/model/python_env.yaml
- 1/549a655f1bdc43b990fe4f39debcfaa6/artifacts/model/requirements.txt
- 1/585e9fa6aea347c081482197779e180a/artifacts/estimator.html
- 1/585e9fa6aea347c081482197779e180a/artifacts/model/MLmodel
- 1/585e9fa6aea347c081482197779e180a/artifacts/model/conda.yaml
- 1/585e9fa6aea347c081482197779e180a/artifacts/model/model.pkl
- 1/585e9fa6aea347c081482197779e180a/artifacts/model/python_env.yaml
- 1/585e9fa6aea347c081482197779e180a/artifacts/model/requirements.txt
