# EOEPCA MLOps Validation and Usage Notebook

## Setup

In [1]:
import getpass
import json
import os
import sys
from pathlib import Path
from urllib.parse import quote

import requests

sys.path.append('../')
from modules.helpers import get_access_token, load_eoepca_state, test_cell, test_results

Load the `eoepca state` environment variables used by the cells below.

In [2]:
# Load the EOEPCA deployment state through the project helper imported above.
# NOTE(review): presumably this populates os.environ with the values that later
# cells read (INGRESS_HOST, S3_*, ...) — confirm against modules/helpers.py.
load_eoepca_state()

In [11]:
# --- Deployment settings taken from the process environment -----------------
platform_domain = os.getenv("INGRESS_HOST")
test_user = os.getenv("KEYCLOAK_TEST_USER", "root")
# NOTE(review): the flag is called `use_oidc` but is derived from
# USE_CERT_MANAGER — confirm that this is the intended variable.
use_oidc = "yes" == os.getenv("USE_CERT_MANAGER", "yes")

# --- Project exercised by this notebook -------------------------------------
project_name = "mlops-test-project"

# --- Service endpoints, all derived from the ingress host -------------------
gitlab_domain, sharinghub_domain = (
    f"https://{subdomain}.{platform_domain}"
    for subdomain in ("gitlab", "sharinghub")
)
# MLflow is served under the SharingHub host.
mlflow_base_url = sharinghub_domain + "/mlflow"

In [7]:
# Base URLs of the platform services to probe, keyed by display name.
services = {
    "GitLab": gitlab_domain,
    "SharingHub": sharinghub_domain,
    "MLflow": mlflow_base_url
}

# Smoke-test each endpoint and print its HTTP status code. The requests are
# unauthenticated and no status code is treated as a failure here.
# A timeout keeps an unresponsive host from hanging the notebook, and
# connection errors are reported per service so that one unreachable endpoint
# does not abort the remaining checks.
for service, url in services.items():
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"{service} ({url}): request failed ({exc})")
        continue
    print(f"{service} ({url}): {response.status_code}")

GitLab (https://gitlab.test.eoepca.org): 200
SharingHub (https://sharinghub.test.eoepca.org): 200
MLflow (https://sharinghub.test.eoepca.org/mlflow): 401


## GitLab Project Validation

In [8]:
# Look the project up by its URL-encoded "<namespace>/<name>" path instead of a
# hard-coded numeric ID, so that a 200 response really means that
# `project_name` exists (and not merely that *some* project has ID 1).
# The namespace is `test_user`, matching the path used for the SharingHub and
# MLflow URLs elsewhere in this notebook.
project_path = quote(f"{test_user}/{project_name}", safe="")
project_api_url = f"{gitlab_domain}/api/v4/projects/{project_path}"

# Unauthenticated request; the timeout prevents the cell from hanging forever.
project_response = requests.get(project_api_url, timeout=30)
if project_response.status_code == 200:
    # Keep the parsed project description available to later cells.
    project_data = project_response.json()
    print(f"‚úÖ GitLab Project '{project_name}' exists.")
else:
    print(f"‚ùå GitLab Project '{project_name}' not found or inaccessible.")

‚úÖ GitLab Project 'mlops-test-project' exists.


## SharingHub Project Validation

In [12]:
# STAC item URL of the project inside SharingHub's "ai-model" collection.
sharinghub_project_url = f"{sharinghub_domain}/api/stac/collections/ai-model/items/{test_user}/{project_name}"
print(f"SharingHub Project URL: {sharinghub_project_url}")
# The timeout prevents the cell from hanging on an unresponsive host.
sharinghub_response = requests.get(sharinghub_project_url, timeout=30)

if sharinghub_response.status_code == 200:
    print(f"‚úÖ Project '{project_name}' is discoverable in SharingHub.")
    # Show the returned item so the reader can inspect its metadata.
    print(json.dumps(sharinghub_response.json(), indent=2))
else:
    print(f"‚ùå Project '{project_name}' not found in SharingHub.")

SharingHub Project URL: https://sharinghub.test.eoepca.org/api/stac/collections/ai-model/items/eoepcauser/mlops-test-project
‚úÖ Project 'mlops-test-project' is discoverable in SharingHub.
{
  "stac_version": "1.0.0",
  "stac_extensions": [],
  "type": "Feature",
  "id": "eoepcauser/mlops-test-project",
  "geometry": null,
  "collection": "ai-model",
  "properties": {
    "title": "mlops-test-project",
    "description": "# mlops-test-project\n\n## Getting started\n\nTo make it easy for you to get started with GitLab, here's a list of recommended next steps.\n\nAlready a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!\n\n## Add your files\n\n- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files\n- [ ] [Add files using the command line](https://docs.gitlab.com/topic

## MLflow Project Setup Validation

In [13]:
!pip install mlflow scikit-learn

Defaulting to user installation because normal site-packages is not writeable


### Retrieve a GitLab Token

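Navigate to `https://gitlab.${INGRESS_HOST}/root/mlops-test-project/-/settings/access_tokens` (substitute your own user for `root` if `KEYCLOAK_TEST_USER` is set to a different value) and create a project access token. Provide that token as `MLFLOW_TRACKING_TOKEN` in the next cell.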

In [None]:
# Per-project MLflow tracking endpoint exposed under the SharingHub MLflow URL.
os.environ["MLFLOW_TRACKING_URI"] = f'{mlflow_base_url}/{test_user}/{project_name}/tracking/'

# Never paste the GitLab token into the notebook source: reuse the value from
# the environment when it is already set, otherwise prompt for it without
# echoing it to the output.
if not os.environ.get("MLFLOW_TRACKING_TOKEN"):
    os.environ["MLFLOW_TRACKING_TOKEN"] = getpass.getpass("GitLab access token (glpat-...): ")

# Imported here (not in the first cell) because the packages are installed by
# the preceding cell.
import mlflow
import mlflow.sklearn

# Listing the experiments doubles as a connectivity and authentication check.
client = mlflow.tracking.MlflowClient(tracking_uri=os.environ["MLFLOW_TRACKING_URI"])
experiments = client.search_experiments()
print("‚úÖ Connected to MLflow. Available experiments:")
for exp in experiments:
    print(f"- {exp.name} (ID: {exp.experiment_id})")

‚úÖ Connected to MLflow. Available experiments:
- example (1) (ID: 1)


## Running a Sample MLflow Experiment

In [17]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn

# Split settings, defined once so that the values logged to MLflow are exactly
# the values used for the split.
TEST_SIZE = 0.2
RANDOM_STATE = 42

experiment_name = "example (1)"
mlflow.set_experiment(experiment_name)
# NOTE(review): with autologging enabled the fitted estimator may already be
# logged automatically; the explicit log_model() call below could be redundant
# — confirm before removing either.
mlflow.autolog()

# Load the dataset (path is relative to the notebook's working directory) and
# drop rows with missing values.
data_path = "wine-quality.csv"
data = pd.read_csv(data_path).dropna()

# "quality" is the regression target; every other column is a feature.
X = data.drop("quality", axis=1)
y = data["quality"]

# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Start an MLflow run; `run_id` is kept for the validation cell that follows.
with mlflow.start_run() as run:
    run_id = run.info.run_id
    print(f"Run ID: {run_id}")

    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("random_state", RANDOM_STATE)

    model = LinearRegression()
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    # Hold-out metrics, logged alongside whatever autologging records.
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    mlflow.sklearn.log_model(model, "model")

    print("Model training complete.")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")



2025/12/17 11:38:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Run ID: 12efe9f6651f4c378f100354a5243fcc




Model training complete.
Mean Squared Error: 0.5690247717229262
R^2 Score: 0.2652750042179145
üèÉ View run classy-wren-956 at: https://sharinghub.test.eoepca.org/mlflow/eoepcauser/mlops-test-project/tracking/#/experiments/1/runs/12efe9f6651f4c378f100354a5243fcc
üß™ View experiment at: https://sharinghub.test.eoepca.org/mlflow/eoepcauser/mlops-test-project/tracking/#/experiments/1


## Validate Experiment Logging in MLflow

In [10]:
# Fetch the run created by the experiment cell. get_run() signals a missing or
# inaccessible run by raising, not by returning a falsy value, so the failure
# path has to be an exception handler to be reachable.
try:
    run_info = client.get_run(run_id)
except mlflow.exceptions.MlflowException as exc:
    run_info = None
    print("‚ùå Failed to retrieve run details from MLflow.")
    print(f"Reason: {exc}")
else:
    print("‚úÖ Run details from MLflow:")
    # Metrics, params and tags recorded for the run.
    print(json.dumps(run_info.data.to_dictionary(), indent=2))

‚úÖ Run details from MLflow:
{
  "metrics": {
    "training_mean_squared_error": 0.5628259699588581,
    "training_mean_absolute_error": 0.5826467714644642,
    "training_r2_score": 0.2843126626389435,
    "training_root_mean_squared_error": 0.7502172818316425,
    "training_score": 0.2843126626389435,
    "mse": 0.5690247717229262,
    "r2": 0.2652750042179145
  },
  "params": {
    "model_type": "LinearRegression",
    "test_size": "0.2",
    "random_state": "42",
    "copy_X": "True",
    "fit_intercept": "True",
    "n_jobs": "None",
    "positive": "False"
  },
  "tags": {
    "mlflow.user": "eouser",
    "mlflow.source.name": "/home/eouser/.local/lib/python3.10/site-packages/ipykernel_launcher.py",
    "mlflow.source.type": "LOCAL",
    "mlflow.runName": "aged-cod-538",
    "estimator_name": "LinearRegression",
    "estimator_class": "sklearn.linear_model._base.LinearRegression",
    "mlflow.log-model.history": "[{\"run_id\": \"166f339f59674b94b8ba571641d7b7d8\", \"artifact_path\

## Validate Artifact Storage in S3 (MinIO)

In [18]:
!pip install minio

Defaulting to user installation because normal site-packages is not writeable


In [19]:
from minio import Minio
from minio.error import S3Error

# Fail with a clear message instead of an AttributeError on None when the
# endpoint has not been configured.
s3_endpoint = os.environ.get("S3_ENDPOINT")
if not s3_endpoint:
    raise RuntimeError("S3_ENDPOINT is not set in the environment.")

# The client is given the endpoint without its scheme; the scheme only decides
# whether TLS is used. A plain-HTTP endpoint must not be contacted over TLS;
# anything else keeps the previous default of a secure connection.
minio_secure = not s3_endpoint.lower().startswith("http://")
minio_endpoint = s3_endpoint.replace('https://', '').replace('http://', '')
minio_access_key = os.environ.get("S3_ACCESS_KEY")
minio_secret_key = os.environ.get("S3_SECRET_KEY")
bucket_mlflow = os.environ.get("S3_BUCKET_MLFLOW")

minio_client = Minio(minio_endpoint,
                     access_key=minio_access_key,
                     secret_key=minio_secret_key,
                     secure=minio_secure)

# List every object in the MLflow bucket and report the outcome.
try:
    artifacts = minio_client.list_objects(bucket_mlflow, recursive=True)
    artifacts_list = [obj.object_name for obj in artifacts]
    if artifacts_list:
        print(f"‚úÖ Artifacts stored in bucket '{bucket_mlflow}':")
        for artifact in artifacts_list:
            print(f"- {artifact}")
    else:
        print(f"‚ö†Ô∏è No artifacts found in bucket '{bucket_mlflow}'.")
except S3Error as exc:
    print(f"‚ùå MinIO access error: {exc}")

‚úÖ Artifacts stored in bucket 'mlopbb-mlflow-sharinghub':
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/estimator.html
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/model/MLmodel
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/model/conda.yaml
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/model/model.pkl
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/model/python_env.yaml
- 1/025d3c1b46a447c6a97292d582d1d73a/artifacts/model/requirements.txt
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/estimator.html
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/model/MLmodel
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/model/conda.yaml
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/model/model.pkl
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/model/python_env.yaml
- 1/12efe9f6651f4c378f100354a5243fcc/artifacts/model/requirements.txt
- 1/852f1287259a4034801b1399dc878636/artifacts/estimator.html
- 1/852f1287259a4034801b1399dc878636/artifacts/model/MLmodel
- 1/852f1287259a4034801b1399dc878636/art