# Regular usage of collie

In [1]:
import sys
import os
sys.path.append("../..")

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

# from collie import (
#     Transformer,
#     Trainer,
#     Evaluator,
#     Pusher,
#     Event,
#     TrainerPayload,
#     TransformerPayload,
#     EvaluatorPayload,
#     PusherPayload,
#     Orchestrator
# )
from collie.core.transform.transform import Transformer
from collie.core.tuner.tuner import Tuner
from collie.core.trainer.trainer import Trainer
from collie.core.evaluator.evaluator import Evaluator
from collie.core.pusher.pusher import Pusher
from collie.core.orchestrator.orchestrator import Orchestrator
from collie.contracts.event import Event
from collie.core.models import (
    TransformerPayload,
    TrainerPayload,
    TunerPayload,
    EvaluatorPayload,
    PusherPayload,
    TrainerArtifactPath,
    TransformerArtifactPath,
    TunerArtifactPath,
    EvaluatorArtifactPath,
    PusherArtifactPath
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed torch's global RNG once, up front, so the synthetic data, the initial
# model weights and the DataLoader shuffling are repeatable when the notebook
# is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

# Shape of the synthetic classification problem used throughout the notebook.
num_samples = 1000  # rows generated by the transformer
input_dim = 20      # features per row (input width of the classifier)
num_classes = 4     # distinct labels (output width of the classifier)

## Transformer

In [3]:
class MLPTransformer(Transformer):
    """Pipeline step that fabricates a random tabular classification dataset."""

    def __init__(self) -> None:
        super().__init__()

    def handle(self, event) -> Event:
        """Build the training frame and wrap it in a ``TransformerPayload``.

        The incoming ``event`` is not read. The frame has ``num_samples`` rows,
        ``input_dim`` columns named ``feature_<i>`` drawn from a standard
        normal, and a ``label`` column of integers in ``[0, num_classes)``.
        No validation or test split is produced.
        """
        # Draw features first, then labels, so the RNG stream is consumed in a fixed order.
        features = torch.randn(num_samples, input_dim)
        labels = torch.randint(0, num_classes, (num_samples,))

        feature_names = [f"feature_{i}" for i in range(input_dim)]
        frames = [
            pd.DataFrame(features.numpy(), columns=feature_names),
            pd.DataFrame(labels.numpy(), columns=["label"]),
        ]
        payload = TransformerPayload(
            train_data=pd.concat(frames, axis=1),
            validation_data=None,
            test_data=None,
        )
        return Event(payload=payload)

## Trainer

In [4]:
class SimpleClassifier(nn.Module):
    """Two-layer perceptron (Linear -> ReLU -> Linear) that returns raw logits.

    Args:
        in_features: Width of each input vector. ``None`` (the default) falls
            back to the notebook-level ``input_dim``.
        hidden_features: Width of the hidden layer. Defaults to 64.
        out_features: Number of logits produced. ``None`` (the default) falls
            back to the notebook-level ``num_classes``.
    """

    def __init__(self, in_features=None, hidden_features=64, out_features=None):
        super().__init__()
        # Resolve the notebook globals at call time so a no-argument call still
        # builds the input_dim -> 64 -> num_classes network.
        if in_features is None:
            in_features = input_dim
        if out_features is None:
            out_features = num_classes
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dimension is ``in_features``."""
        x = self.relu(self.fc1(x))
        return self.fc2(x)


class MLPTrainer(Trainer):
    """Pipeline step that fits a ``SimpleClassifier`` on the transformer's training frame."""

    def __init__(self):
        super().__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Batches are moved to ``self.device`` in ``handle``; the model has to
        # live there too, otherwise the forward pass fails on CUDA machines.
        self.model = SimpleClassifier().to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)

    def handle(self, event):
        """Train for 10 epochs and return the model with its final mean batch loss.

        Args:
            event: Event whose ``payload.train_data`` is a DataFrame with a
                ``label`` column; every other column is used as a feature.

        Returns:
            Event carrying a ``TrainerPayload`` with the trained model, the mean
            batch loss of the last epoch as ``train_loss`` and ``val_loss=None``.

        Raises:
            ValueError: If ``train_data`` has no rows.
        """
        train_data = event.payload.train_data

        X = torch.tensor(train_data.drop("label", axis=1).values, dtype=torch.float32)
        y = torch.tensor(train_data["label"].values, dtype=torch.long)
        if X.shape[0] == 0:
            # Fail with a clear message instead of dividing by zero below.
            raise ValueError("train_data is empty; nothing to train on")

        dataset = TensorDataset(X, y)
        dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
        epochs = 10
        epoch_loss = 0.0
        for epoch in range(1, epochs + 1):
            self.model.train()
            total_loss = 0.0
            for xb, yb in dataloader:
                xb, yb = xb.to(self.device), yb.to(self.device)
                self.optimizer.zero_grad()
                logits = self.model(xb)
                loss = self.criterion(logits, yb)
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()

            epoch_loss = total_loss / len(dataloader)
            # Log the rate that was in effect during this epoch, then advance
            # the schedule so StepLR actually decays the learning rate.
            self.log_metric("learning rate", self.scheduler.get_last_lr()[0], step=epoch)
            self.log_metric("loss", round(epoch_loss, 3), step=epoch)
            self.scheduler.step()

        return Event(
            payload=TrainerPayload(
                model=self.model,
                train_loss=epoch_loss,
                val_loss=None
            )
        )

## Evaluator

In [5]:
class MLPEvaluator(Evaluator):
    """Pipeline step that reports the experiment's training loss next to a mocked production metric."""

    def __init__(
        self,
        registered_model_name="MLPClassifier",
        model_uri=""
    ) -> None:
        super().__init__(
            registered_model_name=registered_model_name,
            model_uri=model_uri
        )

    def handle(self, event):
        """Return an ``EvaluatorPayload`` built from the trainer's loss.

        Args:
            event: Event whose ``payload.train_loss`` holds the trainer's loss.

        Returns:
            Event whose payload maps ``"Experiment"`` to that loss and
            ``"Production"`` to a fixed placeholder value.
        """
        train_loss = event.payload.train_loss
        # Mock the production metric; a real evaluator would obtain it from the
        # currently deployed model.
        production_metric = 10

        return Event(
            payload=EvaluatorPayload(
                metrics={"Experiment": train_loss, "Production": production_metric},
                # NOTE(review): presumably tells the framework that a lower
                # metric wins (these are losses) - confirm against collie.
                greater_is_better=False
            )
        )

## Pusher

In [6]:
class MLPPusher(Pusher):
    """Final pipeline step; reports a fixed placeholder model URI."""

    def __init__(
        self,
        registered_model_name="MLPClassifier"
    ) -> None:
        super().__init__(registered_model_name=registered_model_name)

    def handle(self, event):
        """Ignore ``event`` and return an Event carrying the placeholder URI."""
        pushed = PusherPayload(
            model_uri="mlp_model_uri",
        )
        return Event(payload=pushed)

## Main

In [7]:
# Wire the four components together in execution order and run the pipeline.
# The tracking server can be overridden through MLflow's standard
# MLFLOW_TRACKING_URI environment variable; the default is the local server
# this example was written against.
orchestrator = Orchestrator(
    tracking_uri=os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5001"),
    components=[
        MLPTransformer(),
        MLPTrainer(),
        MLPEvaluator(),
        MLPPusher()
    ],
    mlflow_tags={"Example": "MLP"},
    experiment_name="MLP2",
)
orchestrator.run()

2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
  return _dataset_source_registry.resolve(
  return _dataset_source_registry.resolve(
2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/09/21 21:46:53 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Logging data with context: train_data ds: <mlflow.data.pandas_dataset.PandasDataset object at 0x14fbf55d0> source: /Users/apple/Documents/PythonProject/collie/example/mlp/mlruns/1/cac1b91813d1487dafec16a8e0fc73ae/artifacts/train_data.csv
🏃 View run Transformer at: http://localhost:5001/#/experiments/1/runs/cac1b91813d1487dafec16a8e0fc73ae
🧪 View experiment at: http://localhost:5001/#/experiments/1


2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Registered model 'MLPClassifier' already exists. Creating a new version of this model...
2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/09/21 21:47:00 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run Trainer at: http://localhost:5001/#/experiments/1/runs/d05b0705a86043afb1eade1a7827a0fc
🧪 View experiment at: http://localhost:5001/#/experiments/1
Model URI: /Users/apple/Documents/PythonProject/collie/example/mlp/mlruns/1/d05b0705a86043afb1eade1a7827a0fc/artifacts
🏃 View run Evaluator at: http://localhost:5001/#/experiments/1/runs/efddc0398ad04ef6a60b508a3c2920e6
🧪 View experiment at: http://localhost:5001/#/experiments/1
🏃 View run Orchestrator at: http://localhost:5001/#/experiments/1/runs/504e14dcb3d445edbfde8dbcf9fd39ff
🧪 View experiment at: http://localhost:5001/#/experiments/1


RuntimeError: Failed to register model 'MLPClassifier' with URI '/Users/apple/Documents/PythonProject/collie/example/mlp/mlruns/1/d05b0705a86043afb1eade1a7827a0fc/artifacts': INVALID_PARAMETER_VALUE: Invalid model version source: '/Users/apple/Documents/PythonProject/collie/example/mlp/mlruns/1/d05b0705a86043afb1eade1a7827a0fc/artifacts'. To use a local path as a model version source, the run_id request parameter has to be specified and the local path has to be contained within the artifact directory of the run specified by the run_id.