In [2]:
import os
from pathlib import Path
from azureml.core import Workspace


def _load_env_file(env_path: Path) -> None:
    if not env_path.exists():
        return
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        if key and key not in os.environ:
            os.environ[key] = value


# Load .env from parent directory
env_path = Path("../../.env").resolve()
_load_env_file(env_path)

# Connect to workspace using secrets from .env
ws = Workspace(
    subscription_id=os.environ["AZUREML_SUBSCRIPTION_ID"],
    resource_group=os.environ["AZUREML_RESOURCE_GROUP"],
    workspace_name=os.environ["AZUREML_WORKSPACE_NAME"]
)

print(f"Connected to workspace: {ws.name}")
print(f"Location: {ws.location}, Resource Group: {ws.resource_group}")

Connected to workspace: data-factory
Location: northeurope, Resource Group: RG_JIT02


In [3]:
from azure.ai.ml import command, Input, Output

train_component = command(
    name="train-model-local",
    display_name="Train Model (Local)",
    inputs={"training_data": Input(type="uri_file")},
    outputs={"model_output": Output(type="uri_folder")},
    code="./src",  # folder with train.py
    command="python train.py --data ${{inputs.training_data}} --output ${{outputs.model_output}}",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu",
    compute="local"   # âœ… run locally
)

In [4]:
from azure.ai.ml.dsl import pipeline

@pipeline()
def local_pipeline(training_data):
    trained_model = train_component(training_data=training_data)
    return {"model_output": trained_model.outputs.model_output}

In [5]:
pip install azureml.pipeline


Note: you may need to restart the kernel to use updated packages.


In [7]:
# Create a simple pipeline using the workspace from cell 1
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.core import Environment, ScriptRunConfig, Experiment
from pathlib import Path

# Get the correct path to the root directory
notebook_dir = Path.cwd()
root_dir = notebook_dir.parent.parent

# Load the environment from environment.yml
env_file = root_dir / "environment.yml"
env = Environment.from_conda_specification(name="ml-env", file_path=str(env_file))

# Define pipeline outputs
prepared_data = PipelineData("prepared_data", datastore=ws.get_default_datastore())
model_output = PipelineData("model_output", datastore=ws.get_default_datastore())

# Create a simple prep step
prep_step = PythonScriptStep(
    name="Prep Data",
    script_name="prep.py",
    arguments=["--output", prepared_data],
    outputs=[prepared_data],
    compute_target="pipe-action",
    source_directory=str(root_dir),
    runconfig=ScriptRunConfig(source_directory=str(root_dir), environment=env)
)

# Build and submit the pipeline
pipeline = Pipeline(workspace=ws, steps=[prep_step])
exp = Experiment(workspace=ws, name="test-pipeline")
run = exp.submit(pipeline)

print(f"Pipeline submitted. Run ID: {run.id}")

FileNotFoundError: [Errno 2] No such file or directory: 'd:\\aiinstitute\\data_factory\\2_Amsterdam\\test_pipeline\\environment.yml'