In [4]:
from azure.ai.ml import MLClient, command, Input, Output, dsl
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import DefaultAzureCredential

# Build the workspace client from the workspace config file, authenticating
# with DefaultAzureCredential.
ml_client = MLClient.from_config(DefaultAzureCredential())

# Look up version "1" of the registered "Diabetes_Dataset" data asset.
data_asset = ml_client.data.get(name="Diabetes_Dataset", version="1")



# Command job that runs data_preprocess.py from ./src against the registered
# data asset and writes its result to the "processed_data" output.
data_preprocess_job = command(
    compute="test-compute-1-mlstudio",
    # This environment must provide every dependency the script needs.
    environment="test-env-azureml:1",
    code="./src",  # Folder that contains data_preprocess.py
    command='python data_preprocess.py --data "${{inputs.data}}" --output "${{outputs.processed_data}}"',
    inputs=dict(
        data=Input(
            type=AssetTypes.URI_FILE,
            path=data_asset.id,
            mode=InputOutputModes.RO_MOUNT,
        ),
    ),
    outputs=dict(
        processed_data=Output(
            type=AssetTypes.URI_FILE,
            mode=InputOutputModes.RW_MOUNT,
        ),
    ),
)

# Command job that runs train-model-mlflow.py from ./src. The "data" input has
# no path here; it is supplied when the job is called with data=...
model_training_job = command(
    compute="test-compute-1-mlstudio",
    # This environment must provide every dependency the script needs.
    environment="test-env-azureml:1",
    code="./src",  # Folder that contains train-model-mlflow.py
    command='python train-model-mlflow.py --training_data "${{inputs.data}}" --model_output "${{outputs.model_output}}"',
    inputs=dict(
        data=Input(
            type=AssetTypes.URI_FILE,
            mode=InputOutputModes.RO_MOUNT,
        ),
    ),
    outputs=dict(
        model_output=Output(
            type=AssetTypes.MLFLOW_MODEL,
        ),
    ),
)
# NOTE(review): the original bound the same object to `job` as well; this looks
# like a leftover alias. It is kept so any code relying on `job` still works --
# confirm whether it can be removed.
job = model_training_job


# Create a pipeline by combining the two jobs.
# The pipeline description and its default compute are set on the decorator.
@dsl.pipeline(
    description="Pipeline combining data preprocessing and model training",
    default_compute="test-compute-1-mlstudio"
)
def training_pipeline():
    # NOTE(review): the Azure ML DSL is believed to derive pipeline node names
    # from these local variable names -- confirm before renaming them.
    # Step 1: call the preprocessing command job to create its pipeline step.
    preprocess_step = data_preprocess_job()
    # Step 2: pass the preprocessing output to the training job's "data" input.
    train_step = model_training_job(
        data=preprocess_step.outputs.processed_data)
    
    # Return both step outputs from the pipeline under these two keys.
    return{
        "pipeline_job_transformed_data": preprocess_step.outputs.processed_data,
        "pipeline_job_trained_model": train_step.outputs.model_output,
    }


# Call the pipeline function to build the pipeline object to submit.
pipeline = training_pipeline()

# Submit the pipeline through the workspace client, then print the Studio URL
# of the resulting job so the run can be monitored.
pipeline_job = ml_client.jobs.create_or_update(job=pipeline)
aml_url = pipeline_job.studio_url
print("Monitor your pipeline at", aml_url)


Found the config file in: /config.json


Monitor your pipeline at https://ml.azure.com/runs/tough_pizza_rm07pbwbft?wsid=/subscriptions/3b7a65ed-df6d-4020-9010-5585f2149752/resourcegroups/rg-test-1/workspaces/mlstudio-test-1&tid=dc0b52a3-68c5-44f7-881d-9383d8850b96
