### Connect to Azure Machine Learning workspace

In [20]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate with DefaultAzureCredential, then build the workspace client
# from the config file discovered on disk (see the cell output below).
credential = DefaultAzureCredential()
client = MLClient.from_config(credential=credential)


Found the config file in: .\config.json


### Get Credentials from Key Vault

In [21]:
from azure.keyvault.secrets import SecretClient

# Key Vault that holds the identifiers/secrets forwarded to the training job.
keyvault_uri = "https://plato-default-key-vault.vault.azure.net/"
secret_client = SecretClient(vault_url=keyvault_uri, credential=credential)

# Fetch the three secrets in a fixed order and unpack their values; the
# generator keeps the loop variable out of the notebook namespace.
tenant_id, client_id, client_secret = (
    secret_client.get_secret(secret_name).value
    for secret_name in ("azure-tenant-id", "ml-client-id", "ml-client-secret")
)

### Configure the Distributed Training Job

In [22]:
from azure.ai.ml import command, Input, UserIdentityConfiguration
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.entities import VsCodeJobService

# Identity configuration handed to the job through `identity=` below.
identity = UserIdentityConfiguration()
# Registered environment (name:version) the job runs in.
env = 'azureml:Plato-GPT-env:5'

# Configure the command job that runs `train.py` from ./src with three file
# inputs: the config file, the trained tokenizer, and the encoded token ids.
job = command(
    inputs=dict(
        # Local config file, mounted read-only into the job.
        config=Input(
            type=AssetTypes.URI_FILE,
            path='./config.json',
            mode=InputOutputModes.RO_MOUNT,
        ),
        # Tokenizer and encoded ids live in the workspace blob datastore and
        # are downloaded to the compute before the command starts.
        tokenizer=Input(
            type=AssetTypes.URI_FILE,
            path='azureml://subscriptions/af9d95ee-d424-470c-8f1a-3540102fef9f/resourcegroups/plato/workspaces/plato-workspace/datastores/workspaceblobstore/paths/LocalUpload/6a1ed1b7a5c13873be69ea498eea113f/trained_tokenizer.json',
            mode=InputOutputModes.DOWNLOAD,
        ),
        encoded_ids=Input(
            type=AssetTypes.URI_FILE,
            path='azureml://subscriptions/af9d95ee-d424-470c-8f1a-3540102fef9f/resourcegroups/plato/workspaces/plato-workspace/datastores/workspaceblobstore/paths/LocalUpload/37925f58b3d03624804238019da78252/encoded_ids.json',
            mode=InputOutputModes.DOWNLOAD,
        ),
    ),
    # NOTE(review): these values come from Key Vault but are passed to the job
    # as plain environment variables, which become part of the submitted job
    # definition. Confirm this exposure is acceptable, or have train.py read
    # the secrets from Key Vault at runtime instead.
    environment_variables={
        'AZURE_TENANT_ID': tenant_id,
        'AZURE_CLIENT_ID': client_id,
        'AZURE_CLIENT_SECRET': client_secret,
    },
    code="./src",  # location of source code
    # Built from adjacent string literals so the submitted command contains
    # single spaces between arguments. A backslash-newline inside one literal
    # would embed each continuation line's leading indentation in the command.
    command=(
        "python train.py"
        " --config_file ${{inputs.config}}"
        " --tokenizer_file ${{inputs.tokenizer}}"
        " --encoded_ids_file ${{inputs.encoded_ids}}"
    ),
    environment=env,
    compute='a100',
    instance_count=1,
    # PyTorch distributed launch with 4 processes on the single instance;
    # presumably one process per GPU on the 'a100' compute -- TODO confirm.
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 4,
    },
    experiment_name="Plato-GPT-3",
    display_name="DDP-Train-50k",
    identity=identity,
    # Attach a VS Code job service on all nodes.
    services={
        "vscode": VsCodeJobService(
            nodes="all"
        ),
    },
)

In [24]:
# Submit the configured job to the workspace. The returned job object is left
# as the cell's last expression so the notebook renders its summary.
submitted_job = client.create_or_update(job)
submitted_job

Readonly attribute status will be ignored in class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.JobService'>


Experiment,Name,Type,Status,Details Page
Plato-GPT-3,sharp_cabbage_27nvdpvxmf,command,Starting,Link to Azure Machine Learning studio
