In [None]:
%%writefile finetune_phi3_job.yaml
# Azure ML command job that runs finetune_phi3.py from ./src
# (job description: "Finetune Phi3 Vision").
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json

# --- Job identity -----------------------------------------------------------
experiment_name: finetune_phi3_vision
display_name: finetune_phi3_vision_job_5
# name: finetune_phi3_vision_job   (explicit job name, currently disabled)
description: Finetune Phi3 Vision

# --- What to run ------------------------------------------------------------
code: ./src
# Folded scalar (>-): the lines below are joined with single spaces into one
# command line; the script receives the input folder and the output folder
# as its two positional arguments.
command: >-
  python finetune_phi3.py
  ${{inputs.data_dir}}
  ${{outputs.out_dir}}
environment: azureml:llava_finetuning:16
environment_variables:
  WANDB_MODE: disabled

# --- Data in / model out ----------------------------------------------------
inputs:
  data_dir:
    type: uri_folder
    path: azureml:Sujet-Finance-Vision-10k@latest
    # Disabled alternatives kept for reference:
    # mode: ro_mount
    # path: azureml:burbery_data@latest
outputs:
  out_dir:
    type: custom_model
    mode: upload

# --- Where to run -----------------------------------------------------------
compute: azureml:a100-low-priority-france
# compute: azureml:fine-tune-cluster   (alternative target, currently disabled)
resources:
  instance_count: 1
distribution:
  type: pytorch
  process_count_per_instance: 1

# --- Interactive services ---------------------------------------------------
# For distributed jobs, use the `nodes` property to pick which node you want to
# enable interactive services on. If `nodes` is not set, interactive
# applications are only enabled on the head node by default. Values are "all",
# or a compute node index (for ex. "0", "1" etc.).
services:
  my_vs_code:
    type: vs_code
    nodes: all
  my_jupyter_lab:
    type: jupyter_lab
    nodes: all


In [None]:
# Submit the job described by finetune_phi3_job.yaml (written by the cell above).
!az ml job create --file finetune_phi3_job.yaml

In [None]:
%%writefile endpoint_phi3_env_jo.yaml
# Azure ML command job that prints the resolved path of the `model_dir` input
# and then sleeps for 6000 seconds, with VS Code and JupyterLab services
# enabled (presumably so the inference environment can be explored
# interactively while the job is alive).
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json

# --- Job identity -----------------------------------------------------------
experiment_name: finetune_phi3_vision
display_name: endpoint_phi3_env_job
# name: endpoint_phi3_env_job   (explicit job name, currently disabled)
description: Endpoint Phi3 Vision

# --- What to run ------------------------------------------------------------
# Folded scalar (>-): the two lines below are joined with a single space.
command: >-
  echo "Model Dir: ${{inputs.model_dir}}";
  sleep 6000
environment: azureml:llava_finetuning_inference:2
environment_variables:
  WANDB_MODE: disabled

# --- Model input ------------------------------------------------------------
inputs:
  model_dir:
    type: custom_model
    path: azureml:finetuned_phi3_vision@latest

# --- Where to run -----------------------------------------------------------
compute: azureml:fine-tune-cluster
# compute: azureml:a100-low-priority   (alternative target, currently disabled)
resources:
  instance_count: 1
distribution:
  type: pytorch
  process_count_per_instance: 1

# --- Interactive services ---------------------------------------------------
# For distributed jobs, use the `nodes` property to pick which node you want to
# enable interactive services on. If `nodes` is not set, interactive
# applications are only enabled on the head node by default. Values are "all",
# or a compute node index (for ex. "0", "1" etc.).
services:
  my_vs_code:
    type: vs_code
    nodes: all
  my_jupyter_lab:
    type: jupyter_lab
    nodes: all


In [None]:
# Submit the job described by endpoint_phi3_env_jo.yaml (written by the cell above).
!az ml job create --file endpoint_phi3_env_jo.yaml

In [None]:
%%writefile ./endpoint/endpoint.yaml
# Managed online endpoint definition (see the schema URL below).
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
# Callers authenticate with an endpoint key.
auth_mode: key
# Endpoint name.
name: finetunedPhi3EndpointFin

In [None]:
# Create the online endpoint described by ./endpoint/endpoint.yaml.
!az ml online-endpoint create --file ./endpoint/endpoint.yaml

In [34]:
%%writefile ./endpoint/deployment.yaml
# Managed online deployment "blue" for the endpoint finetunedPhi3EndpointFin.
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json

# --- Identity ---------------------------------------------------------------
endpoint_name: finetunedPhi3EndpointFin
name: blue

# --- What is served ---------------------------------------------------------
model: azureml:phi3_finetuned_financial:1
environment: azureml:llava_finetuning_inference:2
code_configuration:
  # `.` is a relative path - presumably resolved against this file's folder
  # (./endpoint); confirm that score.py lives there.
  scoring_script: score.py
  code: .

# --- Capacity ---------------------------------------------------------------
instance_count: 1
instance_type: Standard_NC12s_v3

# --- Request handling -------------------------------------------------------
request_settings:
  # 180000 ms = 180 s per scoring request.
  request_timeout_ms: 180000

Overwriting ./endpoint/deployment.yaml


In [37]:
# Create the deployment described by ./endpoint/deployment.yaml; --all-traffic routes
# all endpoint traffic to it once it has been provisioned.
!az ml online-deployment create --all-traffic --file ./endpoint/deployment.yaml

..............................................................................................................................................................{
  "app_insights_enabled": false,
  "code_configuration": {
    "code": "/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/codes/8544249c-ba30-41c8-a033-5620519fc961/versions/1",
    "scoring_script": "score.py"
  },
  "egress_public_network_access": "enabled",
  "endpoint_name": "finetunedphi3endpointfin",
  "environment": "azureml:/subscriptions/781b03e7-6eb7-4506-bab8-cf3a0d89b1d4/resourceGroups/antonslutsky-rg/providers/Microsoft.MachineLearningServices/workspaces/gpu-workspace/environments/llava_finetuning_inference/versions/2",
  "environment_variables": {
    "AML_APP_ROOT": "/var/azureml-app/endpoint",
    "AZUREML_ENTRY_SCRIPT": "score.py",
    "AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/phi3_finetuned_financia

All traffic will be set to deployment blue once it has been provisioned.
If you interrupt this command or it times out while waiting for the provisioning, you can try to set all the traffic to this deployment later once its has been provisioned.
Check: endpoint finetunedPhi3EndpointFin exists

Uploading endpoint (0.0 MBs):   0%|          | 0/4935 [00:00<?, ?it/s]
Uploading endpoint (0.0 MBs):   8%|8         | 399/4935 [00:00<00:01, 2298.36it/s]
Uploading endpoint (0.0 MBs): 100%|##########| 4935/4935 [00:00<00:00, 6779.65it/s]
Uploading endpoint (0.0 MBs): 100%|##########| 4935/4935 [00:00<00:00, 6473.50it/s]




In [30]:
import json
import os
import ssl
import urllib.error
import urllib.request

def allowSelfSignedHttps(allowed):
    """Optionally switch off server-certificate verification on the client side.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable is
    unset or empty, and the ``ssl`` module exposes ``_create_unverified_context``,
    the module-level ``ssl._create_default_https_context`` factory is replaced by
    that unverified-context factory. In every other case nothing is changed.

    Args:
        allowed: Truthy to request the bypass, falsy to leave ``ssl`` untouched.

    Returns:
        None.
    """
    if not allowed:
        return
    # An explicit PYTHONHTTPSVERIFY setting takes precedence over the bypass.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if not unverified_factory:
        return
    ssl._create_default_https_context = unverified_factory

# NOTE(review): this replaces the ssl module's default HTTPS context factory, so
# certificate verification is bypassed for HTTPS requests made after this call.
# Keep it only if the scoring service really uses a self-signed certificate.
allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
# data = {
#     "prompt" : "<|user|>\n<|image_1|>What is shown in this image?<|end|><|assistant|>\n",
#     "image_url" : "https://templatelab.com/wp-content/uploads/2016/06/Personal-Financial-Statement-Template-31.jpg"
# }

data = {"input_data": {"input_string": ["hello"]}, "parameters": {"top_p": 1.0, "temperature": 1.0, "max_new_tokens": 500}}

# JSON-encode the payload and turn it into bytes for the HTTP request body.
body = json.dumps(data).encode()

# Scoring URL. Set AZUREML_ENDPOINT_URL to target a different endpoint; the
# default is the URL this notebook was originally run against.
url = os.environ.get(
    'AZUREML_ENDPOINT_URL',
    'https://aml-westus2-phi3v-pihof.westus2.inference.ml.azure.com/score',
)

# The endpoint credential (primary/secondary key, AMLToken, or Microsoft Entra ID
# token) is read from the AZUREML_ENDPOINT_KEY environment variable so that it is
# never stored in the notebook. Export it before starting the kernel.
api_key = os.environ.get('AZUREML_ENDPOINT_KEY', '')
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# To force the request to a specific deployment, add an 'azureml-model-deployment'
# header with the deployment name. Without that header (as here) the request
# observes the endpoint traffic rules.
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

b'{"output":"None"}'
