# Deploy the Hugging Face `all-mpnet-base-v2` model - without a model attached to the deployment

Deploy a Hugging Face model. The model is not attached to the deployment; it is dynamically loaded by the endpoint.


## Connect to Azure Machine Learning Workspace

In [None]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Environment,
    CodeConfiguration,
)
from azure.identity import DefaultAzureCredential

In [None]:
# Build the workspace client: authenticate with the default Azure credential
# and let the SDK resolve the workspace via MLClient.from_config.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

## Create the endpoint


In [None]:
# Build the endpoint name from a fixed prefix plus a short random suffix
# (the first four hex characters of a UUID4) to reduce name clashes.
import uuid

name_suffix = uuid.uuid4().hex[:4]
endpoint_name = "all-mpnet-base-" + name_suffix

# Submit the endpoint and block until the long-running operation finishes;
# `endpoint` ends up bound to the object returned by the service.
endpoint_poller = ml_client.begin_create_or_update(
    ManagedOnlineEndpoint(name=endpoint_name)
)
endpoint = endpoint_poller.result()

## Define the deployment

A deployment is a set of resources required for hosting the model that does the actual inferencing.

This also creates a one-off environment based on an existing Docker image and a conda file.

                          |

In [None]:
deployment_name = "get-embeddings"

# Scoring code: the ./src folder, with score.py as the scoring script.
scoring_code = CodeConfiguration(code="./src", scoring_script="score.py")

# Inline environment: a base Docker image combined with conda.yaml.
runtime_environment = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="conda.yaml",
)

# No model is passed here; only code, environment and compute are declared.
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    code_configuration=scoring_code,
    environment=runtime_environment,
    instance_type="Standard_DS3_v2",
    instance_count=1,
)

## Create the deployment

In [None]:
# Submit the deployment, then wait for the long-running operation to finish.
deployment_poller = ml_client.online_deployments.begin_create_or_update(deployment)
deployment = deployment_poller.result()

.

## Assign traffic to the deployment




In [None]:
# Send 100% of the endpoint's traffic to the deployment defined above,
# then push the updated endpoint and wait for the operation to finish.
endpoint.traffic = {deployment_name: 100}
traffic_poller = ml_client.begin_create_or_update(endpoint)
endpoint = traffic_poller.result()

## Get the endpoint URL


In [None]:
# Keep the endpoint's scoring URI and show it so it can be copied.
API_URI = endpoint.scoring_uri
print("API URI: " + str(API_URI))

## Check the endpoint on the deployment

Go to Azure Machine Learning studio (https://ml.azure.com), open your **Endpoint**, and copy the key from the **Consume** tab.

Create a `.env` file and put the following:

```bash
API_KEY=<<get api key from endpoint in aml>>
API_URI=<<the API_URI printed above; it is also shown under Endpoint -> Consume>>
```

## Define `get_embeddings`

In [None]:
import requests
import os
from dotenv import load_dotenv
# Load variables from the .env file into the process environment.
load_dotenv()

# Each value is None when the corresponding variable is not set.
API_KEY = os.environ.get("API_KEY")
API_URI = os.environ.get("API_URI")


def get_embeddings(data, url=API_URI, api_key=API_KEY, *, timeout=60):
    """POST ``data`` as JSON to the scoring endpoint and return the parsed reply.

    Args:
        data: JSON-serialisable payload sent as the request body.
        url: Scoring URI to call. Defaults to the value the module-level
            ``API_URI`` had when this function was defined.
        api_key: Credential sent as a bearer token in the ``Authorization``
            header. Defaults to the value the module-level ``API_KEY`` had
            when this function was defined.
        timeout: Seconds to wait for the endpoint before ``requests`` raises
            a timeout error, so a stalled endpoint cannot block the caller
            indefinitely.

    Returns:
        The decoded JSON body when the endpoint answers with HTTP 200;
        otherwise ``None``, after printing the status code, response headers
        and response body.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
    """
    if not api_key:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous bare Exception keep working.
        raise ValueError("A key should be provided to invoke the endpoint")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    if response.status_code != 200:
        # Print the diagnostics and report the failure as None.
        print("The request failed with status code: " + str(response.status_code))
        print(response.headers)
        print(response.text)
        return None

    return response.json()


## Test it

In [None]:
# Call the endpoint once with a sample payload and show the response.
sample_payload = {"sentences": "Hello, World!"}
embeddings = get_embeddings(sample_payload)
print(embeddings)