# Deploy the Hugging Face `all-mpnet-base-v2` model - with a registered model

Deploy a Hugging Face model to a managed online endpoint, using a model that is first registered in Azure Machine Learning (AML).

## Install dependencies 

You only need to do this once per compute (uncomment the line below to run it).

In [None]:
# %pip install sentence-transformers

## Connect to AML Workspace



In [None]:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build the workspace client with the default Azure credential.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

# Print the name of the workspace the client is connected to.
print(ml_client.workspace_name)


## Download the model from Hugging Face

- Download model
- Save it locally


In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-transformers checkpoint and write it to ./model, the local
# folder that the registration cell uses as the model path.
# A dedicated name is used here so that `model` only ever refers to the AML
# Model entity; re-running this cell can then no longer clobber that entity.
hf_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
hf_model.save('./model')


## Register Model in AML

In [None]:
from azure.ai.ml.entities import Model

# Describe the local ./model folder as an AML model asset.
local_model = Model(
    path="./model",
    name="all-mpnet-base-v2",
    description="Hugging Face sentence-transformers model"
)

# Keep the entity returned by the registration call in `model`, so that cells
# which later pass `model` refer to the registered asset instead of the
# unregistered local-path description.
model = ml_client.models.create_or_update(local_model)

# Display the registered model as the cell output.
model


## Create or update the endpoint

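Because the endpoint name ends with a random suffix taken from a UUID, running this cell creates a new endpoint rather than updating an existing one (unless the suffix happens to repeat).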

In [None]:
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, CodeConfiguration

import uuid
# Endpoint name = fixed prefix + the first four characters of a random UUID4.
name_suffix = str(uuid.uuid4())[:4]
endpoint_name = "all-mpnet-base-" + name_suffix

# Key-authenticated managed online endpoint.
endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key")

# Submit the endpoint and block on the result of the operation.
endpoint_poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
endpoint_poller.result()


## Create the deployment (the part of the endpoint that actually serves requests)

In [None]:
from azure.ai.ml.entities import Environment
deployment_name = "get-embeddings"

# Location of the scoring code and the script used as its entry point.
code_config = CodeConfiguration(
    code="./src/",
    scoring_script="score.py"
)

# Runtime environment: base container image plus the conda spec in conda.yaml.
environment = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="conda.yaml",
)

# Deployment definition. The single `code_config` object above is reused here
# instead of building a second, identical CodeConfiguration inline.
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint.name,
    model=model,
    code_configuration=code_config,
    environment=environment,
    instance_type="Standard_DS3_v2",
    instance_count=1
)


In [None]:
# Submit the deployment and block on the result of the operation.
deployment_poller = ml_client.online_deployments.begin_create_or_update(deployment)
deployment_poller.result()

## Assign traffic to the deployment

In [None]:
# Route 100% of the endpoint's traffic to the deployment.
endpoint.traffic = {deployment_name: 100}

# Use the same online_endpoints operations object that created the endpoint,
# and keep the returned endpoint so later cells read the updated object.
endpoint = ml_client.online_endpoints.begin_create_or_update(endpoint).result()

## Get the endpoint API

In [None]:
# Take the scoring URI from the endpoint object and print it for reference.
scoring_uri = endpoint.scoring_uri
API_URI = scoring_uri
print(f"API URI: {API_URI}")

## Check the endpoint on the deployment

Go to https://aml.azure.com, find your **Endpoint** -> **Consume** and get the key.

Create a `.env` file with the following contents:

```bash
API_KEY=<<get api key from endpoint in aml>>
API_URI=<<the API_URI printed above; also available under Endpoint -> Consume>>
```

## Define `get_embeddings`

In [None]:
import requests
import os
from dotenv import load_dotenv
# Load the .env file before reading the settings below.
load_dotenv()

# Each value is None when the corresponding variable is not set.
API_KEY = os.environ.get("API_KEY")
API_URI = os.environ.get("API_URI")


def get_embeddings(data, url=API_URI, api_key=API_KEY, timeout=30):
    """POST ``data`` as JSON to the scoring endpoint and return the decoded reply.

    Args:
        data: JSON-serialisable payload sent as the request body.
        url: Scoring URI to call. Defaults to the value ``API_URI`` had when
            this function was defined.
        api_key: Primary/secondary key, AMLToken, or Microsoft Entra ID token
            for the endpoint, sent as a bearer token. Defaults to the value
            ``API_KEY`` had when this function was defined.
        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so a
            stalled endpoint cannot block the caller forever.

    Returns:
        The parsed JSON response when the status code is 200. For any other
        status code the status, headers and body are printed and ``None`` is
        returned.

    Raises:
        Exception: If ``api_key`` is empty or ``None``.
        ValueError: If ``url`` is empty or ``None``.
    """
    if not api_key:
        raise Exception("A key should be provided to invoke the endpoint")

    # Fail early with a clear message instead of an opaque URL error from requests.
    if not url:
        raise ValueError("A scoring URL should be provided to invoke the endpoint")

    headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + api_key}

    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    # Non-200: print the diagnostics and signal failure with None.
    print("The request failed with status code: " + str(response.status_code))
    print(response.headers)
    print(response.text)
    return None

## Test it

In [None]:
# Send one sample sentence to the endpoint and print the returned value.
sample_payload = {"sentences": "Hello, World!"}
print(get_embeddings(sample_payload))