Before, starting the deployment make sure AzureMl kernal is selected

In [23]:
# Check for package
!pip show azure-ai-ml

Name: azure-ai-ml
Version: 1.28.1
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py38/lib/python3.10/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-monitor-opentelemetry, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, pydash, pyjwt, pyyaml, six, strictyaml, tqdm, typing-extensions
Required-by: 


In [2]:
# Install the package if not available
!pip install azure-ai-ml

Collecting azure-ai-ml
  Downloading azure_ai_ml-1.28.1-py3-none-any.whl (13.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m112.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting azure-monitor-opentelemetry
  Downloading azure_monitor_opentelemetry-1.6.11-py3-none-any.whl (25 kB)
Collecting pydash<9.0.0,>=6.0.0
  Downloading pydash-8.0.5-py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.1/102.1 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting azure-storage-file-datalake>=12.2.0
  Downloading azure_storage_file_datalake-12.21.0-py3-none-any.whl (264 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.1/264.1 kB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
Collecting azure-storage-file-share
  Downloading azure_storage_file_share-12.22.0-py3-none-any.whl (291 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.3/291.3 kB[0m [31m39.2 MB/s[0m 

# User Identity verification

In [3]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
# Get the registered model
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Environment,
    CodeConfiguration,
    Model,
    OnlineRequestSettings
)
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    credential = InteractiveBrowserCredential()

## Initialize ML Client

In [4]:

# Get a handle to workspace
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


## Initialize an endpoint

In [26]:
endpoint_name = "gte-finance-endpoint"
# Create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    description="Endpoint for GTE Finance model",
    auth_mode="key"
)


## Register the embedding model with ML Workspace

In [6]:
model = Model(
       name="gte-finance-model",
       version="1",
       description="FinanceRAG embedding model",
       path="./model/gte-finance-model/",
       type="custom_model"
   )
ml_client.models.create_or_update(model)

[32mUploading gte-finance-model (28.92 MBs): 100%|██████████| 28920962/28920962 [00:00<00:00, 53327542.02it/s]
[39m



Model({'job_name': None, 'intellectual_property': None, 'system_metadata': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'gte-finance-model', 'description': 'FinanceRAG embedding model', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/4be8a069-9dad-4913-a634-fc7605684d95/resourceGroups/financerag-rgf846e66535fa4a64bd/providers/Microsoft.MachineLearningServices/workspaces/financerag-mlwf846e66535fa4a64bd/models/gte-finance-model/versions/1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cif846e66535fa4a64bd/code/Users/capcool79/checkout', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x76a8700f79a0>, 'serialize': <msrest.serialization.Serializer object at 0x76a8700f5a80>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/4be8a069-9dad-4913-a634-fc7605684d95/resourceGroups/financerag-rgf846e66535fa4a64bd/workspa

In [1]:
# Command if model already registered
# model = ml_client.models.get(name="gte-finance-model",version=1)

# Create enviornment from base Image

In [9]:
%%writefile model/environment.yml
name: embedding_inference_env
channels:
  - conda-forge
  - pytorch
  - defaults
dependencies:
  - python=3.10
  - pip=23.1.2
  - pip:
    - sentence-transformers>=2.2.2
    - torch>=2.0.0
    - transformers>=4.30.0
    - peft>=0.4.0
    - numpy>=1.24.0
    - tqdm>=4.65.0
    - scikit-learn>=1.2.2
    - joblib
    - azureml-inference-server-http
    - inference-schema

Writing model/environment.yml


In [10]:
# Create environment
environment = Environment(
    name="gte-finance-env",
    description="Environment for GTE Finance model",
    conda_file="model/environment.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest"
)

# Create Endpoint

In [27]:
# Create a deployment

# Create or update the endpoint
ml_client.online_endpoints.begin_create_or_update(endpoint).result()
print(f"Endpoint '{endpoint_name}' created or updated successfully")

Endpoint 'gte-finance-endpoint' created or updated successfully


# Create Score.py file for Model Inference

In [14]:
%%writefile model/score.py
import os
import json
import logging
import numpy as np
from sentence_transformers import SentenceTransformer

# Declare globals at the module level
model = None
config = {}

def init():
    """
    Initialize the model when the container starts.
    This function is called once when the service is deployed.
    """
    global model, config

    logging.info("Initializing GTE Finance model")

    # Centralized configuration
    config = {
        "embedding_batch_size": 16,
        "show_progress_bar": False,
        "normalize_embeddings": False,
        "max_seq_length": 2000
    }

    try:
        # Get base path from environment variable
        base_model_dir = os.getenv("AZUREML_MODEL_DIR", "")

        # If the environment variable is not set, use fallback path
        if not base_model_dir:
            base_model_dir = "/var/azureml-app/azureml-models/gte-finance-model/1"

        # Recursive function to find potential model directories
        def find_model_dirs(directory, max_depth=5, current_depth=0):
            if current_depth > max_depth or not os.path.exists(directory):
                return []

            potential_dirs = []

            # Check if this directory could be a model directory
            if any(os.path.exists(os.path.join(directory, f)) for f in
                  ["config.json", "config_sentence_transformers.json", "modules.json"]):
                potential_dirs.append(directory)

            # Recursively check subdirectories
            try:
                for item in os.listdir(directory):
                    item_path = os.path.join(directory, item)
                    if os.path.isdir(item_path):
                        potential_dirs.extend(find_model_dirs(item_path, max_depth, current_depth + 1))
            except Exception:
                pass

            return potential_dirs

        # Find all potential model directories
        potential_model_dirs = find_model_dirs(base_model_dir)

        # Try loading the model from each potential directory
        model = None

        # Try loading from Hugging Face hub as a fallback option
        try:
            model = SentenceTransformer("Yaksh170802/gte-finance-model",trust_remote_code=True)
        except Exception:
            pass

        # If Hugging Face loading failed, try local directories
        if model is None:
            for model_dir in potential_model_dirs:
                try:
                    model = SentenceTransformer(model_dir)
                    break
                except Exception:
                    continue

        # Final attempt - try loading directly from base directory with trust_remote_code
        if model is None:
            model = SentenceTransformer(base_model_dir, trust_remote_code=True)

        # Set max_seq_length after initialization
        if config.get("max_seq_length"):
            model.max_seq_length = config.get("max_seq_length")

        logging.info(f"Model max sequence length: {model.max_seq_length}")
        logging.info("Model loaded successfully ✅")

    except Exception as e:
        logging.error(f"Error in model initialization: {str(e)}")
        raise

def generate_embeddings(texts, model_instance, model_config):
    """
    Generate embeddings for a list of input texts.

    Args:
        texts (list of str): The texts to embed.
        model_instance (SentenceTransformer): The loaded model instance.
        model_config (dict): A dictionary containing configuration for encoding.

    Returns:
        numpy.ndarray: The generated embeddings.
    """
    if not model_instance:
        raise RuntimeError("Model has not been initialized. Call init() first.")

    # Generate embeddings
    embeddings = model_instance.encode(
        texts,
        batch_size=model_config.get("embedding_batch_size", 16),
        show_progress_bar=model_config.get("show_progress_bar", False),
        normalize_embeddings=model_config.get("normalize_embeddings", False)
    )

    return embeddings

def run(raw_data):
    """
    Run a prediction on the input data.
    This function is called for each scoring request.
    """
    try:
        logging.info("Received input data for scoring")
        data = json.loads(raw_data)

        texts = data.get("texts", [])
        if not isinstance(texts, list) or not texts:
            return json.dumps({
                "error": "Input must be a JSON object with a 'texts' key containing a non-empty list of strings."
            })

        # Generate embeddings using the loaded model and config
        embeddings = generate_embeddings(texts, model, config)

        # Prepare the successful response
        response = {
            "embeddings": embeddings.tolist(),
            "dimensions": embeddings.shape[1],
            "count": len(texts)
        }

        return json.dumps(response)

    except json.JSONDecodeError:
        logging.error("Failed to decode JSON from input data.")
        return json.dumps({"error": "Invalid JSON format received."})

    except Exception as e:
        logging.error(f"Error during prediction: {str(e)}", exc_info=True)
        return json.dumps({"error": str(e)})

Overwriting model/score.py


# Initialize Managed Endpoint Deployment

In [34]:
deployment_name =  "gte-finance-deployment"
instance_type = "Standard_E4s_v3"
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=model.id,
    environment=environment,
    code_configuration=CodeConfiguration(
        code="./model",
        scoring_script="score.py"  # This must match the name of your script in the src directory
    ),
    instance_type=instance_type,
    instance_count=2,
    environment_variables={
        "MAX_SEQUENCE_LENGTH": "2000",
        "SENTENCE_TRANSFORMERS_HOME": "/var/azureml-app/sentence_transformers_cache"
    },
    request_settings = OnlineRequestSettings(request_timeout_ms = 180000)
)

# Create Deployment

In [35]:
# Create or update the deployment
ml_client.online_deployments.begin_create_or_update(deployment).result()
print(f"Deployment '{deployment_name}' created or updated successfully")

# Allocate traffic to the deployment
endpoint = ml_client.online_endpoints.get(name=endpoint_name)

# Update traffic
ml_client.online_endpoints.begin_create_or_update(
    ManagedOnlineEndpoint(
        name=endpoint_name,
        traffic={deployment_name: 100}
    )
).result()

print(f"Traffic allocated to deployment '{deployment_name}'")
print(f"Endpoint URL: {endpoint.scoring_uri}")

Check: endpoint gte-finance-endpoint exists


.

# Test Deployment

In [None]:
corpus = """Germany,[d] officially the Federal Republic of Germany,[e] is a country in Central Europe. It lies between the Baltic Sea and the North Sea to the north and the Alps to the south. Its sixteen constituent states have a total population of over 82 million in an area of 357,596 km2 (138,069 sq mi), making it the most populous member state of the European Union. Germany borders Denmark to the north, Poland and the Czech Republic to the east, Austria and Switzerland to the south, and France, Luxembourg, Belgium, and the Netherlands to the west. The nation's capital and most populous city is Berlin and its main financial centre is Frankfurt; the largest urban area is the Ruhr.

Settlement in the territory of modern Germany began in the Lower Paleolithic, with various tribes inhabiting it from the Neolithic onward, chiefly the Celts. Various Germanic tribes have inhabited the northern parts of modern Germany since classical antiquity. A region named Germania was documented before AD 100. In 962, the Kingdom of Germany formed the bulk of the Holy Roman Empire. During the 16th century, northern German regions became the centre of the Protestant Reformation. Following the Napoleonic Wars and the dissolution of the Holy Roman Empire in 1806, the German Confederation was formed in 1815.

Formal unification of Germany into the modern nation-state commenced on 18 August 1866 with the North German Confederation Treaty establishing the Prussia-led North German Confederation, which became the German Empire in 1871. After World War I and the German Revolution of 1918–1919, the Empire was replaced by the Weimar Republic. The Nazi rise to power in 1933 led to the establishment of a totalitarian dictatorship, World War II, and the Holocaust. In 1949, after the war and a period of Allied occupation, Germany was organised into two separate polities with limited sovereignty: the Federal Republic of Germany, or West Germany, and the German Democratic Republic, or East Germany. Berlin continued its de jure Four Power status. The Federal Republic of Germany was a founding member of the Council of Europe, the European Economic Community and the European Union in 1951, while the German Democratic Republic was a communist Eastern Bloc state and member of the Warsaw Pact. After the fall of the communist led-government in East Germany, German reunification saw the former East German states join the Federal Republic of Germany on 3 October 1990.

Germany is a developed country with a strong economy; it has the largest economy in Europe by nominal GDP. As a major force in several industrial, scientific and technological sectors, Germany is both the world's third-largest exporter and third-largest importer. It offers social security, a universal health care system, and tuition-free university education. Widely considered a great power, Germany is part of multiple international organisations and forums. It has the third-highest number of UNESCO World Heritage Sites: 55, of which 52 are cultural.

Etymology
Further information: Names of Germany, Germani, and Germania
The English word Germany derives from the Latin Germania, which came into use after Julius Caesar adopted it for the peoples east of the Rhine.[12] The German term Deutschland, originally diutisciu land ('the German lands'), is derived from deutsch (cf. Dutch), which descended from Old High German diutisc 'of the people' (from diot or diota 'people'), originally used to distinguish the language of the common people from Latin and its Romance descendants. This in turn descends from Proto-Germanic *þiudiskaz 'of the people' (see also the Latinised form Theodiscus), derived from *þeudō, descended from Proto-Indo-European *tewtéh₂- 'people', from which the word Teutons also originates.[13]

History"""

In [None]:
import urllib.request
import json

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {"texts":[corpus for i in range(32)]}

body = str.encode(json.dumps(data))

url = 'https://gte-finance-endpoint.eastus.inference.ml.azure.com/score'
# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
api_key = ''
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")


headers = {'Content-Type':'application/json', 'Accept': 'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    response = urllib.request.urlopen(req)

    result = response.read()
    result = json.loads(json.loads(result.decode('utf-8')))

    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the requert ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
