## Fine-tuning Mistral-7b in Azure AI Foundry

Before running the code cells, install the required Python packages:
```
pip install azure-identity azure-ai-ml
```

### Step 1: Configuring Environment

In [None]:
# Import necessary libraries
import os
import uuid
import requests
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

In [None]:

# Settings for the fine-tuning run.

# Placeholders: replace each of the four values below before running.
SUBSCRIPTION_ID = "<YOUR_AZURE_SUBSCRIPTION_ID>"
RESOURCE_GROUP = "<YOUR_RESOURCE_GROUP>"
WORKSPACE_NAME = "<YOUR_AZUREML_WORKSPACE_NAME>"
MANAGED_IDENTITY_OBJECTID = "<YOUR_MANAGED_IDENTITY_OBJECT_ID>"

# Job naming. JOB_NAME ends in 8 random hex characters so that every run of
# this cell produces a different job name.
DISPLAY_NAME = "MaaP-Mistral-FineTuning-ManagedIdentity"
EXPERIMENT_NAME = "MaaP-Mistral-MI-FineTuning-Experiment"
JOB_NAME = f"maap-mistral-finetuning-{uuid.uuid4().hex[:8]}"

# Base model to fine-tune, the name for the fine-tuned output, and the name
# of the training data asset.
MODEL_NAME = "azureml://registries/azureml/models/mistralai-Mistral-7B-v01/versions/19"
FT_MODEL_NAME = "Mistral-7B-v01-Finetune"
TRAINING_DATA = "training_data"
TASK_TYPE = "TextCompletion"

# Instance types requested for the job.
GPU = "Standard_NC48ads_A100_v4"
CPU = "Standard_E4s_v3"

In [None]:
# Connect to the workspace, then look up version "1" of the training data
# asset and the workspace itself to build the asset's azureml:// URI.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

data_asset = ml_client.data.get(TRAINING_DATA, version="1")
ws = ml_client.workspaces.get(WORKSPACE_NAME)

# The URI is composed from the workspace location and id plus the asset's
# name and version, one path segment pair per line.
data_asset_id = (
    f"azureml://locations/{ws.location}"
    f"/workspaces/{ws._workspace_id}"
    f"/data/{data_asset.name}"
    f"/versions/{data_asset.version}"
)

### Step 2: Fine-Tuning Target Model

In [None]:

# Build the job request body one section at a time, then assemble it.

# Identity the job is submitted under.
job_identity = {
    "identityType": "Managed",
    "objectId": MANAGED_IDENTITY_OBJECTID,
}

# Training hyperparameters.
hyper_parameters = {
    "learning_rate": 5e-6,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
}

# Inputs: the base model and the training data asset.
base_model_input = {
    "jobInputType": "mlflow_model",
    "mode": "ReadOnlyMount",
    "uri": MODEL_NAME,
}
training_data_input = {
    "jobInputType": "uri_file",
    "mode": "ReadOnlyMount",
    "uri": data_asset_id,
}

fine_tuning_details = {
    "hyperParameters": hyper_parameters,
    "model": base_model_input,
    "modelProvider": "Custom",
    "taskType": TASK_TYPE,
    "trainingData": training_data_input,
}

# Output: the fine-tuned model, emitted as an MLflow model.
job_outputs = {
    "registered_model": {
        "assetname": FT_MODEL_NAME,
        "jobOutputType": "mlflow_model",
    },
}

# Instance types requested for the job.
job_resources = {"instanceTypes": [GPU, CPU]}

payload = {
    "properties": {
        "displayName": DISPLAY_NAME,
        "experimentName": EXPERIMENT_NAME,
        "identity": job_identity,
        "fineTuningDetails": fine_tuning_details,
        "jobType": "FineTuning",
        "outputs": job_outputs,
        "resources": job_resources,
    },
}

In [None]:
# Build the management URL for this job: the path segments are joined with
# "/" and the API version is appended as the query string.
endpoint = "/".join(
    [
        "https://management.azure.com",
        "subscriptions", SUBSCRIPTION_ID,
        "resourceGroups", RESOURCE_GROUP,
        "providers", "Microsoft.MachineLearningServices",
        "workspaces", WORKSPACE_NAME,
        "jobs", JOB_NAME,
    ]
) + "?api-version=2024-10-01-preview"


In [None]:

# Authenticate with DefaultAzureCredential, falling back to an interactive
# browser login when it cannot produce a token.
#
# Constructing DefaultAzureCredential does not authenticate; failures only
# surface on the first get_token() call. That call therefore has to sit inside
# the try block, otherwise the fallback below can never run.
try:
    credential = DefaultAzureCredential()
    token = credential.get_token("https://management.azure.com/.default").token
except Exception:
    # Deliberately broad: any failure to obtain a token with the default
    # credential switches to the interactive browser login.
    credential = InteractiveBrowserCredential()
    token = credential.get_token("https://management.azure.com/.default").token

# Second token, for the ml.azure.com scope, from whichever credential worked.
ml_token = credential.get_token("https://ml.azure.com/.default").token

# Request headers: the management token as a Bearer token, the ml.azure.com
# token in its own header, and a JSON content type.
headers = {
    "Authorization": f"Bearer {token}",
    "x-azureml-token": ml_token,
    "Content-Type": "application/json",
}

In [None]:
# Submit the fine-tuning job with a PUT to the job endpoint.
# requests applies no timeout unless one is given, so an unresponsive service
# would block this cell indefinitely; the (connect, read) timeout in seconds
# makes the call fail with a requests timeout exception instead.
response = requests.put(
    endpoint,
    headers=headers,
    json=payload,
    timeout=(10, 120),
)

print("Status Code:", response.status_code)

# Print the parsed JSON body when there is one; otherwise print the raw text.
# response.json() signals an unparsable body with a ValueError subclass.
try:
    print("Response JSON:", response.json())
except ValueError:
    print("Response Text:", response.text)