In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [2]:
import json
import tiktoken
import numpy as np
from collections import defaultdict

encoding = tiktoken.get_encoding("cl100k_base") # default encoding used by gpt-4, turbo, and text-embedding-ada-002 models

def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3
    return num_tokens

def num_assistant_tokens_from_messages(messages):
    num_tokens = 0
    for message in messages:
        if message["role"] == "assistant":
            num_tokens += len(encoding.encode(message["content"]))
    return num_tokens

def print_distribution(values, name):
    print(f"\n#### Distribution of {name}:")
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    print(f"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}")

files = ['./data/training_data/vampires_train.jsonl', './data/training_data/vampires_test.jsonl']

for file in files:
    print(f"Processing file: {file}")
    with open(file, 'r', encoding='utf-8') as f:
        dataset = [json.loads(line) for line in f]

    total_tokens = []
    assistant_tokens = []

    for ex in dataset:
        messages = ex.get("messages", {})
        total_tokens.append(num_tokens_from_messages(messages))
        assistant_tokens.append(num_assistant_tokens_from_messages(messages))

    print_distribution(total_tokens, "total tokens")
    print_distribution(assistant_tokens, "assistant tokens")
    print('*' * 50)

Processing file: ./data/training_data/vampires_train.jsonl

#### Distribution of total tokens:
min / max: 347, 1248
mean / median: 690.8498023715415, 678.5
p5 / p95: 511.5, 896.0

#### Distribution of assistant tokens:
min / max: 96, 425
mean / median: 206.15612648221344, 200.0
p5 / p95: 153.5, 264.0
**************************************************
Processing file: ./data/training_data/vampires_test.jsonl

#### Distribution of total tokens:
min / max: 438, 1143
mean / median: 687.6299212598425, 664.0
p5 / p95: 505.2, 911.8

#### Distribution of assistant tokens:
min / max: 92, 411
mean / median: 204.99212598425197, 200.0
p5 / p95: 150.0, 261.6
**************************************************


In [3]:
from dotenv import load_dotenv
import os
from openai import AzureOpenAI

client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_SWEDEN_ENDPOINT"),
  api_key = os.getenv("AZURE_OPENAI_SWEDEN_KEY"),
  api_version = "2024-05-01-preview"  # This API version or later is required to access seed/events/checkpoint features
)

training_file_name = './data/training_data/vampires_train.jsonl'
validation_file_name = './data/training_data/vampires_test.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.

training_response = client.files.create(
    file = open(training_file_name, "rb"), purpose="fine-tune"
)
training_file_id = training_response.id

validation_response = client.files.create(
    file = open(validation_file_name, "rb"), purpose="fine-tune"
)
validation_file_id = validation_response.id

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-7a8cb6a4a1fe4eb18bf6b924bb8b5214
Validation file ID: file-22329bf4c3794b8bb9ca0ad238525418


In [4]:
# Submit fine-tuning training job

response = client.fine_tuning.jobs.create(
    training_file = training_file_id,
    validation_file = validation_file_id,
    model = "gpt-4o-mini", # Enter base model name. Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters.
    seed = 105 # seed parameter controls reproducibility of the fine-tuning job. If no seed is specified one will be generated automatically.
)

job_id = response.id

# You can use the job ID to monitor the status of the fine-tuning job.
# The fine-tuning job will take some time to start and complete.

print("Job ID:", response.id)
print("Status:", response.status)
print(response.model_dump_json(indent=2))

Job ID: ftjob-6647cdbd869c4bc8b2140ff7197c4c23
Status: pending
{
  "id": "ftjob-6647cdbd869c4bc8b2140ff7197c4c23",
  "created_at": 1723494275,
  "error": null,
  "fine_tuned_model": null,
  "finished_at": null,
  "hyperparameters": {
    "n_epochs": -1,
    "batch_size": -1,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-4o-mini-2024-07-18",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": null,
  "seed": 105,
  "status": "pending",
  "trained_tokens": null,
  "training_file": "file-7a8cb6a4a1fe4eb18bf6b924bb8b5214",
  "validation_file": "file-22329bf4c3794b8bb9ca0ad238525418",
  "estimated_finish": null,
  "integrations": null
}


In [5]:
# Track training status

from IPython.display import clear_output
import time

start_time = time.time()

# Get the status of our fine-tuning job.
response = client.fine_tuning.jobs.retrieve(job_id)

status = response.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in ["succeeded", "failed"]:
    time.sleep(10)

    response = client.fine_tuning.jobs.retrieve(job_id)
    print(response.model_dump_json(indent=2))
    print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
    status = response.status
    print(f'Status: {status}')
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
print('Checking other fine-tune jobs for this resource.')
response = client.fine_tuning.jobs.list()
print(f'Found {len(response.data)} fine-tune jobs.')

Fine-tuning job ftjob-6647cdbd869c4bc8b2140ff7197c4c23 finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 1 fine-tune jobs.


In [11]:
response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10)
print(response.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "ftevent-a56a078c64e7403eb7af5c6cdd611ebc",
      "created_at": 1723499535,
      "level": "info",
      "message": "Training tokens billed: 3100000",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-1413a98629394703a9c55949e5a44fa5",
      "created_at": 1723499535,
      "level": "info",
      "message": "Model Evaluation Passed.",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-2af6f741f77240979c44aebb096b77a1",
      "created_at": 1723499535,
      "level": "info",
      "message": "Completed results file: file-e27de9e9272b42eca0633d41e3b06383",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-33776e911622407184601d0ea9006031",
      "created_at": 1723499527,
      "level": "info",
      "message": "Postprocessing started.",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
 

In [12]:
# Retrieve fine_tuned_model name

response = client.fine_tuning.jobs.retrieve(job_id)

print(response.model_dump_json(indent=2))
fine_tuned_model = response.fine_tuned_model

{
  "id": "ftjob-6647cdbd869c4bc8b2140ff7197c4c23",
  "created_at": 1723494275,
  "error": null,
  "fine_tuned_model": "gpt-4o-mini-2024-07-18.ft-6647cdbd869c4bc8b2140ff7197c4c23",
  "finished_at": 1723499535,
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-4o-mini-2024-07-18",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": [
    "file-e27de9e9272b42eca0633d41e3b06383"
  ],
  "seed": 105,
  "status": "succeeded",
  "trained_tokens": 1048710,
  "training_file": "file-7a8cb6a4a1fe4eb18bf6b924bb8b5214",
  "validation_file": "file-22329bf4c3794b8bb9ca0ad238525418",
  "estimated_finish": null,
  "integrations": null
}


In [16]:
!az account get-access-token

{
  "accessToken": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IktRMnRBY3JFN2xCYVZWR0JtYzVGb2JnZEpvNCIsImtpZCI6IktRMnRBY3JFN2xCYVZWR0JtYzVGb2JnZEpvNCJ9.eyJhdWQiOiJodHRwczovL21hbmFnZW1lbnQuY29yZS53aW5kb3dzLm5ldC8iLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC8xNmIzYzAxMy1kMzAwLTQ2OGQtYWM2NC03ZWRhMDgyMGI2ZDMvIiwiaWF0IjoxNzIzNTAxMjczLCJuYmYiOjE3MjM1MDEyNzMsImV4cCI6MTcyMzUwNjczMywiYWNyIjoiMSIsImFpbyI6IkFlUUFHLzhYQUFBQWh2OXpBM2VCdGFhSGF1YkRwWmxncnoxU0tsUngvOXphQVJrYkxYbDBnTkttVHdzRCtlSHhpRUh5WTdxOXVHdk9BV2owbWVkeExBZUxNMnJrQ3lDNlRpK2RjTjRYUFBwUzhGaEtBUXlFWGdTbHZGekNwcGkwMXVzUFlEUit4L1BVcUhNUG9BSTJ3UnJxOXNhVlB6QXcxWTNSc3plZm1aNFVwaHhISEI3cEgxdkJ5LzM3UStDNDY4YWNSaHpzUVJEV2JJbVJ5ZTVVU0pIc2NHeXplYjlMTkRSQWVGYXAxNko1ZUZQaDdPckw2Sk9RWnRoT09vNSttUkFzTEJmdDh2V0hmakZ4dW04alNqY1d3WXpFNndHWnVvUGlucm10M0ErUnhHdnl1Q3R0R0JnPSIsImFsdHNlY2lkIjoiNTo6MTAwMzIwMDMzMkVBNEExMyIsImFtciI6WyJyc2EiLCJtZmEiXSwiYXBwaWQiOiIwNGIwNzc5NS04ZGRiLTQ2MWEtYmJlZS0wMmY5ZTFiZjdiNDYiLCJhcHBpZGFjciI6IjAiLCJkZXZpY2VpZCI6ImMwYjcxMjc2LWE

In [14]:
fine_tuned_model

'gpt-4o-mini-2024-07-18.ft-6647cdbd869c4bc8b2140ff7197c4c23'

In [18]:
# Deploy fine-tuned model

import json
import requests

token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IktRMnRBY3JFN2xCYVZWR0JtYzVGb2JnZEpvNCIsImtpZCI6IktRMnRBY3JFN2xCYVZWR0JtYzVGb2JnZEpvNCJ9.eyJhdWQiOiJodHRwczovL21hbmFnZW1lbnQuY29yZS53aW5kb3dzLm5ldC8iLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC8xNmIzYzAxMy1kMzAwLTQ2OGQtYWM2NC03ZWRhMDgyMGI2ZDMvIiwiaWF0IjoxNzIzNTAxMjczLCJuYmYiOjE3MjM1MDEyNzMsImV4cCI6MTcyMzUwNjczMywiYWNyIjoiMSIsImFpbyI6IkFlUUFHLzhYQUFBQWh2OXpBM2VCdGFhSGF1YkRwWmxncnoxU0tsUngvOXphQVJrYkxYbDBnTkttVHdzRCtlSHhpRUh5WTdxOXVHdk9BV2owbWVkeExBZUxNMnJrQ3lDNlRpK2RjTjRYUFBwUzhGaEtBUXlFWGdTbHZGekNwcGkwMXVzUFlEUit4L1BVcUhNUG9BSTJ3UnJxOXNhVlB6QXcxWTNSc3plZm1aNFVwaHhISEI3cEgxdkJ5LzM3UStDNDY4YWNSaHpzUVJEV2JJbVJ5ZTVVU0pIc2NHeXplYjlMTkRSQWVGYXAxNko1ZUZQaDdPckw2Sk9RWnRoT09vNSttUkFzTEJmdDh2V0hmakZ4dW04alNqY1d3WXpFNndHWnVvUGlucm10M0ErUnhHdnl1Q3R0R0JnPSIsImFsdHNlY2lkIjoiNTo6MTAwMzIwMDMzMkVBNEExMyIsImFtciI6WyJyc2EiLCJtZmEiXSwiYXBwaWQiOiIwNGIwNzc5NS04ZGRiLTQ2MWEtYmJlZS0wMmY5ZTFiZjdiNDYiLCJhcHBpZGFjciI6IjAiLCJkZXZpY2VpZCI6ImMwYjcxMjc2LWEyZjQtNDA1Ny04MDBmLTRmOTU0ODQ2MmMzMSIsImVtYWlsIjoidmhvdWRlYmluZUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJIb3VkZWJpbmUiLCJnaXZlbl9uYW1lIjoiVmluY2VudCIsImdyb3VwcyI6WyJiMTMwNDAyMi0wOGU2LTQ0N2QtYjA5NC0xNTM3MDU5N2M2YjYiLCJkMzRjNGViZS00OTg0LTQ5MDMtYTY0ZC04YzIwMjgzZDUxNmIiLCJlMzA5NmRmNy1iNjVjLTRlMzItYWIxYS03YTM1ZGM2ODRmMGEiXSwiaWRwIjoiaHR0cHM6Ly9zdHMud2luZG93cy5uZXQvNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3LyIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjEwOC40MS4xMDEuMTEiLCJuYW1lIjoiVmluY2VudCBIb3VkZWJpbmUiLCJvaWQiOiIyNTU5YjA0ZC02YWY3LTQ5ZjMtODg3ZS04ZjliZDZjMjUzNDciLCJwdWlkIjoiMTAwMzIwMDMzQ0ZGM0E1OSIsInJoIjoiMC5BVVlBRThDekZnRFRqVWFzWkg3YUNDQzIwMFpJZjNrQXV0ZFB1a1Bhd2ZqMk1CUHhBTGcuIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiRmRkbDNjdDJNQktObGU4NzFLVnV2TDBoTUpKZUtEUVZXT3RoY2lVSzlhSSIsInRpZCI6IjE2YjNjMDEzLWQzMDAtNDY4ZC1hYzY0LTdlZGEwODIwYjZkMyIsInVuaXF1ZV9uYW1lIjoidmhvdWRlYmluZUBtaWNyb3NvZnQuY29tIiwidXRpIjoidnFjS1p4MGhiVVNlbFI2QzJXMVRBQSIsInZlciI6IjEuMCIsIndpZHMiOlsiYjc5ZmJmNGQtM2VmOS00Njg5LTgxNDMtNzZiMTk0ZTg1NTA5Il0sInhtc19jYWUiOiIxIiwieG1zX2NjIjpbIkNQMSJdLCJ4bXNfZWRvdiI6dHJ1ZSwieG1zX2lkcmVsIjoiMSA0IiwieG1zX3RjZHQiOjE2NDUxMzcyMjh9.LVlsxEybRU7JPHo54uyDh-zOkVTHbkVfhjl4o1he_shpKn02Z2YHP8nOnnXbHgd2MqM1Sus-i-U6LlE1Sc4ARMOUle105XoaveiNZilRY0uREbxo1AjtIXZ_VSUH5o0VckJ06Id4kUNktVlHBamA3k4Qd5IBV4B-gcrzgbyxlADY9N0Vakafe4GH5zALf74I-v9nYZzhWgj_zgJvrC7vrYe708jx1hwTXVfGm_L8oSsvPzSH1sXDnZnAU5vraB1GO4QFdPzINXrwnEyTWZh3rh1qWJJ-MEB1T3EJbvA2PLKmSQPoXcoAQo_veh2RU9jPaWyptBKHYVriPnUsz_8LqA"
subscription = "6c065ea7-65cd-4a34-8e2a-3e21ad4a8e9f"
resource_group = "vince-rg"
resource_name = "vh-aoai-se"
model_deployment_name = "gpt-4o-mini-ft-raft"

deploy_params = {'api-version': "2023-05-01"}
deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'}

deploy_data = {
    "sku": {"name": "standard", "capacity": 1},
    "properties": {
        "model": {
            "format": "OpenAI",
            "name": fine_tuned_model, #retrieve this value from the previous call, it will look like gpt-35-turbo-0613.ft-b044a9d3cf9c4228b5d393567f693b83
            "version": "1"
        }
    }
}
deploy_data = json.dumps(deploy_data)

request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}'

print('Creating a new deployment...')

r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data)

print(r)
print(r.reason)
print(r.json())

Creating a new deployment...
<Response [201]>
Created
{'id': '/subscriptions/6c065ea7-65cd-4a34-8e2a-3e21ad4a8e9f/resourceGroups/vince-rg/providers/Microsoft.CognitiveServices/accounts/vh-aoai-se/deployments/gpt-4o-mini-ft-raft', 'type': 'Microsoft.CognitiveServices/accounts/deployments', 'name': 'gpt-4o-mini-ft-raft', 'sku': {'name': 'standard', 'capacity': 1}, 'properties': {'model': {'format': 'OpenAI', 'name': 'gpt-4o-mini-2024-07-18.ft-6647cdbd869c4bc8b2140ff7197c4c23', 'version': '1'}, 'versionUpgradeOption': 'NoAutoUpgrade', 'capabilities': {'chatCompletion': 'true'}, 'provisioningState': 'Creating'}, 'systemData': {'createdBy': 'vhoudebine@microsoft.com', 'createdByType': 'User', 'createdAt': '2024-08-12T22:39:03.480334Z', 'lastModifiedBy': 'vhoudebine@microsoft.com', 'lastModifiedByType': 'User', 'lastModifiedAt': '2024-08-12T22:39:03.480334Z'}, 'etag': '"21179c8b-7670-4174-888b-0b7685765114"'}
