In [None]:
#————————————————————

# Name: Azure OpenAI API, Fine-Tuning

# Purpose: This notebook fine-tunes a GPT-4o mini model on Azure OpenAI; the step that deploys the fine-tuned model is included but commented out. [NOT USED IN WORKSHOP]

# Company: Allgeier Schweiz AG
# Author: Nicolas Rehder (nrehder@allgeier.ch)
# Created for: SDSC 2024 & ZHAW 2025
# Date Created: 22.01.2024
# Last Updated: 19.01.2025
# Python Version: 3.12.1

# General Sources:
# Use Region Sweden Central

# Azure Authentication Token
# https://learn.microsoft.com/en-us/cli/azure/account?view=azure-cli-latest#az-account-get-access-token()


# Additionals:

# If necessary, download Python packages (run the below command in terminal if packages have not yet been installed)
# pip install -r C:\Python\sdsc\requirements.txt

#————————————————————

In [1]:
# [NOT USED IN WORKSHOP]

# Import Python packages
import os
import io
import time
from io import StringIO
import json
from dotenv import load_dotenv
from pathlib import Path
import pandas as pd
from openai import AzureOpenAI
import json
from IPython.display import clear_output
from IPython.core.display import HTML
import requests
import random

In [2]:
# [NOT USED IN WORKSHOP]

# Read the settings stored in the project's .env file into the process environment.
# Note: on Windows the path separator sometimes causes errors (\ vs \\) - try one or
# the other, e.g. "C:\\Python\\azure-openai-lab\\.venv\\.env".
env_file = Path("/workspaces/azure-openai-lab/.venv/.env")
load_dotenv(dotenv_path=env_file)

# Azure OpenAI key and endpoint, read from the environment (a missing variable raises KeyError).
# Both values can be found within the Azure OpenAI Service resource in portal.azure.com
# under "Keys and Endpoint".
azure_oai_key = os.environ['AZURE_OPENAI_KEY_P34']
azure_oai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT_P34']

# Load temporary Azure authentication token to deploy the fine-tuned model
# azure_auth_token = os.environ['AZURE_TEMP_AUTH_TOKEN'] # Launch the Cloud Shell from the Azure portal. Then run: az account get-access-token. Save this token in the env file under variable AZURE_TEMP_AUTH_TOKEN.

In [3]:
# [NOT USED IN WORKSHOP]

# Create the Azure OpenAI SDK client used by all following cells.
# It authenticates with the key and endpoint read from the environment above.
client = AzureOpenAI(
    azure_endpoint=azure_oai_endpoint,
    api_key=azure_oai_key,
    api_version="2024-05-01-preview",  # alternative noted by the author: "2024-02-15-preview"
)

In [4]:
# [NOT USED IN WORKSHOP]

# Load JSONL to Azure OpenAI Service

training_file_path = r"/workspaces/azure-openai-lab/data/recipes-training-set.jsonl"
validation_file_path = r"/workspaces/azure-openai-lab/data/recipes-validation-set.jsonl"

# Upload the training and validation dataset files to Azure OpenAI with the SDK.
# Each file is opened in a `with` block so its handle is closed once the upload
# call has returned (or raised) instead of staying open in the kernel.

with open(training_file_path, "rb") as training_file:
    training_response = client.files.create(
        file=training_file, purpose="fine-tune"
    )
training_file_id = training_response.id

with open(validation_file_path, "rb") as validation_file:
    validation_response = client.files.create(
        file=validation_file, purpose="fine-tune"
    )
validation_file_id = validation_response.id

# The returned file IDs are what the fine-tuning job is created with.
print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-8d30450ec0c049e38628465fbce8f351
Validation file ID: file-3796d006541b42d892c0ca18a6b65a12


In [4]:
# Retrieve data file if already uploaded
# for i in client.files.list():
#     if "recipes-training-set" in i.filename:
#         training_file_id = i.id
#     elif "recipes-validation-set" in i.filename:
#         validation_file_id = i.id

In [5]:
# [NOT USED IN WORKSHOP]

# Initialize the fine-tuning job from the uploaded training and validation files.
finetune = client.fine_tuning.jobs.create(
    model="gpt-4o-mini",
    training_file=training_file_id,
    validation_file=validation_file_id,
)

# Keep the job ID; it is used to look the job up again while polling its status.
job_id = finetune.id

In [6]:
# [NOT USED IN WORKSHOP]

# Model deployment currently takes 45 - 60 minutes

# Statuses after which the job will not change any more. "cancelled" is included so
# that a cancelled job ends the loop instead of being polled forever.
terminal_statuses = ("succeeded", "failed", "cancelled")
poll_interval_seconds = 10

# Track training status
start_time = time.time()

# Get the status of our fine-tuning job.
finetune = client.fine_tuning.jobs.retrieve(job_id)

status = finetune.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in terminal_statuses:
    time.sleep(poll_interval_seconds)

    finetune = client.fine_tuning.jobs.retrieve(job_id)
    status = finetune.status

    # Read the clock once so minutes and seconds come from the same instant.
    elapsed_minutes, elapsed_seconds = divmod(int(time.time() - start_time), 60)

    print(finetune.model_dump_json(indent=2))
    print("Elapsed time: {} minutes {} seconds".format(elapsed_minutes, elapsed_seconds))
    print(f'Status: {status}')
    # wait=True defers clearing until the next output arrives, so the cell does not flicker.
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
# NOTE: this rebinds `finetune` from the single job to the job listing (as the original cell did).
print('Checking other fine-tune jobs for this resource.')
finetune = client.fine_tuning.jobs.list()
print(f'Found {len(finetune.data)} fine-tune jobs.')

Fine-tuning job ftjob-9f237207c3f947a1936026a730477ea6 finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 1 fine-tune jobs.


In [None]:
# [NOT USED IN WORKSHOP]

# Retrieve fine_tuned_model name

# finetune = client.fine_tuning.jobs.retrieve(job_id)
# fine_tuned_model = finetune.fine_tuned_model

# # Deploy fine tuned model

# token = azure_auth_token
# subscription = "fade7a40-9037-4aeb-82c9-e70f8b49217a"  
# resource_group = "rgopenaisweden"
# resource_name = "mssp-openai-sweden"
# model_deployment_name ="gpt-35-turbo-0613-ft-" + str(random.randrange(0,1000)) #ascertains that if multiple models deployed, each has its own unique name.

# deploy_params = {'api-version': "2023-05-01"} 
# deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'}

# deploy_data = {
#     "sku": {"name": "standard", "capacity": 10}, 
#     "properties": {
#         "model": {
#             "format": "OpenAI",
#             "name": fine_tuned_model,
#             "version": "1"
#         }
#     }
# }
# deploy_data = json.dumps(deploy_data)

# request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}'

# print('Creating a new deployment...')

# r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data)

# print(r)
# print(r.reason)
# print(r.json())