In [None]:
#————————————————————

# Name: Azure OpenAI Fine Tuning (V1)

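# Purpose: Fine-tune an Azure OpenAI chat model (gpt-35-turbo-0613) on a recipes dataset, deploy the fine-tuned model, and test it with a chat completion.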


# Company: Allgeier Schweiz AG
# Author: Nicolas Rehder (nrehder@allgeier.ch)
# Created for: SDSC 2024
# Date Created: 22.01.2024
# Last Updated: 22.01.2024
# Python Version: 3.10.4

# General Sources:

# Azure Authentication Token
# https://learn.microsoft.com/en-us/cli/azure/account?view=azure-cli-latest#az-account-get-access-token()

# Azure Openai Usage:
# https://learn.microsoft.com/en-us/azure/ai-services/openai/tutorials/fine-tune?tabs=python-new%2Ccommand-line
# https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/fine-tuning?tabs=turbo%2Cpython&pivots=programming-language-python
# https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt?tabs=python&pivots=programming-language-chat-completions#few-shot-learning-with-chat-completion

# Additionals:
# https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models

# Download Python packages (run the below command in terminal if packages have not yet been installed)
# pip install -r C:\Python\sdsc\requirements.txt

#————————————————————

In [1]:
# Import Python packages
import os
import io
import time
from io import StringIO
import json
from dotenv import load_dotenv
from pathlib import Path
import pandas as pd
from openai import AzureOpenAI
import json
from IPython.display import clear_output
from IPython.core.display import HTML
import requests
import random

In [4]:
# Read the settings this notebook needs from a local .env file.
# If loading fails, check the path separators (\ vs \\) and try the other form.
# Example of a Linux/devcontainer path: /workspaces/azure-openai-lab/.venv/.env
env_file = Path("C:\\Python\\azure-openai-lab\\.venv\\.env")
load_dotenv(dotenv_path=env_file)

# Azure OpenAI key and endpoint. Both values are listed in portal.azure.com on the
# Azure OpenAI Service resource, under "Keys and Endpoint".
azure_oai_key = os.environ['AZURE_OPENAI_KEY']
azure_oai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']

# Temporary Azure authentication token, used further down to deploy the fine-tuned model.
# To obtain it: launch the Cloud Shell from the Azure portal, run `az account get-access-token`,
# and save the token in the env file under the variable AZURE_TEMP_AUTH_TOKEN.
azure_auth_token = os.environ['AZURE_TEMP_AUTH_TOKEN']

In [5]:
# Build the Azure OpenAI SDK client from the endpoint and key loaded from the env file.
# The same client object is used for the file, fine-tuning and chat calls in this notebook.
client = AzureOpenAI(
    azure_endpoint=azure_oai_endpoint,
    api_key=azure_oai_key,
    api_version="2024-02-15-preview",
)

In [4]:
training_file_name = 'recipes-training-set.jsonl'
validation_file_name = 'recipes-validation-set.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload call returns, instead of staying open for the lifetime of the kernel.

with open(training_file_name, "rb") as training_file:
    training_response = client.files.create(file=training_file, purpose="fine-tune")
training_file_id = training_response.id

with open(validation_file_name, "rb") as validation_file:
    validation_response = client.files.create(file=validation_file, purpose="fine-tune")
validation_file_id = validation_response.id

# The IDs are what the fine-tuning job refers to; print them for later reference.
print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-b1321047eb7f4ea89bc1f2f976dfd51d
Validation file ID: file-8fff4373258b45ee98ec14da71e977cf


In [6]:
# Retrieve the IDs of the dataset files if they were already uploaded.
# Both names are matched explicitly: the listing can contain other files as well,
# and a plain `else` would take the ID of any non-training file as the validation set.
# If several files match the same name, the last one listed wins.
for uploaded_file in client.files.list():
    if "recipes-training-set" in uploaded_file.filename:
        training_file_id = uploaded_file.id
    elif "recipes-validation-set" in uploaded_file.filename:
        validation_file_id = uploaded_file.id

In [5]:
# Start a fine-tuning job on the uploaded training and validation files.
# `model` is the base model name. Note that in Azure OpenAI the model name
# contains dashes and cannot contain dot/period characters.
finetune = client.fine_tuning.jobs.create(
    model="gpt-35-turbo-0613",
    training_file=training_file_id,
    validation_file=validation_file_id,
)

# Keep the job ID: the cells below use it to poll the job and read its result.
job_id = finetune.id


In [6]:
# Track training status

# Statuses after which the job no longer changes. "cancelled" is included so that
# a cancelled job ends the loop instead of being polled forever.
terminal_statuses = ["succeeded", "failed", "cancelled"]

start_time = time.time()

# Get the status of our fine-tuning job.
finetune = client.fine_tuning.jobs.retrieve(job_id)
status = finetune.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in terminal_statuses:
    time.sleep(10)

    finetune = client.fine_tuning.jobs.retrieve(job_id)
    status = finetune.status
    elapsed_seconds = int(time.time() - start_time)

    print(finetune.model_dump_json(indent=2))
    print("Elapsed time: {} minutes {} seconds".format(elapsed_seconds // 60, elapsed_seconds % 60))
    print(f'Status: {status}')
    # wait=True delays the clear until the next output arrives, so the latest
    # snapshot stays visible while the loop sleeps.
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
# The listing gets its own name so that `finetune` keeps referring to the job polled above.
print('Checking other fine-tune jobs for this resource.')
finetune_jobs = client.fine_tuning.jobs.list()
print(f'Found {len(finetune_jobs.data)} fine-tune jobs.')

Fine-tuning job ftjob-9dcdea42da2e4c1e94666f09e6a882ea finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 1 fine-tune jobs.


In [12]:
# Retrieve fine_tuned_model name

# Re-fetch the job so the model name comes from its latest state.
finetune = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model = finetune.fine_tuned_model

# The job carries no model name if it did not produce a model (for example because it
# failed or is still running). Stop here with a clear message instead of letting the
# deployment request below be sent with an empty model name.
if not fine_tuned_model:
    raise RuntimeError(
        f'Fine-tuning job {job_id} has status "{finetune.status}" and no fine-tuned model to deploy.'
    )

In [None]:
# Deploy fine tuned model
# Sends a PUT request to the Azure management REST API that creates a deployment
# of the fine-tuned model under the Azure OpenAI resource named below.

token = azure_auth_token
subscription = "fade7a40-9037-4aeb-82c9-e70f8b49217a"
resource_group = "rgopenaisweden"
resource_name = "mssp-openai-sweden"
# The random suffix reduces the chance of a name clash when several models are deployed
# (it lowers the odds, it does not guarantee uniqueness).
model_deployment_name ="gpt-35-turbo-0613-ft-" + str(random.randrange(0,1000))

deploy_params = {'api-version': "2023-05-01"}
deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'}

deploy_data = {
    "sku": {"name": "standard", "capacity": 10},
    "properties": {
        "model": {
            "format": "OpenAI",
            "name": fine_tuned_model,
            "version": "1"
        }
    }
}
# The request body is sent as a JSON string.
deploy_data = json.dumps(deploy_data)

request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}'

print('Creating a new deployment...')

# A timeout keeps the cell from hanging indefinitely if the management endpoint does not answer.
r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data, timeout=60)

print(r)
print(r.reason)
try:
    print(r.json())
except ValueError:
    # The response body is not JSON (for example an HTML error page): show the raw text instead.
    print(r.text)

In [15]:
# Ask the deployed fine-tuned model for a recipe.
# `model` must be the name of the deployment. The deployment created in this notebook is
# named "gpt-35-turbo-0613-ft-<random number>", so the variable holding that name is used
# here rather than a fixed string that would not match it.
# NOTE(review): the system prompt is left exactly as written, including its wording quirks;
# presumably it mirrors the prompt used in the training data - confirm before editing it.
completion = client.chat.completions.create(
  model = model_deployment_name,
  #response_format={ "type": "json_object" }, # Not supported for fine-tuned models
  messages = [
    {"role": "system", "content": "You are an Cooking Assistant specialising in vegan recipes. your cooking style is mediterranean asian fusion, similar to a mix between Jamie Oliver and Joanne Molinaro. You  will be given a set of ingredients and respond with a great tasting recipe involving those ingredients."},
    {"role": "user", "content": "Avocado, Coconut Milk, Tofu, Soy Sauce"}
  ]
)


In [17]:
# Take the text of the first choice returned for the chat completion.
first_choice = completion.choices[0]
output = first_choice.message.content

In [20]:
#completion_output = completion.choices[0].message.content
#recipe = completion_output.replace("\n", "")

In [21]:
#output_json = html_to_json.convert(completion_output)

In [22]:
# with open('recipe.jsonl', 'w', encoding='utf-8') as f:
#     json.dump(output_json, f, indent=4)