In [None]:
#————————————————————

# Name: Azure OpenAI Fine Tuning (V1)

# Purpose:


# Company: Allgeier Schweiz AG
# Author: Nicolas Rehder (nrehder@allgeier.ch)
# Created for: SDSC 2024
# Date Created: 22.01.2024
# Last Updated: 22.01.2024
# Python Version: 3.10.4

# General Sources:

# Azure Authentication Token
# https://learn.microsoft.com/en-us/cli/azure/account?view=azure-cli-latest#az-account-get-access-token()

# Azure Openai Usage:
# https://learn.microsoft.com/en-us/azure/ai-services/openai/tutorials/fine-tune?tabs=python-new%2Ccommand-line
# https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/fine-tuning?tabs=turbo%2Cpython&pivots=programming-language-python
# https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt?tabs=python&pivots=programming-language-chat-completions#few-shot-learning-with-chat-completion

# Additionals:
# https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models

# Download Python packages (run the below command in terminal if packages have not yet been installed)
# pip install -r C:\Python\sdsc\requirements.txt

#————————————————————

In [12]:
# Import required libraries
import os
import io
import time
from io import StringIO
import json
from dotenv import load_dotenv
from pathlib import Path
import pandas as pd
from openai import AzureOpenAI
import json
from IPython.display import clear_output
from IPython.core.display import HTML
import requests
import random
import html_to_json

In [2]:
# Load required variables from the env file.
# The path is written as a raw string: it contains backslashes followed by
# characters such as "P", "o" and "." that are not valid escape sequences, which
# makes Python emit a SyntaxWarning/DeprecationWarning for a normal string literal.
# The resulting path value is unchanged.
load_dotenv(dotenv_path=Path(r"C:\Python\openai-lab\.venv\.env"))

# Load Azure OpenAI Key and Endpoint. These values can be found within the Azure OpenAI Service resource in portal.azure.com under Keys and Endpoint
# os.environ[...] raises KeyError if a variable is missing from the environment.
azure_oai_key = os.environ['AZURE_OPENAI_KEY']
azure_oai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']

# Load temporary Azure authentication token to deploy the fine tuned model
azure_auth_token = os.environ['AZURE_TEMP_AUTH_TOKEN'] # Launch the Cloud Shell from the Azure portal. Then run: az account get-access-token. Save this token in the env file under variable AZURE_TEMP_AUTH_TOKEN.

In [3]:
# Create the Azure OpenAI SDK client from the key and endpoint read from the
# environment, pinned to a fixed API version.
client = AzureOpenAI(
    azure_endpoint=azure_oai_endpoint,
    api_key=azure_oai_key,
    api_version="2024-02-15-preview",
)

In [22]:
# with open('training_set.jsonl', 'w', encoding='utf-8') as f:
#     json.dump(training_set, f)

# with open('validation_set.jsonl', 'w', encoding='utf-8') as f:
#     json.dump(validation_set, f)

In [6]:
training_file_name = 'chef-training-set.jsonl'
validation_file_name = 'chef-validation-set.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload call returns instead of staying open for the life of the kernel.

with open(training_file_name, "rb") as training_file:
    training_response = client.files.create(file=training_file, purpose="fine-tune")
training_file_id = training_response.id

with open(validation_file_name, "rb") as validation_file:
    validation_response = client.files.create(file=validation_file, purpose="fine-tune")
validation_file_id = validation_response.id

# The IDs are what the fine-tuning job is given to reference the uploaded files.
print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-7c802ea56bd1411e8bf716a5efb7192e
Validation file ID: file-ad17dbd3bea344bdb4f9b1fb645226f2


In [7]:
# Start a fine-tuning job from the uploaded training and validation files.
# Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters.
finetune = client.fine_tuning.jobs.create(
    model="gpt-35-turbo-0613",  # base model name
    training_file=training_file_id,
    validation_file=validation_file_id,
)

# Keep the job ID so the job can be looked up again by the following cells.
job_id = finetune.id


In [11]:
# Track training status

start_time = time.time()

# Statuses after which the job will not progress any further. A cancelled job is
# included so the loop below cannot poll forever; both spellings are listed
# because the exact spelling returned by the service is not confirmed here.
terminal_statuses = ["succeeded", "failed", "cancelled", "canceled"]

# Get the status of our fine-tuning job.
finetune = client.fine_tuning.jobs.retrieve(job_id)

status = finetune.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in terminal_statuses:
    time.sleep(10)

    finetune = client.fine_tuning.jobs.retrieve(job_id)
    print(finetune.model_dump_json(indent=2))
    # Read the clock once so minutes and seconds come from the same instant.
    elapsed_seconds = int(time.time() - start_time)
    print("Elapsed time: {} minutes {} seconds".format(elapsed_seconds // 60, elapsed_seconds % 60))
    status = finetune.status
    print(f'Status: {status}')
    # wait=True defers the clear until the next output arrives, so the latest
    # poll result stays visible while sleeping.
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
print('Checking other fine-tune jobs for this resource.')
finetune = client.fine_tuning.jobs.list()
print(f'Found {len(finetune.data)} fine-tune jobs.')

Fine-tuning job ftjob-383d4910f04546ac9cae6304457924d0 finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 1 fine-tune jobs.


In [12]:
# Retrieve the fine_tuned_model name from the finished job

finetune = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model = finetune.fine_tuned_model

# Fail fast with a clear message rather than passing an empty model name on to
# the deployment request.
if fine_tuned_model is None:
    raise RuntimeError(
        f'Fine-tuning job {job_id} has no fine-tuned model (status: {finetune.status}).'
    )

In [29]:
# Deploy fine tuned model

token = azure_auth_token
subscription = "749f8bd8-b908-4a95-8da7-1ea14d0f1e60"  
resource_group = "rg-sdsc"
resource_name = "oaisdsc"
# A random numeric suffix makes a name clash between several deployments unlikely;
# it does not guarantee uniqueness.
model_deployment_name ="gpt-35-turbo-0613-ft-" + str(random.randrange(0,1000))

deploy_params = {'api-version': "2023-05-01"} 
deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'}

# Request body: the SKU plus the fine-tuned model to deploy.
deploy_data = {
    "sku": {"name": "standard", "capacity": 10}, 
    "properties": {
        "model": {
            "format": "OpenAI",
            "name": fine_tuned_model,
            "version": "1"
        }
    }
}
# Serialised by hand because the Content-Type header is set explicitly above.
deploy_data = json.dumps(deploy_data)

request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}'

print('Creating a new deployment...')

# An explicit timeout keeps the cell from hanging indefinitely if the
# management endpoint does not answer.
r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data, timeout=60)

print(r)
print(r.reason)
# The body is not guaranteed to be JSON (e.g. on some error responses); fall
# back to the raw text instead of raising.
try:
    print(r.json())
except ValueError:
    print(r.text)

Creating a new deployment...
<Response [201]>
Created
{'id': '/subscriptions/749f8bd8-b908-4a95-8da7-1ea14d0f1e60/resourceGroups/rg-sdsc/providers/Microsoft.CognitiveServices/accounts/oaisdsc/deployments/gpt-35-turbo-0613-ft305', 'type': 'Microsoft.CognitiveServices/accounts/deployments', 'name': 'gpt-35-turbo-0613-ft305', 'sku': {'name': 'standard', 'capacity': 1}, 'properties': {'model': {'format': 'OpenAI', 'name': 'gpt-35-turbo-0613.ft-383d4910f04546ac9cae6304457924d0', 'version': '1'}, 'versionUpgradeOption': 'NoAutoUpgrade', 'capabilities': {'chatCompletion': 'true'}, 'provisioningState': 'Creating', 'rateLimits': [{'key': 'request', 'renewalPeriod': 10, 'count': 1}, {'key': 'token', 'renewalPeriod': 60, 'count': 1000}]}, 'systemData': {'createdBy': 'sds2024trainer@outlook.com', 'createdByType': 'User', 'createdAt': '2024-03-13T08:07:58.3022134Z', 'lastModifiedBy': 'sds2024trainer@outlook.com', 'lastModifiedByType': 'User', 'lastModifiedAt': '2024-03-13T08:07:58.3022134Z'}, 'etag

In [19]:
# Ask the deployed fine-tuned model for a recipe.
# The deployment name generated by the deploy cell is used here instead of a
# hard-coded name, so the request targets the deployment that was actually created.
# NOTE(review): a freshly created deployment may still be provisioning — confirm
# it is ready before running this cell.
completion = client.chat.completions.create(
  model = model_deployment_name,
  #response_format={ "type": "json_object" }, # Not supported for fine tuned models
  messages = [    
    {"role": "system", "content": "You are an Cooking Assistant specialising in vegan recipes. your cooking style is mediterranean asian fusion, similar to a mix between Jamie Oliver and Joanne Molinaro. You  will be given a set of ingredients and respond with a great tasting recipe involving those ingredients."},
    {"role": "user", "content": "Avocado, Coconut Milk, Tofu, Soy Sauce"}
  ]
)


In [24]:
# Render the text of the first returned choice as HTML in the cell output.
HTML(completion.choices[0].message.content)

In [20]:
# Keep the text of the first returned choice for the conversion and export cells.
completion_output = completion.choices[0].message.content
#recipe = completion_output.replace("\n", "")

In [23]:
# Display the raw completion string (its repr, so line breaks show up as "\n").
completion_output

'Recipe: Asian-Inspired Tofu Avocado Curry\n\nIngredients:\n- 1 ripe avocado, pitted and sliced\n- 1 can of coconut milk\n- 200g tofu, cubed\n- 2 tablespoons soy sauce\n- 1 tablespoon olive oil\n- 1 onion, thinly sliced\n- 2 cloves garlic, minced\n- 1 teaspoon ginger, grated\n- 1 teaspoon curry powder\n- 1 teaspoon turmeric\n- 1 teaspoon paprika\n- Fresh cilantro, chopped for garnish\n- Steamed rice, for serving\n\nInstructions:\n1. In a large deep skillet or wok, heat olive oil over medium heat. Add onion, garlic, and ginger. Sauté until the onion becomes translucent.\n2. Add the tofu cubes to the skillet and cook until slightly browned on all sides.\n3. Add curry powder, turmeric, and paprika. Stir to evenly coat tofu and vegetables.\n4. Pour in the coconut milk and bring to a gentle simmer. Allow the flavors to meld for about 5 minutes.\n5. Season with soy sauce and stir well.\n6. Gently fold in avocado slices and cook for an additional 2 minutes.\n7. Serve over steamed rice and gar

In [21]:
# Convert the completion text with html_to_json.
# NOTE(review): the completion displayed in this notebook is plain text rather
# than HTML markup — confirm the converter yields a useful structure for it.
output_json = html_to_json.convert(completion_output)

In [22]:
# Save the converted recipe to disk as UTF-8 text, serialised as JSON with a
# 4-space indent.
with open('recipe.jsonl', 'w', encoding='utf-8') as f:
    f.write(json.dumps(output_json, indent=4))