In [13]:
from openai import OpenAI
from dotenv import load_dotenv
import os 

In [14]:
# Load the environment variables from the .env file
load_dotenv()
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

Prepare the dataset

Dealing with list format, as shown above, might be convenient for small datasets. However, there are several benefits to saving the data in JSONL (JSON Lines) format. The benefits include scalability, interoperability, simplicity, and also compatibility with OpenAI API, which requires data in JSONL format when creating fine-tuning jobs.

The following code leverages the helper function prepare_data to create both the training and validation data in JSONL formats:

In [15]:
import json
from sklearn.model_selection import train_test_split

# Define the path to the original dataset
file_path = '../data/final_data/final_finetuning.jsonl'

# Load the data, handling possible malformed JSON
data = []
with open(file_path, 'r') as file:
    for line in file:
        try:
            json_obj = json.loads(line)
            data.append(json_obj)
        except json.JSONDecodeError:
            # Handle or log the malformed line if needed
            pass

# Split the data into training (80%) and test (20%) sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Define file paths for the training and test data
train_file_path = '../data/final_data/final_finetuning_train.jsonl'
test_file_path = '../data/final_data/final_finetuning_test.jsonl'

# Save the training set
with open(train_file_path, 'w') as train_file:
    for item in train_data:
        train_file.write(json.dumps(item) + '\n')

# Save the test set
with open(test_file_path, 'w') as test_file:
    for item in test_data:
        test_file.write(json.dumps(item) + '\n')


In [17]:
training_file_id = client.files.create(
  file=open(train_file_path, "rb"),
  purpose="fine-tune"
)

test_file_id = client.files.create(
  file=open(test_file_path, "rb"),
  purpose="fine-tune"
)

print(f"Training File ID: {training_file_id}")
print(f"Test File ID: {test_file_id}")

BadRequestError: Error code: 400 - {'error': {'message': "Invalid file format. Line 3, message 4: No match for discriminator 'role' and value 'email' (allowed values: 'assistant', 'function', 'system', 'user')", 'type': 'invalid_request_error', 'param': None, 'code': None}}

### Create a fine-tuning job

This fine-tuning process is highly inspired by the openai-cookbook performing fine-tuning on Microsoft Azure.

To perform the fine-tuning we will use the following two steps: (1) define hyperparameters, and (2) trigger the fine-tuning.

We will fine-tune the davinci model and run it for 15 epochs using a batch size of 3 and a learning rate multiplier of 0.3 using the training and validation datasets.

Successful execution of the previous code displays below the unique identifier of the training and validation data.

In [31]:
response = client.fine_tuning.jobs.create(
  training_file=training_file_id.id, 
  validation_file=validation_file_id.id,
  model="gpt-3.5-turbo", 
  hyperparameters={
    "n_epochs": 15,
	"batch_size": 3,
	"learning_rate_multiplier": 0.3
  }
)
job_id = response.id
status = response.status

print(f'Fine-tunning model with jobID: {job_id}.')
print(f"Training Response: {response}")
print(f"Training Status: {status}")

Fine-tunning model with jobID: ftjob-Cz9oUrArJEJnGLAtijUdGOoX.
Training Response: FineTuningJob(id='ftjob-Cz9oUrArJEJnGLAtijUdGOoX', created_at=1711658631, error=Error(code=None, message=None, param=None, error=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=15, batch_size=3, learning_rate_multiplier=0.3), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-wkUFLlJRyOXDuAkBFUtPtrii', result_files=[], status='validating_files', trained_tokens=None, training_file='file-N59UqF5TuZwPnrj1kVfkbgqI', validation_file='file-zPH1ujZjm1QNWUoyug6Ws8zW', user_provided_suffix=None)
Training Status: validating_files


The code above generates the following information for the jobID (`ftjob-SqZvz9Rpjn2nSxtsn8ozMJu4`), the training response, and the training status (pending).

This pending status does not provide any relevant information. However, we can have more insight into the training process by running the following code:

In [32]:
import signal
import datetime


def signal_handler(sig, frame):
    status = client.fine_tuning.jobs.retrieve(job_id).status
    print(f"Stream interrupted. Job is still {status}.")
    return


print(f"Streaming events for the fine-tuning job: {job_id}")

signal.signal(signal.SIGINT, signal_handler)

events = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id)
try:
    for event in events:
        print(
            f'{datetime.datetime.fromtimestamp(event.created_at)} {event.message}'
        )
except Exception:
    print("Stream interrupted (client disconnected).")

Streaming events for the fine-tuning job: ftjob-Cz9oUrArJEJnGLAtijUdGOoX
2024-03-28 16:43:51 Validating training file: file-N59UqF5TuZwPnrj1kVfkbgqI and validation file: file-zPH1ujZjm1QNWUoyug6Ws8zW
2024-03-28 16:43:51 Created fine-tuning job: ftjob-Cz9oUrArJEJnGLAtijUdGOoX


### Check the fine-tuning job status

Let's verify that our operation was successful, and additionally, we can examine all the fine-tuning operations by using a list operation.

In [33]:
import time

status = client.fine_tuning.jobs.retrieve(job_id).status
if status not in ["succeeded", "failed"]:
    print(f"Job not in terminal status: {status}. Waiting.")
    while status not in ["succeeded", "failed"]:
        time.sleep(2)
        status = client.fine_tuning.jobs.retrieve(job_id).status
        print(f"Status: {status}")
else:
    print(f"Finetune job {job_id} finished with status: {status}")
print("Checking other finetune jobs in the subscription.")
result = client.fine_tuning.jobs.list()
print(f"Found {len(result.data)} finetune jobs.")

Job not in terminal status: validating_files. Waiting.
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: ru

### Validation of the model

Finally, the fine-tuned model can be retrieved from the “fine_tuned_model” attribute. The following print statement shows that the name of the final mode is: `ft:davinci-002:personal::8gKnyxn3`

In [37]:
# Retrieve the finetuned model
fine_tuned_model = result.data[0].fine_tuned_model
print(fine_tuned_model)


ft:gpt-3.5-turbo-0125:personal::97qznw0E


With this model, we can run queries to validate its results by providing a prompt, the model name, and creating a query with the openai.Completion.create() function. The result is retrieved from the answer dictionary as follows:

In [39]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "Develop comprehensive website content for our New Beginnings Savings Account, specifically designed for permanent residents."}
  ]
)
print(answer.choices[0].message)

# new_prompt = "Design an email for the TD Student Line of Credit, aimed at students seeking flexible funding solutions for their academic journey"
# answer = client.completions.create(
#   model=fine_tuned_model,
#   prompt=new_prompt
# )

# print(answer.choices[0].text)

ChatCompletionMessage(content="Welcome to our leading financial institution, where your financial journey starts with assurance and growth. Explore our New Beginnings Savings Account, tailored exclusively for permanent residents like you. This account offers more than just a place to keep your savings; it represents a value and growth-driven financial relationship you can trust.\n\n**New Beginnings Savings Account for Permanent Residents at a Glance:**\n\n**High-Yield Interest Rates:** Enjoy a competitive interest rate that grows your savings effectively and aligns with your aspirations.\n\n**No Monthly Maintenance Fee:** We value your progress towards reaching your financial goals — that's why there are no monthly maintenance fees associated with the account.\n\n**Easy Access and Online Personal Banking:** Access your account information, move money, deposit checks, and view transaction history with our online and mobile banking services. Experience seamless banking, tailored to meet 

In [40]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "Develop comprehensive website content for our Savings Account in 100 words, specifically designed for permanent residents."}
  ]
)
print(answer.choices[0].message)

# new_prompt = "Design an email for the TD Student Line of Credit, aimed at students seeking flexible funding solutions for their academic journey"
# answer = client.completions.create(
#   model=fine_tuned_model,
#   prompt=new_prompt
# )

# print(answer.choices[0].text)

ChatCompletionMessage(content='Explore perpetual benefits with our Savings Account, tailored for long-term residents. With competitive interest rates, seamless online access, and zero monthly fees, your funds will flourish effortlessly. Enjoy peace of mind with free global transfers and industry-leading security. Extend your savings with adjustable terms and automatic deposits. Our intuitive mobile app means financial freedom is always within reach. Join a bank that knows the value of permanency, providing lifetime support and loyalty rewards. Your future deserves stability. Your savings deserve growth. Partner with us and build a foundation for a prosperous life here, now, and forever.', role='assistant', function_call=None, tool_calls=None)


In [42]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "create detailed social media content for mortage seekers in 300 words, specifically designed for permanent residents."}
  ]
)
print(answer.choices[0].message)

# new_prompt = "Design an email for the TD Student Line of Credit, aimed at students seeking flexible funding solutions for their academic journey"
# answer = client.completions.create(
#   model=fine_tuned_model,
#   prompt=new_prompt
# )

# print(answer.choices[0].text)

ChatCompletionMessage(content='🏡 Are you a permanent resident looking to put down roots in the land of opportunity? Let us help you unlock the door to your dream home! 🌟\n\n🛠️ Our innovative mortgages are tailor-made for permanent residents, making the process smoother and more rewarding. We understand the nuances that come with your residency status and are here to guide you every step of the way!\n\n🔑 Dive into our exclusive range of mortgage packages, each one as unique as the American Dream itself. From low down payments to attractive interest rates, we\'ve got you covered. Our dedicated team will ensure that you secure a mortgage that not only fits your financial situation but also aligns with your long-term goals.\n\n🏋️\u200d♀️ Worried about credit history? Don\'t be! We specialize in working with residents building their credit scores in the U.S. Lenders that understand your journey are here walking by your side.\n\n📈 Plus, with our renowned online application process, getting c

In [43]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "create detailed social media content for mortage seekers, specifically designed for permanent residents."}
  ]
)
print(answer.choices[0].message)

# new_prompt = "Design an email for the TD Student Line of Credit, aimed at students seeking flexible funding solutions for their academic journey"
# answer = client.completions.create(
#   model=fine_tuned_model,
#   prompt=new_prompt
# )

# print(answer.choices[0].text)

ChatCompletionMessage(content="Are you looking to put down roots and establish your own piece of paradise in a new country? Unlock the door to your next chapter as a permanent resident with our hassle-free mortgage solutions! Our experienced team understands the unique needs of newcomers and is here to guide you every step of the way. From pre-approval to unlocking the front door of your dream home, we have you covered. As a permanent resident, your future is bright, and we're excited to be part of that journey. Welcome home! #MortgageAdventures #PermanentResidentLife", role='assistant', function_call=None, tool_calls=None)


In [45]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "create detailed social media content for mortage seekers in 200 words, specifically designed for permanent residents."}
  ]
)
print(answer.choices[0].message)

# new_prompt = "Design an email for the TD Student Line of Credit, aimed at students seeking flexible funding solutions for their academic journey"
# answer = client.completions.create(
#   model=fine_tuned_model,
#   prompt=new_prompt
# )

# print(answer.choices[0].text)

ChatCompletionMessage(content="🏡🔍 Looking to purchase your dream home in the U.S.? Our mortgage solutions are tailored for permanent residents with low rates, minimal down payments, and competitive terms to help you achieve homeownership. Say goodbye to sky-high rents and invest in your future starting today. Whether you're new to the mortgage process or seeking to refinance, our expert team understands the unique needs of permanent residents and will guide you every step of the way. Why let citizenship status hold you back from living the American Dream? DM us to learn more about our exclusive offers and eligibility requirements - your new home might be more within reach than you think. Come home to stability, security, and the pride of owning where your heart is. Housing stability - a step closer to citizenship and community integration. Let's make this journey together. 🇺🇸❤️ #MortgagesForResidents #DreamHome #PermanentResidency #MortgageSolutions", role='assistant', function_call=No

In [None]:
answer = client.chat.completions.create(
  model=fine_tuned_model,
  messages=[
    {"role": "system", "content": "This is a parameter-based prompt for creating marketing materials"},
    {"role": "user", "content": "Create a website marketing campaign for a Checking Account aimed at International Students in 5 lines"}
  ]
)
print(answer.choices[0].message)

NameError: name 'client' is not defined