In [1]:
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
from openai import OpenAI

In [4]:
client = OpenAI()

In [5]:
response = client.chat.completions.create(
  # model= "ft:gpt-3.5-turbo-0125:personal::9pLUXPi8",
  model="gpt-4o-mini",
  messages=[ 
    {"role": "user", "content": "What is the return policy of CodeCraft Marketfplace?"}
  ]
)
print(response.choices[0].message.content)

I don’t have specific information about the return policy of CodeCraft Marketplace as of my last update in October 2023. Return policies can vary widely between different marketplaces and can change frequently. I recommend checking the official CodeCraft Marketplace website directly or contacting their customer service for the most accurate and up-to-date information regarding their return policy.


In [6]:
response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[ 
    {"role": "user", "content": "What is the video upload schedule of BuildWithLal Youtube?"}
  ]
)
print(response.choices[0].message.content)

I don't have access to real-time data or specific channels like BuildWithLal on YouTube. To find the video upload schedule for a particular channel, I recommend visiting their YouTube page directly. Many creators post their upload schedules in their video descriptions or community posts. Additionally, you can check their latest videos to see if they follow a consistent pattern in terms of upload frequency.


#

### 1. Preparing your dataset

In [7]:
{"messages": [{"role": "system", "content": "You are CodeCraft Marketplace agent to serve customer queries"}, {"role": "user", "content": "What is the return policy for CodeCraft Marketplace?"}, {"role": "assistant", "content": "Items can be returned within 30 days of delivery for a full refund or exchange. Products must be unused and in their original packaging."}]}

{"messages": [{"role": "system", "content": "You are CodeCraft Marketplace agent to serve customer queries"}, {"role": "user", "content": "How do I track my order?"}, {"role": "assistant", "content": "You can track your order by logging into your account and viewing your order history, or by using the tracking number provided in your shipment confirmation email."}]}

{"messages": [{"role": "system", "content": "You are CodeCraft Marketplace agent to serve customer queries"}, {"role": "user", "content": "What payment methods are accepted on CodeCraft Marketplace?"}, {"role": "assistant", "content": "We accept credit/debit cards, PayPal, and other popular payment methods such as Apple Pay and Google Wallet."}]}


{'messages': [{'role': 'system',
   'content': 'You are CodeCraft Marketplace agent to serve customer queries'},
  {'role': 'user',
   'content': 'What payment methods are accepted on CodeCraft Marketplace?'},
  {'role': 'assistant',
   'content': 'We accept credit/debit cards, PayPal, and other popular payment methods such as Apple Pay and Google Wallet.'}]}

##### To fine-tune a model, you are required to provide at least 10 examples. We typically see clear improvements from fine-tuning on 50 to 100 training examples with gpt-4o-mini and gpt-3.5-turbo, but the right number varies greatly based on the exact use case.

#

### 2. Validate Dataset

In [8]:
import json
from collections import defaultdict

In [12]:
data_path = "static/code-craft-marketplace.jsonl"

# Load the dataset
with open(data_path, 'r', encoding='utf-8') as f:
    dataset = [json.loads(line) for line in f]

# Initial dataset stats
print("Num examples:", len(dataset))
print("First example:")

for message in dataset[0]["messages"]:
    print(message)

Num examples: 20
First example:
{'role': 'system', 'content': 'You are CodeCraft Marketplace agent to serve customer queries'}
{'role': 'user', 'content': 'What is the return policy for CodeCraft Marketplace?'}
{'role': 'assistant', 'content': 'Items can be returned within 30 days of delivery for a full refund or exchange. Products must be unused and in their original packaging.'}


In [13]:
# Format error checks
format_errors = defaultdict(int)

for ex in dataset:
    if not isinstance(ex, dict):
        format_errors["data_type"] += 1
        continue
        
    messages = ex.get("messages", None)
    if not messages:
        format_errors["missing_messages_list"] += 1
        continue
        
    for message in messages:
        if "role" not in message or "content" not in message:
            format_errors["message_missing_key"] += 1
        
        if any(k not in ("role", "content", "name", "function_call", "weight") for k in message):
            format_errors["message_unrecognized_key"] += 1
        
        if message.get("role", None) not in ("system", "user", "assistant", "function"):
            format_errors["unrecognized_role"] += 1
            
        content = message.get("content", None)
        function_call = message.get("function_call", None)
        
        if (not content and not function_call) or not isinstance(content, str):
            format_errors["missing_content"] += 1
    
    if not any(message.get("role", None) == "assistant" for message in messages):
        format_errors["example_missing_assistant_message"] += 1

if format_errors:
    print("Found errors:")
    for k, v in format_errors.items():
        print(f"{k}: {v}")
else:
    print("No errors found")

No errors found


#

### 3. Upload training data

In [14]:
response = client.files.create(
  file=open("static/code-craft-marketplace.jsonl", "rb"),
  purpose="fine-tune"
)

In [15]:
file_id = response.id

In [16]:
file_id

'file-BaE7uhZ0Mhvjn4dzxLcBCHgc'

#### Now we'll use this file id as a ref instead of our local file since this file is uploaded to openai server

#### The maximum file upload size is 1 GB

#

### 4. Create fine-tuned model by starting its job on the openAI server

In [17]:
response = client.fine_tuning.jobs.create(
  training_file=file_id, 
  model="gpt-3.5-turbo"
)

In [18]:
job_id = "ftjob-JokELaEJabtmwFFD1Y9mhkZu" #response.id

In [19]:
job_id

'ftjob-JokELaEJabtmwFFD1Y9mhkZu'

##### After you've started a fine-tuning job, it may take some time to complete. Your job may be queued behind other jobs in our system, and training a model can take minutes or hours depending on the model and dataset size. After the model training is completed, the user who created the fine-tuning job will receive an email confirmation.

### 5. Check fine-tuned model status

In [20]:
#client.fine_tuning.jobs.list(limit=10)

# Retrieve the state of a fine-tune
response = client.fine_tuning.jobs.retrieve(job_id)
response

FineTuningJob(id='ftjob-JokELaEJabtmwFFD1Y9mhkZu', created_at=1722023952, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-3.5-turbo-0125:personal::9pLUXPi8', finished_at=1722024367, hyperparameters=Hyperparameters(n_epochs=5, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-CMYq0wLF2zfrsXDQmnDi4syt', result_files=['file-RAJKNaqcjqYwsbyMVex6pJzh'], seed=1664177392, status='succeeded', trained_tokens=6065, training_file='file-MZCZWddgkInBK9pDNdJ1Qvdd', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)

#### Can check status here as well https://platform.openai.com/finetune/ftjob-FMOOJVgyILjimmSI26OTakRT?filter=all

In [21]:
response.status

'succeeded'

In [25]:
model_name = response.fine_tuned_model
model_name = 'ft:gpt-3.5-turbo-0125:personal::9pLUXPi8'

AttributeError: 'ChatCompletion' object has no attribute 'fine_tuned_model'

#

### 6. Use a fine-tuned model

In [None]:
response = client.chat.completions.create(
  # model=model_name,
  model='gpt-3.5-turbo',  
  messages=[  
    {"role": "user", "content": "Do you ship internationally?"}
  ]
)
print(response.choices[0].message.content)

In [26]:
response = client.chat.completions.create(
  model=model_name, #'gpt-3.5-turbo',
  messages=[
    {"role": "user", "content": "What is the return policy of CodeCraft Marketfplace?"}
  ]
)
print(response.choices[0].message.content)

CodeCraft Marketplace offers a 30-day return policy for unused and unopened items. Customers must contact customer support to initiate a return and receive a return authorization. A refund will be issued once the returned item is received and inspected.
