# **GPT 3.5 Turbo Fine-Tuning**

---



# **Data Generation**

**Set the Desired Behavior for the Model Through a Prompt**

In [None]:
!pip install openai tenacity

In [None]:
# High-level description of the model to train. It is interpolated into the
# data-generation system prompt and is also passed to generate_system_message.
# Written as adjacent string literals so that source indentation and line breaks
# do not leak into the text sent to the API.
# NOTE(review): "cases" may have been intended as "causes" — confirm with the author.
prompt = (
    "A model should act as a health assistant that takes human illness-related questions in English "
    "and responds with the severity of the illness, cases for the illness, and treatments for the illness in English."
)

# Sampling temperature used for the GPT-4 calls that generate the data and the system message.
temperature = .4
# Number of prompt/response pairs to generate.
number_of_examples = 50

**Prompt(query) and Response**

In [None]:
import os
import openai
import random
from tenacity import retry, stop_after_attempt, wait_exponential

# Read the key from the OPENAI_API_KEY environment variable so the real secret
# never has to be written into the notebook; the "API_KEY" placeholder is kept
# as the fallback when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "API_KEY")

# Maximum number of attempts made for each generate_example call.
N_RETRIES = 3

@retry(stop=stop_after_attempt(N_RETRIES), wait=wait_exponential(multiplier=1, min=4, max=70))
def generate_example(prompt, prev_examples, temperature=.5):
    """Ask GPT-4 for one new prompt/response training sample.

    Args:
        prompt: High-level description of the model being trained; it is
            interpolated into the system instructions.
        prev_examples: Previously generated samples. At most 8 of them (a
            random subset when there are more) are replayed as assistant turns.
        temperature: Sampling temperature passed to the chat completion call.

    Returns:
        The content of the first choice's message.

    The call is wrapped in a retry decorator limited to N_RETRIES attempts.
    """
    system_instructions = f"You are generating data which will be used to train a machine learning model.\n\nYou will be given a high-level description of the model we want to train, and from that, you will generate data samples, each with a prompt/response pair.\n\nYou will do so in this format:\n```\nprompt\n-----------\n$prompt_goes_here\n-----------\n\nresponse\n-----------\n$response_goes_here\n-----------\n```\n\nOnly one prompt/response pair should be generated per turn.\n\nFor each turn, make the example slightly more complex than the last, while ensuring diversity.\n\nMake sure your samples are unique and diverse, yet high-quality and complex enough to train a well-performing model.\n\nHere is the type of model we want to train:\n`{prompt}`"

    # Cap the replayed history at 8 samples so the request stays small.
    history = prev_examples
    if len(history) > 8:
        history = random.sample(history, 8)

    messages = [{"role": "system", "content": system_instructions}]
    messages.extend({"role": "assistant", "content": sample} for sample in history)

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        temperature=temperature,
        max_tokens=1000,
    )
    return completion.choices[0].message['content']

# Generate the examples one at a time, passing the growing list back in so each
# request can replay previously generated samples.
prev_examples = []
for example_number in range(number_of_examples):
    print(f'Generating example {example_number}')
    prev_examples.append(generate_example(prompt, prev_examples, temperature))

print(prev_examples)

Generating example 0
Generating example 1
Generating example 2
Generating example 3
Generating example 4
Generating example 5
Generating example 6
Generating example 7
Generating example 8
Generating example 9
Generating example 10
Generating example 11
Generating example 12
Generating example 13
Generating example 14
Generating example 15
Generating example 16
Generating example 17
Generating example 18
Generating example 19
Generating example 20
Generating example 21
Generating example 22
Generating example 23
Generating example 24
Generating example 25
Generating example 26
Generating example 27
Generating example 28
Generating example 29
Generating example 30
Generating example 31
Generating example 32
Generating example 33
Generating example 34
Generating example 35
Generating example 36
Generating example 37
Generating example 38
Generating example 39
Generating example 40
Generating example 41
Generating example 42
Generating example 43
Generating example 44
Generating example 4

**System Message**

In [None]:
def generate_system_message(prompt):
    """Ask GPT-4 to write the system prompt the trained model will use at inference.

    Args:
        prompt: High-level description of the model being trained; surrounding
            whitespace is stripped before it is sent as the user message.

    Returns:
        The content of the first choice's message.

    Note:
        The sampling temperature is read from the module-level ``temperature``.
    """
    instructions = "You will be given a high-level description of the model we are training, and from that, you will generate a simple system prompt for that model to use. Remember, you are not generating the system message for data generation -- you are generating the system message to use for inference. A good format to follow is `Given $INPUT_DATA, you will $WHAT_THE_MODEL_SHOULD_DO.`.\n\nMake it as concise as possible. Include nothing but the system prompt in your response.\n\nFor example, never write: `\"$SYSTEM_PROMPT_HERE\"`.\n\nIt should be like: `$SYSTEM_PROMPT_HERE`."

    chat_messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt.strip()},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=chat_messages,
        temperature=temperature,
        max_tokens=500,
    )
    return completion.choices[0].message['content']

# Generate the inference-time system prompt from the same high-level description
# that drove data generation; it is reused for the training file and the test queries.
system_message = generate_system_message(prompt)

print(f'The system message is: `{system_message}`. Feel free to re-run this cell if you want a better result.')

The system message is: `Given your illness-related question, the model will provide information on the severity of the illness, the number of cases, and potential treatments.`. Feel free to re-run this cell if you want a better result.


# **Create Training Data in the Format required for GPT-3.5 Fine-Tuning**

In [None]:
import json
import pandas as pd

# Initialize lists to store prompts and responses
prompts = []
responses = []

# Parse out prompts and responses from examples.
# A well-formed example, split on the delimiter, carries the prompt text at
# index 1 and the response text at index 3.
for example in prev_examples:
  try:
    split_example = example.split('-----------')
    # Extract BOTH fields before appending either one, so a malformed example
    # can never leave `prompts` and `responses` with different lengths (which
    # would make the DataFrame construction fail).
    prompt_text = split_example[1].strip()
    response_text = split_example[3].strip()
  except (AttributeError, IndexError):
    # Best-effort: skip examples that are not strings or lack the expected sections.
    continue
  prompts.append(prompt_text)
  responses.append(response_text)

# Pair each prompt with its response in a table and drop exact duplicate rows.
df = pd.DataFrame({'prompt': prompts, 'response': responses}).drop_duplicates()

print(f'There are {len(df)} successfully-generated examples.')

# Build one chat-format record per row, as required for GPT-3.5 fine-tuning:
# the shared system message, the generated prompt as the user turn, and the
# generated response as the assistant turn.
training_examples = [
    {
        "messages": [
            {"role": "system", "content": system_message.strip()},
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ]
    }
    for user_text, assistant_text in zip(df['prompt'], df['response'])
]

# Write the records as JSON Lines: one JSON object per line.
with open('training_examples.jsonl', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in training_examples)

There are 49 successfully-generated examples.


# **Model Training**

**Upload the data to OpenAI and grab the file ID once the upload completes**

In [None]:
# Upload the training file and keep the ID OpenAI assigns to it.
# The path matches the one the training data was written to (relative to the
# working directory), and the `with` block closes the handle after the upload.
with open("training_examples.jsonl", "rb") as training_file:
  file_id = openai.File.create(
    file=training_file,
    purpose='fine-tune'
  ).id

**Fine-tune GPT-3.5 using the file ID**

In [None]:
# Start a fine-tuning job for gpt-3.5-turbo using the uploaded training file.
job = openai.FineTuningJob.create(training_file=file_id, model="gpt-3.5-turbo")
# Keep the job ID; later cells use it to list the job's events and retrieve the job.
job_id = job.id

# **Inference from the Fine-Tuned Model**

In [None]:
# List up to 10 events for the fine-tuning job to check on its progress.
openai.FineTuningJob.list_events(id=job_id, limit=10)

<OpenAIObject list at 0x7913d8a158f0> JSON: {
  "object": "list",
  "data": [
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-4P0JgCumbEL8YBnKrHj8AcgN",
      "created_at": 1694752740,
      "level": "info",
      "message": "The job has successfully completed",
      "data": {},
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-Nqo9d1HTh6EiHNgsptfczH9L",
      "created_at": 1694752738,
      "level": "info",
      "message": "New fine-tuned model created: ft:gpt-3.5-turbo-0613:personal::7yutWnZt",
      "data": {},
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-d72jryDt7CerW1wRaJSBKFgT",
      "created_at": 1694752719,
      "level": "info",
      "message": "Step 141/147: training loss=0.09",
      "data": {
        "step": 141,
        "train_loss": 0.09374962747097015,
        "train_mean_token_accuracy": 0.9634146094322205
      },
      "type": "metrics"
    },

**Get the Model ID using fine_tuned_model Field**

In [None]:
# Retrieve the job record and read the name of the resulting fine-tuned model.
# NOTE(review): this field is presumably empty until the job has completed —
# confirm the job succeeded before using model_name in the cells below.
model_name_pre_object = openai.FineTuningJob.retrieve(job_id)
model_name = model_name_pre_object.fine_tuned_model
print(model_name)

ft:gpt-3.5-turbo-0613:personal::7yutWnZt


**Response for a Generated Training Query**

In [None]:
# Query the fine-tuned model with one randomly sampled prompt from the training table,
# using the same system message the training records carry.
sampled_prompt = df['prompt'].sample().values[0]

response = openai.ChatCompletion.create(
    model=model_name,
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": sampled_prompt},
    ],
)

response.choices[0].message['content']

'Your symptoms are serious and could indicate a heart attack. A heart attack occurs when the blood flow that brings oxygen to the heart muscle is severely reduced or cut off. This is a medical emergency and you should seek immediate medical attention. Treatment often involves medications and surgical procedures. Please go to the emergency room or call emergency services right away.'

**Response for User Query**

In [None]:
# Query the fine-tuned model with a hand-written question, using the same
# system message as in the training records.
user_query = "Tell me about Influenza ?"

response = openai.ChatCompletion.create(
    model=model_name,
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_query},
    ],
)

response.choices[0].message['content']

"Influenza, commonly known as the flu, is a contagious respiratory illness caused by influenza viruses. It can cause mild to severe illness, and at times can lead to death. The severity can vary depending on the individual and the virus strain. Symptoms often include fever, cough, sore throat, muscle aches, fatigue, and runny or stuffy nose. The best way to prevent influenza is vaccination. Treatment usually involves rest, fluids, and over-the-counter medicines to reduce symptoms. In some cases, antiviral drugs may be prescribed to help lessen the severity and duration of the illness. It's important to contact a healthcare professional for an accurate diagnosis and guidance on treatment. Please consult a doctor if you're experiencing symptoms or have any concerns."