In [1]:
import json
import openai
import os, pandas as pd

# Fine-tuning a GPT-3 model for specific prompts

In [2]:
# Read the OpenAI credential from the OPENAI_API_KEY environment variable
# (yields None when the variable is unset) instead of hardcoding it.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [1]:
def convert_arrays_to_csv(file_path):
    """Convert a text file holding a Python-literal array of arrays into a CSV.

    The file at ``file_path`` must contain a literal such as
    ``[[1, 2], [3, 4]]``. Its rows are loaded into a DataFrame, written next
    to the input file with a ``.csv`` extension (no index column), and then
    read back from that CSV.

    Args:
        file_path: Path to the input text file (typically ``*.txt``).

    Returns:
        pandas.DataFrame: The data as re-read from the newly written CSV.

    Raises:
        OSError: If the input cannot be read or the CSV cannot be written.
        ValueError, SyntaxError: If the file content is not a valid Python
            literal (for example, it contains a function call).
    """
    # Function-scope import keeps this cell runnable on its own; ast is stdlib.
    import ast

    file_path = os.fspath(file_path)

    # Read the array of arrays from the text file. literal_eval accepts only
    # literals, so file content cannot execute code the way eval() would.
    with open(file_path, 'r') as file:
        array_of_arrays = ast.literal_eval(file.read())

    # Convert the array of arrays to a DataFrame (one row per inner array).
    df = pd.DataFrame(array_of_arrays)

    # Swap only the real extension, so a '.txt' elsewhere in the path is untouched.
    root, _ext = os.path.splitext(file_path)
    csv_file_path = root + '.csv'
    df.to_csv(csv_file_path, index=False)

    # Return the frame as loaded from the CSV that was just written.
    return pd.read_csv(csv_file_path)

# Shell escape: run the OpenAI CLI's fine_tunes.prepare_data tool on training_data.jsonl.
!openai tools fine_tunes.prepare_data -f training_data.jsonl

Analyzing...

- Your file contains 7 prompt-completion pairs. In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples
- All prompts end with suffix `.`
- All prompts start with prefix `Number of Fingers: `. Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. Most of the time you should only add the input data into the prompt, and the desired output into the completion
- All completions start with prefix `$(r\sin(`. Most of the time you should only add the output data into the completion, without any prefix
- All completions end with suffix `)) = \psi $`. This suffix seems very long. Consider replacing with a shorter suffix, such as `\n`
- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://platform.openai.com/docs/guides/fine-tuning/preparing-

In [7]:
# Keep the id of the uploaded file so it can be passed to the fine-tune request.
# NOTE(review): upload_response is not defined in any cell shown in this notebook;
# presumably it is the result of a file-upload call (e.g. openai.File.create).
# Confirm the defining cell exists so this survives Restart & Run All.
file_id = upload_response.id
file_id

'file-D9znyKJPuuI1DQ8Gpmdgyji8'

In [16]:
# Create a fine-tune job from the training file id. Only training_file is passed;
# no base model or hyperparameters are specified in this call.
fine_tune_response = openai.FineTune.create(training_file=file_id)
fine_tune_response  # display the returned job object

<FineTune fine-tune id=ft-eQekbr32V3ud5gCT4uB2MF3k at 0x7f7958318f90> JSON: {
  "created_at": 1687113548,
  "events": [
    {
      "created_at": 1687113548,
      "level": "info",
      "message": "Created fine-tune: ft-eQekbr32V3ud5gCT4uB2MF3k",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.01
  },
  "id": "ft-eQekbr32V3ud5gCT4uB2MF3k",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-vEiwthoCbMO1wp6DrXzyRMxP",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 4708,
      "created_at": 1687113163,
      "filename": "file",
      "id": "file-D9znyKJPuuI1DQ8Gpmdgyji8",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",
      "status_details": null
    }
  ],
  "updated_at": 1687113548,
  "validation_files": []
}

In [29]:
# Fetch the events recorded for the job referenced by fine_tune_response.id.
fine_tune_events = openai.FineTune.list_events(id=fine_tune_response.id)
fine_tune_events  # display the returned list object

<OpenAIObject list at 0x7f7970cdae00> JSON: {
  "data": [
    {
      "created_at": 1687113548,
      "level": "info",
      "message": "Created fine-tune: ft-eQekbr32V3ud5gCT4uB2MF3k",
      "object": "fine-tune-event"
    }
  ],
  "object": "list"
}

Check the job's progress with `openai.FineTune.retrieve(id=fine_tune_response.id)`, which returns an object describing the fine-tuning job (including its `status`, `events`, and `fine_tuned_model` fields).

In [30]:
# Look the job up by the id returned from FineTune.create instead of a pasted
# literal, so this cell keeps tracking the right job when a fresh run creates a new one.
retrieve_response = openai.FineTune.retrieve(id=fine_tune_response.id)
retrieve_response  # display the current job object

<FineTune fine-tune id=ft-eQekbr32V3ud5gCT4uB2MF3k at 0x7f79588c09a0> JSON: {
  "created_at": 1687113548,
  "events": [
    {
      "created_at": 1687113548,
      "level": "info",
      "message": "Created fine-tune: ft-eQekbr32V3ud5gCT4uB2MF3k",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.01
  },
  "id": "ft-eQekbr32V3ud5gCT4uB2MF3k",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-vEiwthoCbMO1wp6DrXzyRMxP",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 4708,
      "created_at": 1687113163,
      "filename": "file",
      "id": "file-D9znyKJPuuI1DQ8Gpmdgyji8",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",
      "status_details": null
    }
  ],
  "updated_at": 1687113548,
  "validation_files": []
}

In [31]:
# List fine-tune jobs; the returned list object is displayed as the cell output.
openai.FineTune.list()

<OpenAIObject list at 0x7f79588c7e00> JSON: {
  "data": [
    {
      "created_at": 1687113168,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": 1,
        "learning_rate_multiplier": 0.1,
        "n_epochs": 4,
        "prompt_loss_weight": 0.01
      },
      "id": "ft-vddYJn4wTBeKhVkN6GlVg8bE",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-vEiwthoCbMO1wp6DrXzyRMxP",
      "result_files": [],
      "status": "pending",
      "training_files": [
        {
          "bytes": 4708,
          "created_at": 1687113163,
          "filename": "file",
          "id": "file-D9znyKJPuuI1DQ8Gpmdgyji8",
          "object": "file",
          "purpose": "fine-tune",
          "status": "processed",
          "status_details": null
        }
      ],
      "updated_at": 1687113783,
      "validation_files": []
    },
    {
      "created_at": 1687113263,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": null

In [35]:
# Retrieve the job referenced by fine_tune_response.id and print it.
# NOTE(review): despite its name, `fine_tuned_model` holds the whole retrieved job
# object here, not a model name. Other cells in this notebook bind the same
# name to the job's `.fine_tuned_model` field instead.
fine_tuned_model = openai.FineTune.retrieve(id=fine_tune_response.id)
print(fine_tuned_model)

{
  "created_at": 1687113548,
  "events": [
    {
      "created_at": 1687113548,
      "level": "info",
      "message": "Created fine-tune: ft-eQekbr32V3ud5gCT4uB2MF3k",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.01
  },
  "id": "ft-eQekbr32V3ud5gCT4uB2MF3k",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-vEiwthoCbMO1wp6DrXzyRMxP",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 4708,
      "created_at": 1687113163,
      "filename": "file",
      "id": "file-D9znyKJPuuI1DQ8Gpmdgyji8",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",
      "status_details": null
    }
  ],
  "updated_at": 1687113548,
  "validation_files": []
}


In [38]:
# Same listing call as earlier in the notebook, presumably re-run to re-check job status.
openai.FineTune.list()

<OpenAIObject list at 0x7f79588bcae0> JSON: {
  "data": [
    {
      "created_at": 1687113168,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": 1,
        "learning_rate_multiplier": 0.1,
        "n_epochs": 4,
        "prompt_loss_weight": 0.01
      },
      "id": "ft-vddYJn4wTBeKhVkN6GlVg8bE",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-vEiwthoCbMO1wp6DrXzyRMxP",
      "result_files": [],
      "status": "pending",
      "training_files": [
        {
          "bytes": 4708,
          "created_at": 1687113163,
          "filename": "file",
          "id": "file-D9znyKJPuuI1DQ8Gpmdgyji8",
          "object": "file",
          "purpose": "fine-tune",
          "status": "processed",
          "status_details": null
        }
      ],
      "updated_at": 1687113783,
      "validation_files": []
    },
    {
      "created_at": 1687113263,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": null

In [None]:
# Keep the model name only when the job object carries one. `is not None` is the
# idiomatic identity check for None.
# NOTE(review): fine_tune_response is the object returned when the job was created
# and is not refreshed in this cell. Confirm it can ever carry a non-null
# fine_tuned_model; otherwise retrieve the job again before checking.
if fine_tune_response.fine_tuned_model is not None:
    fine_tuned_model = fine_tune_response.fine_tuned_model

In [27]:
# Disabled conditional variant (only re-fetched when the create response had no model):
# if fine_tune_response.fine_tuned_model == None:
#     fine_tuned_model = openai.FineTune.retrieve(id=fine_tune_response.id).fine_tuned_model

# Active version: always re-fetch the job and keep only its fine_tuned_model field.
fine_tuned_model = openai.FineTune.retrieve(id=fine_tune_response.id).fine_tuned_model


In [28]:
# Re-fetch the job and store its fine_tuned_model field. Nothing is exported
# here; the statement only rebinds the fine_tuned_model variable.
fine_tuned_model = openai.FineTune.retrieve(id=fine_tune_response.id).fine_tuned_model

None
