## Install Dependencies

In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m71.7/76.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.2
    Uninstalling openai-1.52.2:
      Successfully uninstalled openai-1.52.2
Successfully installed openai-0.28.0


## Import Libraries

In [None]:
import openai
import os
import json
from google.colab import drive, runtime

## Mount Google Drive

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


## Access Data

In [None]:
data = '/content/drive/MyDrive/Taboo data/en'

## Load Data

In [None]:
taboo_data = {}

# Load each JSON file
for filename in os.listdir(data):
    if filename.endswith(".json"):
        category = filename.split('.')[0]  # e.g., 'animals', 'cars'
        with open(os.path.join(data, filename), 'r') as file:
            taboo_data[category] = json.load(file)

# Display data structure
print(f"Categories loaded: {list(taboo_data.keys())}")

Categories loaded: ['literature', 'food', 'tv', 'sports', 'cars', 'web', 'animals', 'city-country', 'people', 'things']


## Prepare Training Data

In [None]:
# Store prompt-completion pairs for training
training_data = []

for category, words in taboo_data.items():
    for word, taboo_words in words.items():
        prompt = f"Describe '{word}' without using the words: {', '.join(taboo_words)}."
        # Placeholder/generic description
        completion = f"A description for '{word}'."
        training_data.append({'prompt': prompt, 'completion': completion})

# Display updated sample training data
print("Sample training data:", training_data[:3])

Sample training data: [{'prompt': "Describe '1984' without using the words: George Orwell.", 'completion': "A description for '1984'."}, {'prompt': "Describe 'A Doll's House' without using the words: Henrik Ibsen.", 'completion': "A description for 'A Doll's House'."}, {'prompt': "Describe 'Absalom, Absalom!' without using the words: William Faulkner.", 'completion': "A description for 'Absalom, Absalom!'."}]


## Save Training Data in JSONL Format

In [None]:
output_path = '/content/drive/MyDrive/Taboo data/training_data.jsonl'


# Save data in JSONL format
with open(output_path, 'w') as outfile:
    for entry in training_data:
        json.dump(entry, outfile)
        outfile.write('\n')

print(f"Training data saved to {output_path}")

Training data saved to /content/drive/MyDrive/Taboo data/training_data.jsonl


## Set up OpenAI API Key

In [None]:
from google.colab import userdata

OPENAI_API_KEY = userdata.get("OPEN_API_KEY")
openai.api_key = OPENAI_API_KEY

## Upload Training file to OpenAI

In [None]:
with open(output_path, "rb") as file:
    training_file = openai.File.create(
        file=file,
        purpose="fine-tune"
    )

print("Training file ID:", training_file["id"])

Training file ID: file-nuYtVcsBqcs2XT45EQXsyOAU


## Start the Fine-Tuning Job

In [None]:
fine_tune_response = openai.FineTune.create(
    training_file=training_file["id"],
    model="gpt-4o-2024-08-06",
    suffix="TabooGPT-4o"
)

print("Fine-tuning job details:", fine_tune_response)

InvalidRequestError: Unknown request URL: POST /v1/fine-tunes. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.

## Monitor the Fine-Tuning Job Status (periodic run while job is in progress)

In [None]:
fine_tune_job_id = fine_tune_response["id"]
status_response = openai.FineTune.retrieve(id=fine_tune_job_id)
print("Fine-tuning job status:", status_response)