In [None]:
!pip install openai datasets



In [None]:
import json

def read_json_file(filename):
    """Load and return the parsed contents of a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly so non-ASCII text (emoji, curly quotes, ...) is read
    # the same way regardless of the platform's default locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

# Load each account's JSON file from the data directory, in a fixed order
parent = 'Poast_Data/'
roon_data, growing_daniel_data, pajeet_bateman_data, tenxer_data = (
    read_json_file(parent + json_name)
    for json_name in ('roon.json', 'growing_daniel.json', 'pajeet_bateman.json', '10xer.json')
)

# Fold everything into one dictionary; when a key appears in more than one file,
# the entry from the later file replaces the earlier one
merged_data = {}
for account_data in (roon_data, growing_daniel_data, pajeet_bateman_data, tenxer_data):
    merged_data.update(account_data)

In [None]:
# The system message is identical for every example; only the user/assistant turns vary.
system_prompt = "You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology."


def _as_chat_example(prompt, reply):
    """Wrap one key/value pair from ``merged_data`` as a system/user/assistant record."""
    return {"messages": [
        {"role": "system", "content": system_prompt},
        {'role': 'user', 'content': prompt},
        {'role': 'assistant', 'content': reply},
    ]}


ds_formatted = [_as_chat_example(prompt, reply) for prompt, reply in merged_data.items()]

In [None]:
# Number of formatted examples (one per key in merged_data)
len(ds_formatted)

185

In [None]:
# Display the first example to sanity-check the system/user/assistant structure
ds_formatted[0]

{'messages': [{'role': 'system',
   'content': 'You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology.'},
  {'role': 'user', 'content': 'Write a tweet about passion in biotech'},
  {'role': 'assistant',
   'content': 'people do biotech for the love of the game. terrible business to be in otherwise.'}]}

In [None]:
import random

# Shuffle with a fixed seed so that Restart & Run All reproduces the same
# train/validation split. A dedicated Random instance is used so the global
# random state is left alone.
# NOTE: the shuffle is in place, so re-running only this cell reshuffles the
# already-shuffled list; re-run from the cell that builds ds_formatted instead.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(ds_formatted)

In [None]:
# The first 140 shuffled examples form the training set; the remainder is held out
# for validation.
split_at = 140
ds_train, ds_val = ds_formatted[:split_at], ds_formatted[split_at:]

In [None]:
import json


def write_jsonl(path, records):
    """Write ``records`` to ``path`` as JSON Lines: one JSON document per line.

    Args:
        path: Destination file; created, or truncated if it already exists.
        records: Iterable of JSON-serializable objects.
    """
    # json.dumps escapes non-ASCII characters by default; the explicit encoding keeps
    # the file bytes independent of the platform's default locale.
    with open(path, 'w', encoding='utf-8') as f:
        for record in records:
            f.write(json.dumps(record) + '\n')


# Same writer for both splits, so the two files cannot drift apart in format.
write_jsonl('train.jsonl', ds_train)
write_jsonl('val.jsonl', ds_val)

In [None]:
ls

10xer.json           pajeet_bateman.json  [0m[01;34msample_data[0m/  val.jsonl
growing_daniel.json  roon.json            train.jsonl


In [None]:
# Show the first two lines of the training file that was just written
!head -n 2 train.jsonl

{"messages": [{"role": "system", "content": "You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology."}, {"role": "user", "content": "Write a tweet about a humorous observation regarding one's online impact"}, {"role": "assistant", "content": "the point of my account is to make respectable machine learning scientists see boosted tweets from 'PowerBottomDad' and 'BreastMilkEnjoyer' in their feed"}]}
{"messages": [{"role": "system", "content": "You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology."}, {"role": "user", "content": "Write a tweet about suburban governance"}, {"role": "assistant", "content": "i don't think local governance is sacrosanct in any shape or form. suburbs reap massive positive externalities from the city they encircle and owe their existence to it. for land use in the US to be sane there needs to be loyalty t

In [None]:
# Show the first two lines of the validation file that was just written
!head -n 2 val.jsonl

{"messages": [{"role": "system", "content": "You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology."}, {"role": "user", "content": "Write a tweet reminiscing college engineering experiences"}, {"role": "assistant", "content": "are hackathons still a thing? when I was in college it was incredibly fun to not sleep for 36 hrs and write garbage code followed by a hard coded demo. definitely took some years off my life but really changed my perspective on what engineering is really about"}]}
{"messages": [{"role": "system", "content": "You are a schizophrenic poaster from Twitter. You are unhinged and tweet overly verbose yet cogent updates on the state of technology."}, {"role": "user", "content": "Write a tweet about the impact of technological inventions on wars"}, {"role": "assistant", "content": "the scale of destruction of the world wars was made possible by the inventions of industrial chemistry, tanks, a

# Upload data

In [None]:
import os
from getpass import getpass

import openai

# Never paste a real key into the notebook: it would be saved with the file and can
# leak through version control or sharing. Read it from the OPENAI_API_KEY environment
# variable, falling back to an interactive prompt whose input is not echoed.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")


In [None]:
# Upload the training set. The context manager closes the local file handle as soon
# as the upload call returns, instead of leaving it open for the life of the kernel.
with open("train.jsonl", "rb") as train_file:
    train = openai.File.create(
        file=train_file,
        purpose='fine-tune'
    )
train

<File file id=file-UJenw1sq2Nmk5uVDT5PbI0L9 at 0x7851a42e59e0> JSON: {
  "object": "file",
  "id": "file-UJenw1sq2Nmk5uVDT5PbI0L9",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 67276,
  "created_at": 1693685909,
  "status": "uploaded",
  "status_details": null
}

In [None]:
# Keep the uploaded training file's id; the fine-tuning job is created from this id
train_id = train['id']

In [None]:
# Upload the validation set. The context manager closes the local file handle as soon
# as the upload call returns, instead of leaving it open for the life of the kernel.
with open("val.jsonl", "rb") as val_file:
    val = openai.File.create(
        file=val_file,
        purpose='fine-tune'
    )
val

<File file id=file-9LDxUflvp9KqmyzL5ES4NdMr at 0x78519410cef0> JSON: {
  "object": "file",
  "id": "file-9LDxUflvp9KqmyzL5ES4NdMr",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 21788,
  "created_at": 1693685911,
  "status": "uploaded",
  "status_details": null
}

In [None]:
# Keep the uploaded validation file's id; the fine-tuning job is created from this id
val_id = val['id']

# You need to wait for OpenAI to register the training and validation datasets. It takes 2-3 minutes.

In [None]:
import time

# Poll the uploaded files' status instead of sleeping for a fixed 180 s: this returns
# as soon as both files are ready, keeps waiting when processing is slow, and reports
# a failed upload here rather than later at job creation.
# NOTE(review): assumes the file object reports "processed" when ready and "error" on
# failure, as described in the OpenAI Files API reference — confirm for your SDK version.
POLL_INTERVAL_S = 10
MAX_WAIT_S = 15 * 60

deadline = time.monotonic() + MAX_WAIT_S
pending_ids = {train_id, val_id}
while pending_ids:
    # Iterate over a sorted copy so ids can be dropped from the set inside the loop.
    for file_id in sorted(pending_ids):
        status = openai.File.retrieve(file_id)["status"]
        if status == "processed":
            pending_ids.discard(file_id)
        elif status == "error":
            raise RuntimeError(f"OpenAI could not process uploaded file {file_id}")
    if pending_ids:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Files still not processed after {MAX_WAIT_S} s: {sorted(pending_ids)}"
            )
        time.sleep(POLL_INTERVAL_S)

# Fine-tuning

In [None]:
# Start a fine-tuning job on the base chat model, using the two uploaded files
job_params = {
    "training_file": train_id,
    "validation_file": val_id,
    "model": "gpt-3.5-turbo",
}
response = openai.FineTuningJob.create(**job_params)
response

<FineTuningJob fine_tuning.job id=ftjob-oLni7wKpfR69g4xkVYxoPVSB at 0x785194126e30> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-oLni7wKpfR69g4xkVYxoPVSB",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693686633,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-Qbxm8IYGH5JuxbDihd36NlvU",
  "result_files": [],
  "status": "created",
  "validation_file": "file-9LDxUflvp9KqmyzL5ES4NdMr",
  "training_file": "file-UJenw1sq2Nmk5uVDT5PbI0L9",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

In [None]:
# Keep the job id; the status and event lookups below are keyed by it
job_id = response['id']

In [None]:
# Fetch the job's current record; its "status" field shows progress
# (this rebinds `response`, which previously held the creation result)
response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-oLni7wKpfR69g4xkVYxoPVSB at 0x7851ad5ad3f0> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-oLni7wKpfR69g4xkVYxoPVSB",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693686633,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-Qbxm8IYGH5JuxbDihd36NlvU",
  "result_files": [],
  "status": "running",
  "validation_file": "file-9LDxUflvp9KqmyzL5ES4NdMr",
  "training_file": "file-UJenw1sq2Nmk5uVDT5PbI0L9",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

In [None]:
# Check the job again: re-fetch its record and display the current "status"
response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-oLni7wKpfR69g4xkVYxoPVSB at 0x785194125170> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-oLni7wKpfR69g4xkVYxoPVSB",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693686633,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-Qbxm8IYGH5JuxbDihd36NlvU",
  "result_files": [],
  "status": "running",
  "validation_file": "file-9LDxUflvp9KqmyzL5ES4NdMr",
  "training_file": "file-UJenw1sq2Nmk5uVDT5PbI0L9",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

In [None]:
# List up to 10 events from a fine-tuning job.
# Note: this rebinds `response` to the event list; it no longer holds the job record.
response = openai.FineTuningJob.list_events(id=job_id, limit=10)
response

<OpenAIObject list at 0x7851941d7150> JSON: {
  "object": "list",
  "data": [
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-O2jSGpHXfMir8vq8l9xpAQy7",
      "created_at": 1693686633,
      "level": "info",
      "message": "Fine tuning job started",
      "data": null,
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-2Hp5wKdWQVSGjxNGdVJlV3NS",
      "created_at": 1693686633,
      "level": "info",
      "message": "Created fine-tune: ftjob-oLni7wKpfR69g4xkVYxoPVSB",
      "data": null,
      "type": "message"
    }
  ],
  "has_more": false
}

In [None]:
# Print the event messages in the opposite order to the API's listing.
# reversed() builds a new list, so response["data"] is left untouched and re-running
# this cell prints the same order every time (an in-place .reverse() would flip the
# order back on each re-run).
events = list(reversed(response["data"]))

for event in events:
    print(event["message"])

Created fine-tune: ftjob-oLni7wKpfR69g4xkVYxoPVSB
Fine tuning job started


In [None]:
# Re-fetch the job record to see whether it has finished
# (rebinds `response` from the event list back to the job record)
response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-oLni7wKpfR69g4xkVYxoPVSB at 0x7851a42e6de0> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-oLni7wKpfR69g4xkVYxoPVSB",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693686633,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-Qbxm8IYGH5JuxbDihd36NlvU",
  "result_files": [],
  "status": "running",
  "validation_file": "file-9LDxUflvp9KqmyzL5ES4NdMr",
  "training_file": "file-UJenw1sq2Nmk5uVDT5PbI0L9",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

# In about 5 minutes you'll get an email with instructions for loading the model into the Playground.