In [6]:
# Work from the project directory so the relative ./data paths used in later
# cells resolve.
# NOTE(review): the path is relative, so re-running this cell in the same
# kernel looks for ./project/Turtle_Soup inside the already-changed working
# directory — confirm it is only run once per kernel.
import os
os.chdir("./project/Turtle_Soup")

In [73]:
import pandas as pd
import re
import sys
import os
import json
from collections import defaultdict
import warnings
warnings.filterwarnings("ignore")

## Build JSONL fine-tuning data

In [176]:
# r'.' not include \n
def remove_(df):
    """Clean a pandas Series of raw puzzle text.

    The following steps are applied, in order, to every string:

    1. replace newline characters with a space;
    2. drop parenthesised asides together with the space before them;
    3. drop everything from a digit up to the next double space, then
       everything from a digit up to ``answer:  ``;
    4. collapse whitespace runs into one space and drop ``Variant: `` labels;
    5. remove backslashes;
    6. strip leading and trailing whitespace.

    Parameters
    ----------
    df : pandas.Series
        Series of strings (a Series despite the parameter name). Missing
        values are passed through unchanged instead of raising.

    Returns
    -------
    pandas.Series
        The cleaned strings, same index as the input.
    """
    # regex=True is spelled out on every pattern: pandas >= 2.0 defaults
    # Series.str.replace to regex=False, which would treat these patterns as
    # literal text and silently clean nothing.
    cleaned = df.str.replace(r'\n', ' ', regex=True)
    # Newlines are already gone, so '.' can span a whole aside.
    cleaned = cleaned.str.replace(r" \(.*?\)", "", regex=True)
    # NOTE(review): '[0-9]' matches any digit, not only leading numbering, so
    # digits inside the text (e.g. years) are removed up to the next double
    # space as well — confirm this is intended.
    cleaned = cleaned.str.replace(r'[0-9].*?  ', "", regex=True)
    cleaned = cleaned.str.replace(r'[0-9].*?answer:  ', "", regex=True)
    cleaned = cleaned.str.replace(r'\s+', ' ', regex=True)
    # Plain-text removals: no regex needed.
    cleaned = cleaned.str.replace('Variant: ', '', regex=False)
    cleaned = cleaned.str.replace('\\', '', regex=False)
    return cleaned.str.strip()


In [177]:
# Load the raw puzzles; the first CSV column is used as the index.
data = pd.read_csv("./data/kith_dataset.csv", index_col=0)
# Clean both text columns with remove_; the raw text in `data` is overwritten.
data.prompt = remove_(data.prompt)
data.answer = remove_(data.answer)
# Show the cleaned frame as the cell output.
data

Unnamed: 0,id,prompt,answer
0,0,"A man goes into a restaurant, orders abalone, ...",The man was in a ship that was wrecked on a de...
1,1,same problem statement but with albatross inst...,"In this version, the man was in a lifeboat, wi..."
2,2,A man kills himself rather than order albatross.,The man already knew that he had eaten human f...
3,3,A man lives on the twelfth floor of an apartme...,The man is a midget. He can't reach the upper ...
4,4,Emily regularly visits the twelfth floor of an...,Emily is a child; she can only reach the tenth...
5,5,A man sitting on a park bench reads a newspape...,The man is a travel agent. He had sold someone...
6,6,A man lets go of a bowling ball. A short while...,A physics professor is demonstrating conservat...
7,7,A man is born in 1972 and dies in 1952 at the ...,He's born in room number The numbers can of co...
8,8,A man is found dead in the arctic with a pack ...,It's a wolf pack; they've killed and eaten the...
9,9,A man pushes a car up to a hotel and tells the...,It's a game of Monopoly.


In [178]:
# Persist the cleaned puzzles without the index column. `index` is documented
# as a bool, so pass False explicitly rather than relying on None being falsy.
data.to_csv("./data/kith_cleaned_dataset.csv", index=False)

In [216]:
def create_finetuning_kith_data(df, filename):
    """Write one JSON line per row of ``df`` in prompt/completion form.

    For each row the ``prompt`` field holds a ``[STORY START]`` line (the
    first two fragments of the answer, split on ``, . ! ;`` and re-joined
    with a comma) followed by a ``[STORY END]`` line (the row's prompt).
    The ``completion`` field holds the full answer on a ``[STORY MIDDLE]``
    line followed by ``[END]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``prompt`` and ``answer`` string columns.
    filename : str
        Path of the JSONL file to create (overwritten if it exists).
    """
    delimiters = re.compile(r',|\.|!|;')

    records = []
    for _, row in df.iterrows():
        story_middle = row["answer"]
        story_end = row["prompt"]
        # Only the first two delimiter-separated fragments seed the story.
        story_start = ",".join(delimiters.split(story_middle)[:2])
        records.append({
            'prompt': f"[STORY START] {story_start}\n[STORY END] {story_end}\n",
            'completion': f"[STORY MIDDLE] {story_middle}\n[END]",
        })

    with open(filename, 'w') as out:
        out.writelines(json.dumps(record) + '\n' for record in records)

# Output path of the fine-tuning file; the shell cells below interpolate it
# as {jsonl_filename}.
jsonl_filename='fine_tune_kith_v4.jsonl'
create_finetuning_kith_data(data, jsonl_filename)

In [217]:
# Preview the first records and count the lines/words of the generated file.
# NOTE(review): the saved output of this cell shows "[STORY INFERENCE]" while
# create_finetuning_kith_data writes "[STORY MIDDLE]" — the output looks stale;
# re-run to confirm which marker the uploaded file actually used.
!head '{jsonl_filename}'
!wc -lw '{jsonl_filename}'

{"prompt": "[STORY START] The man was in a ship that was wrecked on a desert island, When there was no food left\n[STORY END] A man goes into a restaurant, orders abalone, eats one bite, and kills himself.\n", "completion": "[STORY INFERENCE] The man was in a ship that was wrecked on a desert island. When there was no food left, another passenger brought what he said was abalone but was really part of the man's wife. The man suspects something fishy, so when they finally return to civilization, he orders abalone, realizes that what he ate before was his wife, and kills himself.\n[END]"}
{"prompt": "[STORY START] In this version, the man was in a lifeboat\n[STORY END] same problem statement but with albatross instead of abalone.\n", "completion": "[STORY INFERENCE] In this version, the man was in a lifeboat, with his wife, who died. He hallucinated an albatross landing in the boat which he caught and killed and ate; he thought that his wife had been washed overboard. When he actually e

## Finetune

In [None]:
import os
from getpass import getpass

import openai

# Reuse a key already present in the environment; otherwise prompt for it.
# getpass (unlike input) does not echo what is typed, so the key does not end
# up in the notebook's saved cell output.
openai.api_key = os.environ.get('OPENAI_API_KEY') or getpass('Enter OpenAI API key:')

# Export the key as well — presumably so the `!openai` CLI cells below can
# pick it up from the environment.
os.environ['OPENAI_API_KEY']=openai.api_key

In [218]:
# Upload the JSONL file and start a fine-tune of the base `davinci` model.
# NOTE: each run of this cell creates a new fine-tune job (the saved log
# includes a cost line for the job), so avoid re-running it unintentionally.
!openai api fine_tunes.create -t '{jsonl_filename}' -m davinci

Logging requires wandb to be installed. Run `pip install wandb`.
Upload progress: 100%|████████████████████| 7.22k/7.22k [00:00<00:00, 5.56Mit/s]
Uploaded file from fine_tune_kith_v4.jsonl: file-BV2hUvn0gfP92uCr5AOxD1dd
Created fine-tune: ft-AzlRmKQVs2WqXiSFWNr7sxkW
Streaming events until fine-tuning is complete...

(Ctrl-C will interrupt the stream, but not cancel the fine-tune)
[2022-04-05 19:59:15] Created fine-tune: ft-AzlRmKQVs2WqXiSFWNr7sxkW
[2022-04-05 19:59:23] Fine-tune costs $0.20
[2022-04-05 19:59:23] Fine-tune enqueued. Queue number: 2
^C


In [221]:
# Re-attach to the event stream of the fine-tune job; the stream in the
# previous cell was interrupted with Ctrl-C.
# The job id is copied by hand from that cell's output — update it after
# starting a new fine-tune.
!openai api fine_tunes.follow -i ft-AzlRmKQVs2WqXiSFWNr7sxkW

Logging requires wandb to be installed. Run `pip install wandb`.
[2022-04-05 19:59:15] Created fine-tune: ft-AzlRmKQVs2WqXiSFWNr7sxkW
[2022-04-05 19:59:23] Fine-tune costs $0.20
[2022-04-05 19:59:23] Fine-tune enqueued. Queue number: 2
[2022-04-05 20:06:41] Fine-tune is in the queue. Queue number: 0
[2022-04-05 20:22:53] Fine-tune started
[2022-04-05 20:37:52] Completed epoch 1/4
[2022-04-05 20:37:57] Completed epoch 2/4
[2022-04-05 20:38:03] Completed epoch 3/4
[2022-04-05 20:38:09] Completed epoch 4/4
[2022-04-05 20:38:40] Uploaded model: davinci:ft-personal-2022-04-06-00-38-38
[2022-04-05 20:38:44] Uploaded result file: file-ysRetMLaRrGc0GqUiZhTrkvZ
[2022-04-05 20:38:44] Fine-tune succeeded

Job complete! Status: succeeded 🎉
Try out your fine-tuned model:

openai api completions.create -m davinci:ft-personal-2022-04-06-00-38-38 -p <YOUR_PROMPT>


## Test

In [225]:
def generate_finetuned_kith(prompt_start, prompt_end, max_tokens=256,
                            model='davinci:ft-personal-2022-04-06-00-38-38',
                            temperature=0.7):
    """Ask the fine-tuned model for the middle of a story.

    The prompt uses the same ``[STORY START]`` / ``[STORY END]`` layout that
    create_finetuning_kith_data writes into the training file, and generation
    stops at the ``[END]`` marker.

    Parameters
    ----------
    prompt_start : str
        Opening of the story (text placed after ``[STORY START]``).
    prompt_end : str
        Ending of the story (text placed after ``[STORY END]``).
    max_tokens : int, optional
        Upper bound on the number of generated tokens.
    model : str, optional
        Id of the fine-tuned model to query; defaults to the model produced
        by this notebook's fine-tune run.
    temperature : float, optional
        Sampling temperature passed to the API.

    Returns
    -------
    str
        The text of the first completion choice.

    Notes
    -----
    NOTE(review): right after a fine-tune finishes the API can raise
    RateLimitError ("That model is still being loaded"), as seen in this
    notebook's saved output; wait briefly and call again.
    """
    prompt = f"[STORY START] {prompt_start}\n" + f"[STORY END] {prompt_end}\n"

    # Make an API call to the fine-tuned GPT-3 model.
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=['[END]'],
    )
    return response['choices'][0]['text']


In [231]:
# Smoke-test the model on the first training example. prompt_start is the
# first two fragments of the answer joined with "," — the same form that
# appears in the training prompts.
print(generate_finetuned_kith(
    prompt_start="The man was in a ship that was wrecked on a desert island, When there was no food left",
    prompt_end="A man goes into a restaurant, orders abalone, eats one bite, and kills himself"))

RateLimitError: That model is still being loaded. Please try again shortly.