Based on https://github.com/openai/openai-cookbook/blob/main/examples/Fine-tuned_classification.ipynb

In [None]:
!pip install gdown
!pip install --upgrade openai
!pip install transformers
!pip install wandb

In [None]:
!wandb login
%env OPENAI_API_KEY="MY_API_KEY"

# Dataset

In [None]:
# Download train and test datasets
# Each argument is a Google Drive file ID fetched with gdown.
# NOTE(review): the downloads are presumably saved as train.csv and test.csv, which the
# cells below read — confirm the file names after downloading.
!gdown 1je2h8QdkzC2hhBl-Mqy0lPYSKs5-Buwp
!gdown 1jFXMCf0QM-QdBJnExDis8sh_BLuiEPjD

In [None]:
import pandas as pd

# Our choices for labels and separator based on the default recommendations from OpenAI; there might be better choices
pos_label = " bad"
neg_label = " good"
separator = "\n\n###\n\n"

# Assumes train.csv has exactly two columns: the input text first and a 0/1 `label` column second
# (the columns are renamed positionally to prompt/completion below).
df_train = pd.read_csv("train.csv")

# Translate the numeric classes into their completion strings by building a new column.
# Writing strings into the existing integer column through .loc relies on an implicit
# dtype change; mapping avoids that. Values other than 0/1 are passed through unchanged.
label_names = {1: pos_label, 0: neg_label}
df_train['label'] = df_train['label'].map(lambda value: label_names.get(value, value))
df_train.columns = ['prompt', 'completion']

# Add custom separator at the end of the prompts.
# Done as one vectorised assignment: `df_train['prompt'][i] = ...` is chained indexing,
# which may write to a temporary copy instead of the frame itself.
df_train['prompt'] = df_train['prompt'] + separator

display(df_train)
# One JSON object per line ({"prompt": ..., "completion": ...}), the input format of the prepare_data tool
df_train.to_json("moraluncertainty_train.jsonl", orient='records', lines=True)

In [None]:
!rm moraluncertainty_train_prepared_*.jsonl
!openai tools fine_tunes.prepare_data -f moraluncertainty_train.jsonl -q

In [None]:
# Show the last lines of the prepared training split to sanity-check the prompt/completion format.
!tail moraluncertainty_train_prepared_train.jsonl

# Fine-tune

In [None]:
# Start a fine-tune of the base `davinci` model for 4 epochs on the prepared training split,
# using the prepared validation split to compute classification metrics.
# NOTE: the positive class " bad" is hard-coded here; keep it identical to `pos_label`
# in the data-preparation cell, otherwise the reported metrics refer to the wrong class.
!openai api fine_tunes.create -t "moraluncertainty_train_prepared_train.jsonl" -v "moraluncertainty_train_prepared_valid.jsonl" --compute_classification_metrics --classification_positive_class " bad" -m davinci --n_epochs 4

In [None]:
# Set IDs from the above output
# Replace the placeholders with the fine-tune run ID and the name of the resulting fine-tuned model.
# %env stores the text after '=' verbatim, so do not wrap the values in quotes.
# FT_RUN_ID is read by the monitoring and results cells below.
%env FT_RUN_ID=MY_FT_RUN_ID
%env FT_MODEL_ID=MY_FT_MODEL_ID

In [None]:
# If you need to resume monitoring
# Re-attaches to the progress output of the fine-tune run identified by the FT_RUN_ID
# environment variable set in the previous cell.
!openai api fine_tunes.follow -i $FT_RUN_ID

In [None]:
# Sync fine-tune results to the Weights & Biases project `moral-uncertainty-gpt-3`
# (relies on the `wandb login` performed at the top of the notebook).
!openai wandb sync --project moral-uncertainty-gpt-3

In [None]:
# Check train and validation results
!openai api fine_tunes.results -i $FT_RUN_ID > result.csv

results = pd.read_csv('result.csv')
print(results[results['classification/accuracy'].notnull()])
results[results['classification/accuracy'].notnull()]['classification/accuracy'].plot()

# Test set

In [None]:
import pandas as pd

# Read the test inputs and name the file's single column `prompt`
df_test = pd.read_csv("test.csv").set_axis(['prompt'], axis=1)
# Guard against a truncated or wrong download: the test set has a fixed size
assert df_test.shape[0] == 2771
df_test

In [None]:
# Run inference for all test examples

import os
import openai
from tqdm import tqdm
from transformers import GPT2TokenizerFast

# The model that we want to run inference on.
# This has to be the name of the fine-tuned *model* (FT_MODEL_ID), not the ID of the
# fine-tune run: the value is passed as `model=` to the completion call below.
ft_model = os.environ["FT_MODEL_ID"]

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# Set logit_bias to 100 for our two output classes to ensure the model only predicts these two options
# (only the first token of each label is biased; a single token is generated per example)
tokenized_labels = [tokenizer.encode(label)[0] for label in [pos_label, neg_label]]
logit_bias = {key: 100 for key in tokenized_labels}
# Calculate how many input tokens we can afford for the input prompts (GPT context length is 2049)
separator_toks = tokenizer.encode(separator)
max_toks = 2049 - len(separator_toks) - 1 # input prompt = max context - separator - completion

# One raw API response per test example, in the same order as df_test
res_list = []
for input_prompt in tqdm(df_test['prompt'], total=len(df_test)):
    # Fit into max token length: encode, cut to the token budget, decode back to text
    truncated_prompt = tokenizer.decode(tokenizer.encode(input_prompt)[:max_toks])

    # Run completion on each output: greedy, a single token, with the log-probabilities
    # of the two most likely tokens so both class probabilities can be recovered later
    res = openai.Completion.create(model=ft_model, prompt=truncated_prompt + separator, max_tokens=1, temperature=0, logprobs=2, logit_bias=logit_bias)
    res_list.append(res)

In [None]:
# Turn every completion result into a 0-1 score: the probability of the positive label

import numpy as np

label_order = [pos_label, neg_label]
scores = []
for completion in res_list:
    first_choice = completion['choices'][0]
    choice = first_choice['text']  # generated label text (not needed for the score)
    # Log-probabilities reported for the first generated token, keyed by token text
    token_logprobs = first_choice['logprobs']['top_logprobs'][0]
    # Exponentiate the log-probabilities of the two classes to get probabilities
    class_probs = [np.exp(token_logprobs[label]) for label in label_order]
    # Sanity check: the two classes must account for the whole probability mass
    assert np.isclose(np.sum(class_probs), 1)
    scores.append(class_probs[0])
assert len(scores) == 2771

In [None]:
# Save predictions to file

import re
from pathlib import Path
import pandas as pd

data_dir = Path(".")

# Model identifiers may contain characters such as ':' or '/' that are not portable in
# file names; replace everything outside [A-Za-z0-9._-] with '_'. Identifiers that only
# use those characters produce exactly the same file name as before.
safe_model_name = re.sub(r"[^A-Za-z0-9._-]", "_", ft_model)
outfile = data_dir / f"predictions_gpt3_{safe_model_name}.csv"
# Refuse to overwrite the predictions of an earlier run
assert not outfile.exists(), f"{outfile} already exists!"
scores = np.array(scores)
out_class = scores > 0.5 # Binary classification: True means the positive label
out_uncertainty = np.minimum(scores, 1 - scores) # Uncertainty score is just how close we are to 0.5
pd.DataFrame({
    'class': out_class,
    'uncertainty': out_uncertainty,
}).to_csv(outfile, index=False)
print("Saved to", outfile)