<a href="https://colab.research.google.com/github/JeremyAlain/imitation_learning_from_language_feedback/blob/feature%2Fadd_all_relevant_files/finetune_gpt3_reward_model_public.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade openai wandb
!pip install -Uqq ipdb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openai
  Downloading openai-0.25.0.tar.gz (44 kB)
[K     |████████████████████████████████| 44 kB 1.8 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting wandb
  Downloading wandb-0.13.7-py2.py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 7.9 MB/s 
Collecting pandas-stubs>=1.1.0.11
  Downloading pandas_stubs-1.5.2.221213-py3-none-any.whl (147 kB)
[K     |████████████████████████████████| 147 kB 38.7 MB/s 
Collecting types-pytz>=2022.1.1
  Downloading types_pytz-2022.7.0.0-py3-none-any.whl (4.7 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp3

In [None]:
import pandas as pd 
from google.colab import drive 
import openai 
import wandb
from openai.wandb_logger import WandbLogger
from pathlib import Path
from typing import List, Union, Dict, Any
import os
import itertools
import numpy as np

import ipdb


import getpass

# Google Drive folder that holds the datasets; dataset paths below are relative to it.
root_path = "/content/drive/MyDrive/training_language_models_with_language_feedback_at_scale"
drive.mount('/content/drive', force_remount=True)

# Do not hard-code the key and do not set it with `%env`: that magic echoes the value
# into the cell output, which is then saved together with the notebook.
# Reuse a key that is already in the environment, otherwise prompt for it without echo.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
# Export the key so the `!openai api ...` shell commands further down can pick it up.
os.environ["OPENAI_API_KEY"] = openai.api_key

Mounted at /content/drive
env: OPENAI_API_KEY=<redacted>


In [None]:
def upload_dataset_to_openai(dataset_path: str) -> str:
  """Upload a fine-tuning dataset stored under ``root_path`` to OpenAI.

  Args:
    dataset_path: Path of the dataset file, relative to the module-level ``root_path``.

  Returns:
    The ``"id"`` field of the file object returned by ``openai.File.create``.

  Raises:
    AssertionError: If the resolved path does not exist (the message is the path).
  """
  dataset_path = os.path.join(root_path, dataset_path)
  assert Path(dataset_path).exists(), dataset_path
  # Use a context manager so the file handle is closed once the upload call returns.
  with open(dataset_path) as dataset_file:
    train_object = openai.File.create(
        file=dataset_file,
        purpose="fine-tune"
    )
  return train_object["id"]

In [None]:
def initialize_gpt3_finetuning_job(config: Dict[str, Any], train_dataset_id: str,validation_dataset_id: str, finetuning_summaries: List[Any], finetuning_ids: List[str], positive_class: str, suffix: str, use_all_hyperparameters: bool=True) -> tuple:
  """Create one binary-classification fine-tuning job on ``davinci`` and record it.

  Args:
    config: Hyperparameters of the job. With ``use_all_hyperparameters=True`` it must
      contain ``learning_rate_multiplier``, ``prompt_loss_weight``, ``n_epochs`` and
      ``batch_size``. Otherwise only ``n_epochs`` and ``prompt_loss_weight`` are
      forwarded, and only when present; anything left out is not sent to the API.
    train_dataset_id: Id of the uploaded training file.
    validation_dataset_id: Id of the uploaded validation file.
    finetuning_summaries: List the returned job summary is appended to (mutated in place).
    finetuning_ids: List the returned job id is appended to (mutated in place).
    positive_class: Value passed as ``classification_positive_class``.
    suffix: Value passed as ``suffix`` of the fine-tune job.
    use_all_hyperparameters: Whether all four hyperparameters are taken from ``config``.

  Returns:
    ``(finetuning_summaries, finetuning_ids)``, i.e. the two lists that were passed in.

  Raises:
    KeyError: If ``use_all_hyperparameters`` is true and ``config`` lacks a required key.
    NotImplementedError: If ``use_all_hyperparameters`` is false but ``config`` contains
      ``batch_size`` or ``learning_rate_multiplier``.
  """
  if use_all_hyperparameters:
    hyperparameters = {
        "learning_rate_multiplier": config["learning_rate_multiplier"],
        "prompt_loss_weight": config["prompt_loss_weight"],
        "n_epochs": config["n_epochs"],
        "batch_size": config["batch_size"],
    }
  else:
    if "batch_size" in config or "learning_rate_multiplier" in config:
      raise NotImplementedError()
    # Forward only the keys the sweep actually sets. Indexing config["n_epochs"]
    # unconditionally raised KeyError for sweeps that only vary prompt_loss_weight.
    hyperparameters = {
        name: config[name]
        for name in ("n_epochs", "prompt_loss_weight")
        if name in config
    }

  finetuning_summary = openai.FineTune.create(
      training_file=train_dataset_id,
      validation_file=validation_dataset_id,
      model="davinci",
      compute_classification_metrics=True,
      classification_n_classes=2,
      classification_positive_class=positive_class,
      suffix=suffix,
      **hyperparameters)
  finetuning_summaries.append(finetuning_summary)
  finetuning_ids.append(finetuning_summary["id"])
  print("Initialized job", finetuning_summary['id'])
  print("with config", config)
  print("\n\n")
  return finetuning_summaries, finetuning_ids

In [None]:
def kill_all_active_jobs():
  """Cancel every fine-tune returned by ``openai.FineTune.list()`` that is pending or running.

  Each cancelled job is reported on stdout, followed by a closing message.
  """
  active_states = ('pending', 'running')
  for job in openai.FineTune.list()['data']:
    if job['status'] not in active_states:
      continue
    print("Found", job['id'], job['status'], "... killing.")
    openai.FineTune.cancel(id=job['id'])
  print("No more active jobs.")

In [None]:
def show_all_active_jobs():
  """Print id and status of every fine-tune that is currently pending or running.

  Nothing is cancelled; the function only reports, then prints a closing message.
  """
  active_states = ('pending', 'running')
  for job in openai.FineTune.list()['data']:
    if job['status'] in active_states:
      print("Found", job['id'], job['status'])
  print("No more active jobs.")

In [None]:
def show_job_status_of_job_ids(job_ids: List[str]=None):
  """Print ``<id> <status>`` for fine-tunes returned by ``openai.FineTune.list()``.

  Args:
    job_ids: Ids to report on. When it is ``None`` or empty, every listed job is printed.
  """
  for job in openai.FineTune.list()["data"]:
    # An empty / missing filter means "show everything".
    if not job_ids or job['id'] in job_ids:
      print(job['id'], job['status'])

In [None]:
def build_parameter_sets(sweep_parameters: Dict[str, Any]): 
  """Expand a hyperparameter grid into the list of all value combinations.

  Args:
    sweep_parameters: Maps each parameter name to the iterable of values to try.

  Returns:
    A list with one dict per element of the Cartesian product of the value
    iterables, each mapping parameter name to the chosen value. The last
    parameter varies fastest.
  """
  parameter_names = list(sweep_parameters.keys())
  value_grids = [sweep_parameters[name] for name in parameter_names]
  return [
      dict(zip(parameter_names, combination))
      for combination in itertools.product(*value_grids)
  ]

# GPT-3 Reward Model - Comparison

## Prompt Loss Weight Tuning on 1K Dataset

In [None]:
# Turn automatic pdb calling off for the cells that follow.
%pdb off

Automatic pdb calling has been turned OFF


In [None]:
# Comparison reward model, hyperparameter sweep: only the prompt loss weight is varied.
# `suffix` is handed to the fine-tune jobs; `sweep_tag` is used as the W&B tag when syncing.
suffix = "reward_model_comparison"
sweep_tag = "reward_model_comparison_sweep"
sweep_parameters = {"prompt_loss_weight": [0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]}

In [None]:
# Expand the sweep grid into one config dict per combination and list the configs.
parameter_sets_for_sweep = build_parameter_sets(sweep_parameters)
print("Number of configs", len(parameter_sets_for_sweep))
for config in parameter_sets_for_sweep:
  print(config)

Number of configs 7
{'prompt_loss_weight': 0, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.001, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.005, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.01, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.05, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.1, 'suffix': 'reward_model_comparison_sweep'}
{'prompt_loss_weight': 0.5, 'suffix': 'reward_model_comparison_sweep'}


In [None]:
# Upload the comparison `train_1000` and `validation_200` files (paths relative to `root_path`).
# Every run of this cell uploads the files again and yields new file ids.
train_comparison_dataset_path = "summarization_finetuning_datasets/reward_model_comparison_finetuning_dataset_train_1000.jsonl"
dev_comparison_dataset_path = "summarization_finetuning_datasets/reward_model_comparison_finetuning_dataset_validation_200.jsonl"

train_comparison_dataset_id = upload_dataset_to_openai(train_comparison_dataset_path)
development_comparison_dataset_id = upload_dataset_to_openai(dev_comparison_dataset_path)

print(f"Train dataset id {train_comparison_dataset_id}")
print(f"Validation dataset id {development_comparison_dataset_id}")

Train dataset id file-jv1mB4uAktRCps2RFCXPYKFD
Validation dataset id file-0TGS9QU0r5K689NOggUkw46U


In [None]:
# Start one fine-tuning job per sweep config and collect the job summaries and ids.
finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = [], []
for parameter_set in parameter_sets_for_sweep:
  finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = initialize_gpt3_finetuning_job(
      config=parameter_set,
      train_dataset_id=train_comparison_dataset_id,
      validation_dataset_id=development_comparison_dataset_id,
      finetuning_summaries=finetuning_summaries_hp_tuning,
      finetuning_ids=finetuning_ids_hp_tuning,
      positive_class=" A",  # the leading space is part of the label string
      suffix=suffix,
      use_all_hyperparameters=False,
  )
print(finetuning_ids_hp_tuning)

Initialized job ft-gKrIAUcfIbNIq4276lbIIZTn
with config {'prompt_loss_weight': 0, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-gW5yYQ8CTC42EXCFdikeTnmH
with config {'prompt_loss_weight': 0.001, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-QmnVVQ1XzciWD5LDMGH3tiNU
with config {'prompt_loss_weight': 0.005, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-KiUIQloVoJotOoop1t770ptl
with config {'prompt_loss_weight': 0.01, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-coRZDkwVEw2AIa13qskuKJM5
with config {'prompt_loss_weight': 0.05, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-qQ0QUf8ACalGMg9QVEHSqurB
with config {'prompt_loss_weight': 0.1, 'suffix': 'reward_model_comparison_sweep'}



Initialized job ft-we6h6rvw0wOeVthCmio9MyWh
with config {'prompt_loss_weight': 0.5, 'suffix': 'reward_model_comparison_sweep'}



['ft-gKrIAUcfIbNIq4276lbIIZTn', 'ft-gW5yYQ8CTC42EXCFdikeTnmH', 'ft-QmnVVQ1XzciWD5LDMGH

In [None]:
# Fetch the record (events, status, ...) of one job of the sweep above; the id is hard-coded.
!openai api fine_tunes.get -i "ft-QmnVVQ1XzciWD5LDMGH3tiNU"

{
  "created_at": 1669798212,
  "events": [
    {
      "created_at": 1669798212,
      "level": "info",
      "message": "Created fine-tune: ft-QmnVVQ1XzciWD5LDMGH3tiNU",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669807231,
      "level": "info",
      "message": "Fine-tune costs $53.45",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669807232,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 2",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669807232,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 2",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669807409,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 1",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669807501,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 0",
      "object": "fine-tune-eve

In [None]:
# Stream the event log of each job of the sweep, one job after the other.
for finetuning_id in finetuning_ids_hp_tuning:
  print(f"Monitoring {finetuning_id} ...")
  # IPython substitutes `{finetuning_id}` into the shell command.
  !openai api fine_tunes.follow -i {finetuning_id}
  print("\n\n")

Monitoring ft-gKrIAUcfIbNIq4276lbIIZTn ...
[2022-11-30 08:50:12] Created fine-tune: ft-gKrIAUcfIbNIq4276lbIIZTn
[2022-11-30 08:50:17] Fine-tune costs $53.45
[2022-11-30 08:50:18] Fine-tune enqueued. Queue number: 2
[2022-11-30 08:52:42] Fine-tune is in the queue. Queue number: 1
[2022-11-30 08:55:39] Fine-tune is in the queue. Queue number: 0
[2022-11-30 09:07:07] Fine-tune started
[2022-11-30 09:18:42] Completed epoch 1/4
[2022-11-30 09:29:14] Completed epoch 2/4
[2022-11-30 09:39:42] Completed epoch 3/4
[2022-11-30 09:50:07] Completed epoch 4/4
[2022-11-30 09:51:19] Uploaded model: davinci:ft-academicsnyuperez:reward-model-comparison-2022-11-30-09-51-19
[2022-11-30 09:51:21] Uploaded result file: file-HMzhvDEpuu4AG90r1N5MWBMJ
[2022-11-30 09:51:21] Fine-tune succeeded

Job complete! Status: succeeded 🎉
Try out your fine-tuned model:

openai api completions.create -m davinci:ft-academicsnyuperez:reward-model-comparison-2022-11-30-09-51-19 -p <YOUR_PROMPT>



Monitoring ft-gW5yYQ8CTC42E

In [None]:
# Sync the results of every job of the sweep to Weights & Biases, tagged with `sweep_tag`.
# NOTE(review): the project name is spelled "langauge"; it presumably has to match the
# existing W&B project, so verify before correcting the spelling.
for finetuning_id in finetuning_ids_hp_tuning:
  WandbLogger.sync(
      finetuning_id,
      project="training_language_models_with_langauge_feedback",
      entity="jerry_crea",
      tags=[sweep_tag],
  )

Fine-tune ft-T54yqskSm2FXzRoctsYXMX9u has the status "running" and will not be logged


In [None]:
# CAUTION: cancels every pending/running fine-tune returned by `openai.FineTune.list()`,
# not only the jobs started from this notebook session.
kill_all_active_jobs()

Found ft-Ef8jeYqwAlhSosvupxWEBqgt pending ... killing.
Found ft-sguY0RNxKWPxJ9PZYSqzQPOT pending ... killing.
Found ft-nkrbGm1GTRAX7PENTbIZyvtv pending ... killing.
Found ft-hhMe1QE3X7azOcCFiHpJ4cgH pending ... killing.
Found ft-bbQyi790SVssZH0PWrBmsHCH pending ... killing.
Found ft-zRpAcLhYImBaHsNr6A6poGc3 pending ... killing.
No more active jobs.


In [None]:
# List the fine-tunes that are still pending or running (read-only check).
show_all_active_jobs()

No more active jobs.


## Final Parameter Run

- prompt_loss_weight = 0.0
- n_epochs = 1
- default parameters otherwise

In [None]:
# Comparison reward model, final run: a single config with one epoch.
# `suffix` is handed to the fine-tune job; `sweep_tag` is used as the W&B tag when syncing.
suffix = "reward_model_comparison"
sweep_tag = "reward_model_comparison_final_run"
sweep_parameters = {
    "prompt_loss_weight": [0],
    "n_epochs": [1],
}

In [None]:
# Expand the grid of the final run into config dicts and list them.
parameter_sets_for_sweep = build_parameter_sets(sweep_parameters)
print("Number of configs", len(parameter_sets_for_sweep))
for config in parameter_sets_for_sweep:
  print(config)

Number of configs 1
{'prompt_loss_weight': 0, 'n_epochs': 1}


In [None]:
# Upload the comparison `train_5000` and `validation_200` files (paths relative to `root_path`).
# Every run of this cell uploads the files again and yields new file ids.
train_comparison_dataset_path = "summarization_finetuning_datasets/reward_model_comparison_finetuning_dataset_train_5000.jsonl"
dev_comparison_dataset_path = "summarization_finetuning_datasets/reward_model_comparison_finetuning_dataset_validation_200.jsonl"

train_comparison_dataset_id = upload_dataset_to_openai(train_comparison_dataset_path)
development_comparison_dataset_id = upload_dataset_to_openai(dev_comparison_dataset_path)

print(f"Train dataset id {train_comparison_dataset_id}")
print(f"Validation dataset id {development_comparison_dataset_id}")

Train dataset id file-2NN9jwFW5MEtWNnQap7HFMhM
Validation dataset id file-SDwGhLzwv5IaztMJuFjm8Gk9


In [None]:
# Start the final comparison fine-tuning job(s) and collect the job summaries and ids.
finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = [], []
for parameter_set in parameter_sets_for_sweep:
  finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = initialize_gpt3_finetuning_job(
      config=parameter_set,
      train_dataset_id=train_comparison_dataset_id,
      validation_dataset_id=development_comparison_dataset_id,
      finetuning_summaries=finetuning_summaries_hp_tuning,
      finetuning_ids=finetuning_ids_hp_tuning,
      positive_class=" A",  # the leading space is part of the label string
      suffix=suffix,
      use_all_hyperparameters=False,
  )
print(finetuning_ids_hp_tuning)

Initialized job ft-JLH3Tzcbwc5I40l3VtSSf2i2
with config {'prompt_loss_weight': 0, 'n_epochs': 1}



['ft-JLH3Tzcbwc5I40l3VtSSf2i2']


In [None]:
# Fetch the record (events, status, ...) of the final comparison job; the id is hard-coded.
!openai api fine_tunes.get -i "ft-JLH3Tzcbwc5I40l3VtSSf2i2"

{
  "created_at": 1670055024,
  "events": [
    {
      "created_at": 1670055024,
      "level": "info",
      "message": "Created fine-tune: ft-JLH3Tzcbwc5I40l3VtSSf2i2",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670055046,
      "level": "info",
      "message": "Fine-tune costs $64.41",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670055047,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670055050,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670056678,
      "level": "info",
      "message": "Completed epoch 1/1",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670056740,
      "level": "info",
      "message": "Uploaded model: davinci:ft-academicsnyuperez:reward-model-comparison-2022-12-03-08-39-00",
      "object": "fine-tune-event

In [None]:
# The job id is hard-coded, presumably so this cell can be re-run without launching a new job.
finetuning_ids_hp_tuning = ['ft-JLH3Tzcbwc5I40l3VtSSf2i2']
# Stream the event log of each listed job, one job after the other.
for finetuning_id in finetuning_ids_hp_tuning:
  print(f"Monitoring {finetuning_id} ...")
  # IPython substitutes `{finetuning_id}` into the shell command.
  !openai api fine_tunes.follow -i {finetuning_id}
  print("\n\n")

Monitoring ft-JLH3Tzcbwc5I40l3VtSSf2i2 ...
[2022-12-03 08:10:24] Created fine-tune: ft-JLH3Tzcbwc5I40l3VtSSf2i2
[2022-12-03 08:10:46] Fine-tune costs $64.41
[2022-12-03 08:10:47] Fine-tune enqueued. Queue number: 0
[2022-12-03 08:10:50] Fine-tune started
[2022-12-03 08:37:58] Completed epoch 1/1
[2022-12-03 08:39:00] Uploaded model: davinci:ft-academicsnyuperez:reward-model-comparison-2022-12-03-08-39-00
[2022-12-03 08:39:01] Uploaded result file: file-SzdmrKBNekEFHmzn4VOIgn7L
[2022-12-03 08:39:01] Fine-tune succeeded

Job complete! Status: succeeded 🎉
Try out your fine-tuned model:

openai api completions.create -m davinci:ft-academicsnyuperez:reward-model-comparison-2022-12-03-08-39-00 -p <YOUR_PROMPT>





In [None]:
# Sync the final comparison job to Weights & Biases, tagged with `sweep_tag`.
# The job id is hard-coded here rather than taken from the launch cell.
# NOTE(review): the project name is spelled "langauge"; it presumably has to match the
# existing W&B project, so verify before correcting the spelling.
finetuning_ids_hp_tuning = ['ft-JLH3Tzcbwc5I40l3VtSSf2i2']
for finetuning_id in finetuning_ids_hp_tuning:
  WandbLogger.sync(
      finetuning_id,
      project="training_language_models_with_langauge_feedback",
      entity="jerry_crea",
      tags=[sweep_tag],
  )

# GPT-3 Reward Model - Classification

## Prompt Loss Weight Tuning on 1K Dataset

In [None]:
# Classification reward model, hyperparameter sweep: only the prompt loss weight is varied.
# `suffix` is handed to the fine-tune jobs; `sweep_tag` is used as the W&B tag when syncing.
suffix = "reward_model_classification"
sweep_tag = "reward_model_classification_sweep"
sweep_parameters = {"prompt_loss_weight": [0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]}

In [None]:
# Expand the sweep grid into one config dict per combination and list the configs.
parameter_sets_for_sweep = build_parameter_sets(sweep_parameters)
print("Number of configs", len(parameter_sets_for_sweep))
for config in parameter_sets_for_sweep:
  print(config)

Number of configs 7
{'prompt_loss_weight': 0}
{'prompt_loss_weight': 0.001}
{'prompt_loss_weight': 0.005}
{'prompt_loss_weight': 0.01}
{'prompt_loss_weight': 0.05}
{'prompt_loss_weight': 0.1}
{'prompt_loss_weight': 0.5}


In [None]:
# Upload the classification `train_1000` and `validation_400` files (paths relative to `root_path`).
# Every run of this cell uploads the files again and yields new file ids.
train_classification_dataset_path = "summarization_finetuning_datasets/reward_model_classification_finetuning_dataset_train_1000.jsonl"
dev_classification_dataset_path = "summarization_finetuning_datasets/reward_model_classification_finetuning_dataset_validation_400.jsonl"

train_classification_dataset_id = upload_dataset_to_openai(train_classification_dataset_path)
development_classification_dataset_id = upload_dataset_to_openai(dev_classification_dataset_path)

print(f"Train dataset id {train_classification_dataset_id}")
print(f"Validation dataset id {development_classification_dataset_id}")

Train dataset id file-1BwTeelH9kMt2SUnrwESPz6d
Validation dataset id file-0DbupaEsn1Pt9JQDODwouOde


In [None]:
# Start one classification fine-tuning job per sweep config and collect summaries and ids.
finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = [], []
for parameter_set in parameter_sets_for_sweep:
  finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = initialize_gpt3_finetuning_job(
      config=parameter_set,
      train_dataset_id=train_classification_dataset_id,
      validation_dataset_id=development_classification_dataset_id,
      finetuning_summaries=finetuning_summaries_hp_tuning,
      finetuning_ids=finetuning_ids_hp_tuning,
      positive_class=" Yes",  # the leading space is part of the label string
      suffix=suffix,
      use_all_hyperparameters=False,
  )
print(finetuning_ids_hp_tuning)

Initialized job ft-gA3vJ1BjYlYc6h0JyIhxLEO5
with config {'prompt_loss_weight': 0}



Initialized job ft-u9JiZFstyp5LuqvhkLaKgHHF
with config {'prompt_loss_weight': 0.001}



Initialized job ft-5antcrreE5U0cZbUDtvKntf1
with config {'prompt_loss_weight': 0.005}



Initialized job ft-SIP7jF1EE1S6EO8H13PS7PCs
with config {'prompt_loss_weight': 0.01}



Initialized job ft-AgFFLHBxNPjxFxUdj9FloaK7
with config {'prompt_loss_weight': 0.05}



Initialized job ft-KL2DlrPjFYwJ517NoIJOz9G3
with config {'prompt_loss_weight': 0.1}



Initialized job ft-ZdWDUbAnwZ4JDw8fwJRWjV4E
with config {'prompt_loss_weight': 0.5}



['ft-gA3vJ1BjYlYc6h0JyIhxLEO5', 'ft-u9JiZFstyp5LuqvhkLaKgHHF', 'ft-5antcrreE5U0cZbUDtvKntf1', 'ft-SIP7jF1EE1S6EO8H13PS7PCs', 'ft-AgFFLHBxNPjxFxUdj9FloaK7', 'ft-KL2DlrPjFYwJ517NoIJOz9G3', 'ft-ZdWDUbAnwZ4JDw8fwJRWjV4E']


In [None]:
# Fetch the record (events, status, ...) of one job of the sweep above; the id is hard-coded.
!openai api fine_tunes.get -i "ft-KL2DlrPjFYwJ517NoIJOz9G3"

{
  "created_at": 1669893198,
  "events": [
    {
      "created_at": 1669893198,
      "level": "info",
      "message": "Created fine-tune: ft-KL2DlrPjFYwJ517NoIJOz9G3",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669962750,
      "level": "info",
      "message": "Fine-tune costs $99.60",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669962750,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 22",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669964332,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 21",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669964654,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 20",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669964848,
      "level": "info",
      "message": "Fine-tune is in the queue. Queue number: 19",
      "object": "fine-tune

In [None]:
# The job ids are hard-coded, presumably so this cell can be re-run without launching new jobs.
finetuning_ids_hp_tuning = ['ft-gA3vJ1BjYlYc6h0JyIhxLEO5', 'ft-u9JiZFstyp5LuqvhkLaKgHHF', 'ft-5antcrreE5U0cZbUDtvKntf1', 'ft-SIP7jF1EE1S6EO8H13PS7PCs', 'ft-AgFFLHBxNPjxFxUdj9FloaK7', 'ft-KL2DlrPjFYwJ517NoIJOz9G3', 'ft-ZdWDUbAnwZ4JDw8fwJRWjV4E']
# Stream the event log of each listed job, one job after the other.
for finetuning_id in finetuning_ids_hp_tuning:
  print(f"Monitoring {finetuning_id} ...")
  # IPython substitutes `{finetuning_id}` into the shell command.
  !openai api fine_tunes.follow -i {finetuning_id}
  print("\n\n")

Monitoring ft-gA3vJ1BjYlYc6h0JyIhxLEO5 ...
[2022-12-01 11:13:16] Created fine-tune: ft-gA3vJ1BjYlYc6h0JyIhxLEO5
[2022-12-01 11:13:27] Fine-tune costs $99.60
[2022-12-01 11:13:28] Fine-tune enqueued. Queue number: 20
[2022-12-01 11:13:59] Fine-tune is in the queue. Queue number: 19
[2022-12-01 11:15:12] Fine-tune is in the queue. Queue number: 18
[2022-12-01 11:17:39] Fine-tune is in the queue. Queue number: 17
[2022-12-01 11:22:08] Fine-tune is in the queue. Queue number: 16
[2022-12-01 11:30:19] Fine-tune is in the queue. Queue number: 15
[2022-12-01 11:32:16] Fine-tune is in the queue. Queue number: 14
[2022-12-01 11:35:35] Fine-tune is in the queue. Queue number: 13
[2022-12-01 11:36:15] Fine-tune is in the queue. Queue number: 12
[2022-12-01 11:43:28] Fine-tune is in the queue. Queue number: 11
[2022-12-01 11:44:18] Fine-tune is in the queue. Queue number: 10
[2022-12-01 11:45:44] Fine-tune is in the queue. Queue number: 9
[2022-12-01 11:48:00] Fine-tune is in the queue. Queue numb

In [None]:
# Sync the results of every job of the sweep to Weights & Biases, tagged with `sweep_tag`.
# NOTE(review): the project name is spelled "langauge"; it presumably has to match the
# existing W&B project, so verify before correcting the spelling.
for finetuning_id in finetuning_ids_hp_tuning:
  WandbLogger.sync(
      finetuning_id,
      project="training_language_models_with_langauge_feedback",
      entity="jerry_crea",
      tags=[sweep_tag],
  )

Fine-tune ft-gA3vJ1BjYlYc6h0JyIhxLEO5 has already been logged successfully at https://wandb.ai/jerry_crea/training_language_models_with_langauge_feedback/runs/ft-gA3vJ1BjYlYc6h0JyIhxLEO5
Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run
Fine-tune ft-u9JiZFstyp5LuqvhkLaKgHHF has already been logged successfully at https://wandb.ai/jerry_crea/training_language_models_with_langauge_feedback/runs/ft-u9JiZFstyp5LuqvhkLaKgHHF
Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run
Fine-tune ft-5antcrreE5U0cZbUDtvKntf1 has already been logged successfully at https://wandb.ai/jerry_crea/training_language_models_with_langauge_feedback/runs/ft-5antcrreE5U0cZbUDtvKntf1
Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run
Fine-tune ft-SIP7jF1EE1S6EO8H13PS7PCs has already been logged successfully at https://wandb.ai/jerry_crea/training_language_models_with_langauge_feedback/runs/ft-SIP7jF1EE1

0,1
classification/accuracy,▅▁▇█
classification/auprc,▁▄▃█
classification/auroc,▁▄▄█
classification/f1.0,█▇▁▄
classification/precision,▃▁██
classification/recall,▇█▁▂
elapsed_examples,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
elapsed_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_loss,█▇▅▆▇▆▄▆▇▇▅▆▇▅▂▅▃▄▆▄▃▂▄▂▃▄▂▂▄▅▁▂▂▂▃▃▂▁▂▃
training_sequence_accuracy,▁▃▁▃▁▆▁▃▃▆▆▃▁▃██▁▆▁▆▃▃▁▃▃▃▆▁▃▃██▆▁▁▁▁▃▁▃

0,1
classification/accuracy,0.5775
classification/auprc,0.60085
classification/auroc,0.6172
classification/f1.0,0.62528
classification/precision,0.56175
classification/recall,0.705
elapsed_examples,8004.0
elapsed_tokens,4094148.0
fine_tuned_model,davinci:ft-academics...
status,succeeded


0,1
classification/accuracy,▂▁█▇
classification/auprc,▃▁█▄
classification/auroc,▃▁█▇
classification/f1.0,██▂▁
classification/precision,▂▁█▇
classification/recall,██▁▁
elapsed_examples,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
elapsed_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_loss,██▆▇█▇▆▇▇▇▆▆▆▅▄▅▄▅▆▄▃▃▄▃▃▃▃▃▃▄▁▂▂▁▁▃▁▁▂▂
training_sequence_accuracy,▁▃▁▃▁▃▁▃█▃▃▃▁▃██▁▆▁▃▁▆▁▆▃▁▃▁▆▃▆▆▆▁▃▆▁▃▆▃

0,1
classification/accuracy,0.55
classification/auprc,0.53467
classification/auroc,0.56656
classification/f1.0,0.60177
classification/precision,0.53968
classification/recall,0.68
elapsed_examples,8004.0
elapsed_tokens,4094148.0
fine_tuned_model,davinci:ft-academics...
status,succeeded


## Final Parameter Run

- prompt_loss_weight = 0.001
- default parameters otherwise

In [None]:
# Classification reward model, final run: a single config.
# `suffix` is handed to the fine-tune job; `sweep_tag` is used as the W&B tag when syncing.
suffix = "reward_model_classification"
sweep_tag = "reward_model_classification_final_run"
sweep_parameters = {"prompt_loss_weight": [0.001]}

In [None]:
# Expand the grid of the final run into config dicts and list them.
parameter_sets_for_sweep = build_parameter_sets(sweep_parameters)
print("Number of configs", len(parameter_sets_for_sweep))
for config in parameter_sets_for_sweep:
  print(config)

Number of configs 1
{'prompt_loss_weight': 0.001}


In [None]:
# Upload the classification `train_5000` and `validation_400` files (paths relative to `root_path`).
# Every run of this cell uploads the files again and yields new file ids.
train_classification_dataset_path = "summarization_finetuning_datasets/reward_model_classification_finetuning_dataset_train_5000.jsonl"
dev_classification_dataset_path = "summarization_finetuning_datasets/reward_model_classification_finetuning_dataset_validation_400.jsonl"

train_classification_dataset_id = upload_dataset_to_openai(train_classification_dataset_path)
development_classification_dataset_id = upload_dataset_to_openai(dev_classification_dataset_path)

print(f"Train dataset id {train_classification_dataset_id}")
print(f"Validation dataset id {development_classification_dataset_id}")

Train dataset id file-9gbvLkFORbT15wiZgFJ0ot9Q
Validation dataset id file-06z43lNOgsDKznTMsp3yYwaA


In [None]:
# Start the final classification fine-tuning job(s) and collect the job summaries and ids.
finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = [], []
for parameter_set in parameter_sets_for_sweep:
  finetuning_summaries_hp_tuning, finetuning_ids_hp_tuning = initialize_gpt3_finetuning_job(
      config=parameter_set,
      train_dataset_id=train_classification_dataset_id,
      validation_dataset_id=development_classification_dataset_id,
      finetuning_summaries=finetuning_summaries_hp_tuning,
      finetuning_ids=finetuning_ids_hp_tuning,
      positive_class=" Yes",  # the leading space is part of the label string
      suffix=suffix,
      use_all_hyperparameters=False,
  )
print(finetuning_ids_hp_tuning)

Initialized job ft-rcFTogqGUyM2pPRSpJZXIGR4
with config {'prompt_loss_weight': 0.001}



['ft-rcFTogqGUyM2pPRSpJZXIGR4']


In [None]:
# Fetch the record (events, hyperparameters, status) of the final classification job; the id is hard-coded.
!openai api fine_tunes.get -i "ft-rcFTogqGUyM2pPRSpJZXIGR4"

{
  "created_at": 1669993498,
  "events": [
    {
      "created_at": 1669993498,
      "level": "info",
      "message": "Created fine-tune: ft-rcFTogqGUyM2pPRSpJZXIGR4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669995096,
      "level": "info",
      "message": "Fine-tune costs $479.93",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1669995096,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 10",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": 16,
    "classification_n_classes": 2,
    "classification_positive_class": " Yes",
    "compute_classification_metrics": true,
    "learning_rate_multiplier": 0.1,
    "n_epochs": 4,
    "prompt_loss_weight": 0.001
  },
  "id": "ft-rcFTogqGUyM2pPRSpJZXIGR4",
  "model": "davinci",
  "object": "fine-tune",
  "organization_id": "org-rRALD2hkdlmLWNVCKk9PG5Xq",
  "result_files": [],
  "status": "pending",
  "train

In [None]:
# The job id is hard-coded, presumably so this cell can be re-run without launching a new job.
finetuning_ids_hp_tuning = ['ft-rcFTogqGUyM2pPRSpJZXIGR4']
# Stream the event log of each listed job, one job after the other.
for finetuning_id in finetuning_ids_hp_tuning:
  print(f"Monitoring {finetuning_id} ...")
  # IPython substitutes `{finetuning_id}` into the shell command.
  !openai api fine_tunes.follow -i {finetuning_id}
  print("\n\n")

Monitoring ft-rcFTogqGUyM2pPRSpJZXIGR4 ...
[2022-12-02 15:04:58] Created fine-tune: ft-rcFTogqGUyM2pPRSpJZXIGR4
[2022-12-02 15:31:36] Fine-tune costs $479.93
[2022-12-02 15:31:36] Fine-tune enqueued. Queue number: 10
[2022-12-02 15:38:53] Fine-tune is in the queue. Queue number: 9
[2022-12-02 15:38:54] Fine-tune is in the queue. Queue number: 8
[2022-12-02 15:39:34] Fine-tune is in the queue. Queue number: 7
[2022-12-02 15:41:51] Fine-tune is in the queue. Queue number: 6
[2022-12-02 15:42:02] Fine-tune is in the queue. Queue number: 5
[2022-12-02 15:46:30] Fine-tune is in the queue. Queue number: 4
[2022-12-02 15:52:41] Fine-tune is in the queue. Queue number: 3
[2022-12-02 15:58:10] Fine-tune is in the queue. Queue number: 2
[2022-12-02 16:02:08] Fine-tune is in the queue. Queue number: 1
[2022-12-02 16:11:57] Fine-tune is in the queue. Queue number: 0
[2022-12-02 16:13:35] Fine-tune started
[2022-12-02 17:00:57] Fine-tune is in the queue. Queue number: 3
[2022-12-02 17:03:25] Fine-t

In [None]:
# Sync the final classification job to Weights & Biases, tagged with `sweep_tag`.
# The job id is hard-coded here rather than taken from the launch cell.
# NOTE(review): the project name is spelled "langauge"; it presumably has to match the
# existing W&B project, so verify before correcting the spelling.
finetuning_ids_hp_tuning = ['ft-rcFTogqGUyM2pPRSpJZXIGR4']
for finetuning_id in finetuning_ids_hp_tuning:
  WandbLogger.sync(
      finetuning_id,
      project="training_language_models_with_langauge_feedback",
      entity="jerry_crea",
      tags=[sweep_tag],
  )

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced
