# Finetuning GPT-3.X

https://github.com/norahsakal/fine-tune-gpt3-model/blob/main/fine_tune_step_by_step.ipynb

Finetuning GPT-turbo with **features as text** and **all-in-one strategy**.

Data files: `data_train_v2.jsonl`, `data_val_v2.jsonl`, `data_test_v2.jsonl`

## Libraries

In [1]:
# !pip install --upgrade pip
# !pip install openai
# (choose "base" kernel)

In [2]:
import os
import json
import pickle
import pandas as pd
from pathlib import Path

from sklearn.metrics import classification_report

import openai
from openai import OpenAI

## API key

In [3]:
api_key ="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"
openai.api_key = api_key

## Upload data to OpenAI

In [4]:
data_dir = os.path.join(os.getcwd(), "data")

In [5]:
train_file_name = os.path.join(data_dir, "data_train_v2.jsonl")
val_file_name = os.path.join(data_dir, "data_val_v2.jsonl")
test_file_name = os.path.join(data_dir, "data_test_v2.jsonl")

In [6]:
client = OpenAI(api_key=api_key)

### Train set

In [7]:
train_upload_response = client.files.create(
    file = Path(train_file_name),
    purpose = "fine-tune"
)

In [8]:
train_upload_response

FileObject(id='file-e3MVQ4T5v7AaJ5h04cyIMdlb', bytes=5320494, created_at=1705586715, filename='data_train_v2.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [9]:
train_file_id = train_upload_response.id
train_file_id

'file-e3MVQ4T5v7AaJ5h04cyIMdlb'

### Validation set

In [10]:
val_upload_response = client.files.create(
    file = Path(val_file_name),
    purpose = "fine-tune"
)

In [11]:
val_upload_response

FileObject(id='file-Mfz8963VXUPVr2t2FDwZ7DwY', bytes=630596, created_at=1705586719, filename='data_val_v2.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [12]:
val_file_id = val_upload_response.id
val_file_id

'file-Mfz8963VXUPVr2t2FDwZ7DwY'

### Test set

## Fine-tune model

In [13]:
# Launch fine-tuning

finetune_response = client.fine_tuning.jobs.create(
    training_file = train_file_id,
    validation_file = val_file_id,
    model = "gpt-3.5-turbo",
    hyperparameters = {"n_epochs": 4},
    suffix = "pe_finetune_v2c"
)

In [49]:
print(finetune_response.id)

ftjob-emm00n9KkG6LZv6jXp9Eh1Xd


In [21]:
# Checking progress

finetune_events = client.fine_tuning.jobs.list_events(fine_tuning_job_id=finetune_response.id, limit=10)
finetune_events.model_dump()

{'data': [{'id': 'ftevent-9VLV7Ok33hG2hqFCgthf4CO9',
   'created_at': 1705592422,
   'level': 'info',
   'message': 'The job has successfully completed',
   'object': 'fine_tuning.job.event',
   'data': {},
   'type': 'message'},
  {'id': 'ftevent-X4l7NoLD0SkSE4w3u11sxvXD',
   'created_at': 1705592419,
   'level': 'info',
   'message': 'New fine-tuned model created: ft:gpt-3.5-turbo-0613:personal:pe-finetune-v2c:8iOn5m9u',
   'object': 'fine_tuning.job.event',
   'data': {},
   'type': 'message'},
  {'id': 'ftevent-nllqCBZql6UhDnYke43ERVjm',
   'created_at': 1705592232,
   'level': 'info',
   'message': 'Step 1501/1567: training loss=0.09, validation loss=0.20',
   'object': 'fine_tuning.job.event',
   'data': {'step': 1501,
    'train_loss': 0.09431728720664978,
    'valid_loss': 0.20402912977265147,
    'train_mean_token_accuracy': 0.9473684430122375,
    'valid_mean_token_accuracy': 0.24390243902439024},
   'type': 'metrics'},
  {'id': 'ftevent-0xImOkBmpLPcI7EYjspszS0u',
   'created

In [57]:
retrieve_response = client.fine_tuning.jobs.retrieve(finetune_response.id)
retrieve_response.model_dump()

{'id': 'ftjob-emm00n9KkG6LZv6jXp9Eh1Xd',
 'created_at': 1705586730,
 'error': None,
 'fine_tuned_model': 'ft:gpt-3.5-turbo-0613:personal:pe-finetune-v2c:8iOn5m9u',
 'finished_at': 1705592418,
 'hyperparameters': {'n_epochs': 4,
  'batch_size': 11,
  'learning_rate_multiplier': 2},
 'model': 'gpt-3.5-turbo-0613',
 'object': 'fine_tuning.job',
 'organization_id': 'org-ao865HLfwm7KSarTu10iG90O',
 'result_files': ['file-wvwcQrcHtUBdtzZeBXnMr9CZ'],
 'status': 'succeeded',
 'trained_tokens': 4256564,
 'training_file': 'file-e3MVQ4T5v7AaJ5h04cyIMdlb',
 'validation_file': 'file-Mfz8963VXUPVr2t2FDwZ7DwY'}

## Save fine-tuned model

In [23]:
# Option 3

if retrieve_response.fine_tuned_model == None:
    
    finetuned_model = client.fine_tuning.jobs.retrieve("<JOB ID from API>").fine_tuned_model

else:
    
    finetuned_model = retrieve_response.model_dump()["fine_tuned_model"]

In [59]:
finetuned_model = retrieve_response.model_dump()["fine_tuned_model"]

In [60]:
finetuned_model

'ft:gpt-3.5-turbo-0613:personal:pe-finetune-v2c:8iOn5m9u'

## Evaluate on test set

In [44]:
# Evaluate the whole test set

predictions_l = []

with open(os.path.join(data_dir, "data_test_v2.jsonl"), 'r') as fh:
    
    lines_l = [json.loads(line) for line in fh]
    
    for i, line in enumerate(lines_l):
        
        if i%100 == 0:
            print(f"{i} samples processed")
        
        messages = line["messages"]
        
        response = client.chat.completions.create(
        model=finetuned_model,
        messages=messages
        )

        predictions_l.append(response.choices[0].message.content)

0 samples processed
100 samples processed
200 samples processed
300 samples processed
400 samples processed
500 samples processed
600 samples processed
700 samples processed
800 samples processed
900 samples processed
1000 samples processed
1100 samples processed
1200 samples processed


In [50]:
len(predictions_l)

1266

In [51]:
set(predictions_l)

{"Doesn't my previous reply answer your question?",
 'claim',
 'major claim',
 'premise'}

In [52]:
for i, x in enumerate(predictions_l):
    if x == 'Premise':
        predictions_l[i] = 'premise'

In [53]:
set(predictions_l)

{"Doesn't my previous reply answer your question?",
 'claim',
 'major claim',
 'premise'}

In [38]:
with open(os.path.join(data_dir, 'predictions_l_v2.pkl'), 'wb') as f:
    
    pickle.dump(predictions_l, f)

## Results

In [39]:
df = pd.read_csv(os.path.join(data_dir, "persuasive_essays_dataset.csv"), index_col=0)

In [40]:
grounds_l = list(df[df.split == 'TEST'].label.values)

In [41]:
# grounds_l

In [42]:
grounds_l = list(map(lambda x: x.replace("MajorClaim", "major claim"), grounds_l))
grounds_l = list(map(lambda x: x.replace("Claim", "claim"), grounds_l))
grounds_l = list(map(lambda x: x.replace("Premise", "premise"), grounds_l))

In [43]:
print(classification_report(grounds_l, predictions_l, digits=3))

              precision    recall  f1-score   support

       claim      0.723     0.635     0.676       304
 major claim      0.923     0.935     0.929       153
     premise      0.882     0.920     0.900       809

    accuracy                          0.853      1266
   macro avg      0.842     0.830     0.835      1266
weighted avg      0.848     0.853     0.850      1266

