# Finetuning GPT-3.X

Reference notebook: https://github.com/norahsakal/fine-tune-gpt3-model/blob/main/fine_tune_step_by_step.ipynb

Fine-tuning GPT-3.5-turbo with **features as text** and the **all-in-one strategy**.

Data files: `data_train_v2.jsonl`, `data_val_v2.jsonl`, `data_test_v2.jsonl`

## Libraries

In [None]:
# !pip install --upgrade pip
# !pip install openai
# (choose "base" kernel)

In [None]:
import os
import json
import pickle
import pandas as pd
from pathlib import Path

from sklearn.metrics import classification_report

import openai
from openai import OpenAI

## API key

In [None]:
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# written into (and committed with) the notebook; when the variable is not set,
# fall back to the inline placeholder, which must then be edited by hand.
api_key = os.environ.get("OPENAI_API_KEY", "<your Open AI key>")

# Also set the module-level key for any code that goes through `openai` directly.
openai.api_key = api_key

## Upload data to OpenAI

In [None]:
# Directory holding the dataset files: "<current working directory>/data".
# Kept as a plain string because later cells combine it with os.path.join.
data_dir = str(Path.cwd() / "data")

In [None]:
# Full paths of the train / validation / test JSONL splits inside `data_dir`.
train_file_name, val_file_name, test_file_name = (
    os.path.join(data_dir, jsonl_name)
    for jsonl_name in (
        "data_train_v2.jsonl",
        "data_val_v2.jsonl",
        "data_test_v2.jsonl",
    )
)

In [None]:
# API client used by the cells below for file uploads, fine-tuning jobs and
# chat completions.
client = OpenAI(api_key=api_key)

### Train set

In [None]:
# Upload the training split; purpose "fine-tune" marks it as fine-tuning data.
train_upload_response = client.files.create(
    purpose="fine-tune",
    file=Path(train_file_name),
)

In [None]:
# Show the upload response returned for the training file.
train_upload_response

In [None]:
# Keep the uploaded file's id; it is passed as `training_file` when the
# fine-tuning job is created.
train_file_id = train_upload_response.id
train_file_id

### Validation set

In [None]:
# Upload the validation split; purpose "fine-tune" marks it as fine-tuning data.
val_upload_response = client.files.create(
    purpose="fine-tune",
    file=Path(val_file_name),
)

In [None]:
# Show the upload response returned for the validation file.
val_upload_response

In [None]:
# Keep the uploaded file's id; it is passed as `validation_file` when the
# fine-tuning job is created.
val_file_id = val_upload_response.id
val_file_id

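### Test set

The test set is not uploaded to OpenAI; it is read locally from `data_test_v2.jsonl` in the "Evaluate on test set" section.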

## Fine-tune model

In [None]:
# Launch fine-tuning
#
# Starts a job on the base model using the uploaded training and validation
# files. `n_epochs` is the number of epochs and `suffix` is the custom suffix
# requested for the resulting model.
finetune_response = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file=train_file_id,
    validation_file=val_file_id,
    suffix="pe_finetune_v2c",
    hyperparameters={"n_epochs": 2},
)

In [None]:
# Print the id of the fine-tuning job that was just created.
print(finetune_response.id)

In [None]:
# Checking progress
#
# Request up to 10 events for the job and show them as a plain dict.
finetune_events = client.fine_tuning.jobs.list_events(
    limit=10,
    fine_tuning_job_id=finetune_response.id,
)
finetune_events.model_dump()

In [None]:
# Fetch the job's current state and show it as a plain dict; its
# "fine_tuned_model" entry is read in the cells below.
retrieve_response = client.fine_tuning.jobs.retrieve(finetune_response.id)
retrieve_response.model_dump()

## Save fine-tuned model

In [None]:
# Option 3
#
# Resolve the name of the fine-tuned model. Start from the job retrieved above;
# if it does not carry a model name, look the job up again by an explicit job
# id (replace the placeholder with the id shown by the API).
finetuned_model = retrieve_response.fine_tuned_model

# Identity check: `None` is a singleton, so `is None` is the correct test.
if finetuned_model is None:
    finetuned_model = client.fine_tuning.jobs.retrieve("<JOB ID from API>").fine_tuned_model

In [None]:
# Take the model name straight from the retrieved job.
# NOTE(review): running this cell after the "Option 3" cell overwrites whatever
# that cell resolved, including with None when the retrieved job has no model
# name yet. Confirm which of the two cells is meant to be run.
finetuned_model = retrieve_response.fine_tuned_model

In [None]:
# Show the resolved fine-tuned model name.
finetuned_model

## Evaluate on test set

In [None]:
# Evaluate the whole test set
#
# Sends every test record to the fine-tuned model and collects the text of the
# first returned choice in `predictions_l`, in file order.

predictions_l = []

# Parse the JSONL up front so the file handle is closed before the slow API
# calls start. Blank lines (e.g. a trailing newline) are skipped instead of
# crashing json.loads. `test_file_name` is the same path as
# os.path.join(data_dir, "data_test_v2.jsonl").
with open(test_file_name, 'r', encoding="utf-8") as fh:
    lines_l = [json.loads(line) for line in fh if line.strip()]

for i, line in enumerate(lines_l):

    # Lightweight progress indicator.
    if i % 100 == 0:
        print(f"{i} samples processed")

    # NOTE(review): the record's messages are sent unchanged; this presumes the
    # test records do not contain the assistant (label) turn - confirm.
    messages = line["messages"]

    response = client.chat.completions.create(
        model=finetuned_model,
        messages=messages,
    )

    predictions_l.append(response.choices[0].message.content)

In [None]:
# Number of predictions collected.
len(predictions_l)

In [None]:
# Distinct strings the model produced, to spot unexpected label spellings.
set(predictions_l)

In [None]:
# Map the capitalised 'Premise' output to the lower-case label; every other
# prediction is kept as is. Slice assignment updates the list in place.
predictions_l[:] = [
    'premise' if predicted == 'Premise' else predicted
    for predicted in predictions_l
]

In [None]:
# Distinct predicted strings after the normalisation above.
set(predictions_l)

In [None]:
# Persist the predictions as a pickle inside the data directory.
predictions_path = os.path.join(data_dir, 'predictions_l_v2.pkl')

with open(predictions_path, 'wb') as out_file:
    pickle.dump(predictions_l, out_file)

## Results

In [None]:
# Load the dataset CSV, using its first column as the index.
df = pd.read_csv(os.path.join(data_dir, "persuasive_essays_dataset.csv"), index_col=0)

In [None]:
# Ground-truth labels of the rows whose `split` column equals 'TEST', in
# DataFrame order.
grounds_l = df.loc[df["split"] == 'TEST', "label"].tolist()

In [None]:
# grounds_l

In [None]:
# Rewrite the dataset's label spellings to the lower-case ones used by the
# predictions. Order matters: "MajorClaim" must be replaced before "Claim".
grounds_l = [
    label.replace("MajorClaim", "major claim")
         .replace("Claim", "claim")
         .replace("Premise", "premise")
    for label in grounds_l
]

In [None]:
# Print the classification report (ground truth first, predictions second),
# formatted with 3 decimal digits.
print(classification_report(grounds_l, predictions_l, digits=3))

### Results for 1 epoch
```
              precision    recall  f1-score   support

       claim      0.727     0.763     0.745       304
 major claim      0.855     0.961     0.905       153
     premise      0.935     0.896     0.915       809

    accuracy                          0.872      1266
   macro avg      0.839     0.873     0.855      1266
weighted avg      0.876     0.872     0.873      1266
```

### Results for 2 epochs
```
              precision    recall  f1-score   support

       claim      0.744     0.737     0.740       304
 major claim      0.946     0.922     0.934       153
     premise      0.911     0.918     0.914       809

    accuracy                          0.875      1266
   macro avg      0.867     0.859     0.863      1266
weighted avg      0.875     0.875     0.875      1266
```