In [1]:
import os
import json
from openai import OpenAI

# Base model names that FineTuning accepts; its constructor raises ValueError
# for any model that is not in this list.
available_models = [
    # Data from 15 Nov 2024
    # (presumably the models OpenAI allowed fine-tuning on at that date --
    # TODO verify against the current fine-tuning documentation)
    "gpt-4o-2024-08-06",
    "gpt-4o-mini-2024-07-18",
    "gpt-4-0613",
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo-0613",
]

class FineTuning:
    """Convenience wrapper around the OpenAI fine-tuning endpoints.

    Every job started through this class is recorded in a local JSON history
    file (a JSON list of job dicts). The newest entry of that file is used to
    find "the current job" when no job id is given explicitly.
    """

    def __init__(self, model: str = "gpt-4o-mini-2024-07-18", fine_tune_history: str = "fine_tune_history.json"):
        """Create the API client and remember the base model and history file.

        Args:
            model: Base model to fine-tune; must be listed in `available_models`.
            fine_tune_history: Path of the JSON file that records started jobs.

        Raises:
            ValueError: If `model` is not in `available_models`.
        """
        if model not in available_models:
            raise ValueError(f"Model {model} not available")
        self.client = OpenAI()
        self.fine_tune_history = fine_tune_history
        self.model = model
        # Id of the job most recently started or looked up by this instance.
        self.job_id = None

    def _load_history(self) -> list:
        """Return the entries of the history file, or [] if the file does not exist."""
        if not os.path.exists(self.fine_tune_history):
            return []
        with open(self.fine_tune_history, "r") as f:
            return json.load(f)

    def _save_history(self, entries: list) -> None:
        """Overwrite the history file with `entries`."""
        # Serialise BEFORE opening the file: mode "w" truncates immediately, so
        # an error raised afterwards would leave the history wiped.
        payload = json.dumps(entries, indent=4)
        with open(self.fine_tune_history, "w") as f:
            f.write(payload)

    def update_history(self, response: dict):
        """Append the job record `response` to the history file (creating it if needed)."""
        entries = self._load_history()
        entries.append(response)
        self._save_history(entries)

    def edit_lastest_history(self, response: dict):
        """Store the refreshed state of a job in the history file.

        The newest entry whose "id" equals `response["id"]` is replaced, so
        refreshing an older job no longer overwrites an unrelated record.
        If no entry matches (including an empty or missing history), the record
        is appended. A `response` without an "id" replaces the last entry,
        which is what this method always did before.
        """
        entries = self._load_history()
        job_key = response.get("id")

        position = None
        if job_key is None:
            if entries:
                position = len(entries) - 1
        else:
            # Search from the newest entry backwards.
            for idx in range(len(entries) - 1, -1, -1):
                entry = entries[idx]
                if isinstance(entry, dict) and entry.get("id") == job_key:
                    position = idx
                    break

        if position is None:
            entries.append(response)
        else:
            entries[position] = response
        self._save_history(entries)

    # Correctly spelled alias; the original name is kept for existing callers.
    edit_latest_history = edit_lastest_history

    def get_job_id(self):
        """Return the current job id, loading it from the history file if unset.

        Raises:
            FileNotFoundError: If no job id is cached and the history file is missing.
            IndexError: If no job id is cached and the history file has no entries.
        """
        if self.job_id is None:
            if not os.path.exists(self.fine_tune_history):
                raise FileNotFoundError(
                    f"History file {self.fine_tune_history} not found; "
                    "start a job first or pass job_id explicitly"
                )
            history = self._load_history()
            if not history:
                raise IndexError(
                    f"History file {self.fine_tune_history} contains no jobs; "
                    "start a job first or pass job_id explicitly"
                )
            self.job_id = history[-1]["id"]
        return self.job_id

    def upload_file(self, file_path: str):
        """Upload a local .jsonl training file and return the created file object.

        Raises:
            FileNotFoundError: If `file_path` does not exist.
            ValueError: If the file does not have a `.jsonl` extension.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} not found")
        if os.path.splitext(file_path)[1] != ".jsonl":
            raise ValueError("File must be a jsonl file")
        # The context manager closes the handle even if the upload raises.
        with open(file_path, "rb") as training_data:
            file = self.client.files.create(
                file=training_data,
                purpose="fine-tune"
            )
        return file

    def fine_tuning(self, file_id: str, suffix: str, hyperparameters: dict = None):
        """Start a fine-tuning job on `self.model` and record it in the history.

        Args:
            file_id: Id of an uploaded training file.
            suffix: Name suffix for the fine-tuned model (may be None).
            hyperparameters: Training overrides; None is sent as an empty dict.

        Returns:
            The job object returned by the API.
        """
        response = self.client.fine_tuning.jobs.create(
            training_file=file_id,
            model=self.model,
            suffix=suffix,
            hyperparameters=hyperparameters if hyperparameters is not None else {}
        )
        # Remember the id first so the job stays reachable from this instance
        # even if writing the history file fails.
        self.job_id = response.id
        self.update_history(response.model_dump())
        return response

    def retrieve_job(self, job_id: str = None):
        """Fetch a fine-tuning job from the API.

        job_id priority:
          1. `job_id` passed to this method
          2. `self.job_id`
          3. id of the newest entry in the history file
        """
        if job_id is None:
            job_id = self.get_job_id()
        job = self.client.fine_tuning.jobs.retrieve(job_id)
        return job

    def flow(self, file_path: str = None, file_id: str = None, fine_tune_model_name: str = None, hyperparameters: dict = None):
        """Upload a file if needed, then start a fine-tuning job.

        Args:
            file_path: Local .jsonl file to upload. Takes priority over `file_id`.
            file_id: Id of an already uploaded file, used when `file_path` is None.
            fine_tune_model_name: Suffix for the fine-tuned model name.
            hyperparameters: Optional training overrides. The default is None
                rather than a shared mutable `{}`.

        Raises:
            ValueError: If neither `file_path` nor `file_id` is given, or no
                file id could be determined.
        """
        if file_path is None and file_id is None:
            raise ValueError("File path or file id must be provided")
        if file_path is not None:
            file = self.upload_file(file_path)
            file_id = file.id
            print(f"File uploaded: {file_id}")
        else:
            print(f"File ID provided: {file_id}")
        if file_id is None:
            raise ValueError("File ID not found")

        fine_tune_job = self.fine_tuning(file_id, fine_tune_model_name, hyperparameters)
        print(f"Fine tuning job started: {fine_tune_job.id}")
        print("Fine tuning job will take some time to complete")

    def is_fine_tuning_complete(self, job_id: str = None):
        """Return True if the job has status "succeeded".

        On success the job's final state is also written to the history file.
        """
        job = self.retrieve_job(job_id)
        is_succeeded = job.status == "succeeded"
        if is_succeeded:
            self.edit_lastest_history(job.model_dump())
        return is_succeeded

    def send_message(self, message: str, prompt: str = "Marv is a factual chatbot that is also sarcastic.", job_id: str = None):
        """Send one user message to the fine-tuned model of a job.

        Args:
            message: User message.
            prompt: System prompt.
            job_id: Job whose model is used; resolved as in `retrieve_job`.

        Returns:
            The reply text, or None (after printing a notice) if the job has
            not succeeded.
        """
        job = self.retrieve_job(job_id)
        if job.status != "succeeded":
            if job.status in ("failed", "cancelled"):
                # These jobs will never finish, so do not claim they are still running.
                print(f"Fine tuning job {job.status}; no fine-tuned model is available")
            else:
                print("Fine tuning job is still in progress")
            return None
        completion = self.client.chat.completions.create(
            model=job.fine_tuned_model,
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": message},
            ]
        )
        return completion.choices[0].message.content

    def delete_file(self, file_id: str):
        """Delete an uploaded file by id."""
        self.client.files.delete(file_id)
        print(f"File {file_id} deleted")

    def delete_model(self, job_id: str = None, model_id: str = None):
        """Delete a fine-tuned model, given its id or the id of the job that produced it.

        Raises:
            ValueError: If neither id is given, or the job has no fine-tuned model.
        """
        if job_id is None and model_id is None:
            raise ValueError("Job ID or Model ID must be provided")
        if model_id is None:
            model_id = self.retrieve_job(job_id).fine_tuned_model
            if not model_id:
                # A job that has not succeeded has no model to delete.
                raise ValueError(f"Job {job_id} has no fine-tuned model to delete")
        self.client.models.delete(model_id)
        print(f"Model {model_id} deleted")

In [86]:
# Reference links
#   Documentation: https://platform.openai.com/docs/guides/fine-tuning
#   Fine-tuning:   https://platform.openai.com/finetune/ftjob-jYdchmMHbe1BavIxxujkzzqb?filter=all
#   Storage:       https://platform.openai.com/storage

# Upload esg_qa.jsonl and start a fine-tuning job without a custom model suffix.
fine_tune = FineTuning(model="gpt-4o-mini-2024-07-18")
fine_tune.flow(file_path="esg_qa.jsonl")

File uploaded: file-OIFG9BbLroy85SbiFEzw9Iod
Fine tuning job started: ftjob-YXOm6rv6BJ0vP2gl7IXfQBXo
Fine tuning job will take some time to complete


In [6]:
# Check one specific job by id. When the job has succeeded, its final state is
# also saved to the history file.
fine_tune.is_fine_tuning_complete(
    job_id="ftjob-YXOm6rv6BJ0vP2gl7IXfQBXo"
)

True

In [7]:
# No job_id is given, so the instance's current job is used (falling back to the
# newest entry in the history file). send_message's default system prompt applies.
fine_tune.send_message(
    message="How reduce emission gas?",
    )

'Emission reduction can occur through improved technology, efficient processes, carbon capture and storage, transitioning to renewable energy, and regulatory compliance.'

In [None]:
# Showcase: fine-tuning run with a custom model suffix

In [None]:
# 8 mins (presumably how long this job took to finish -- TODO confirm)

# Upload mydata.jsonl and fine-tune with the model suffix "show-case-2".
# Optional arguments not used in this run (example values):
#   file_id=None
#   hyperparameters={"n_epochs": 10, "batch_size": 2, "learning_rate_multiplier": 1.9}
fine_tune = FineTuning(model="gpt-4o-mini-2024-07-18")
fine_tune.flow(file_path="mydata.jsonl", fine_tune_model_name="show-case-2")

File uploaded: file-8Js66iO1sRwp4P96tQJsfcMl
Fine tuning job started: ftjob-sMt0WnlUTQVVmSEHnGSim7nA
Fine tuning job will take some time to complete


In [4]:
# Without job_id this checks the instance's current job, falling back to the
# newest entry in the history file.
fine_tune.is_fine_tuning_complete()

True

In [5]:
# The system prompt is passed explicitly here; it is the same text as
# send_message's default prompt.
fine_tune.send_message(
    message="What is the capital of France?",
    prompt="Marv is a factual chatbot that is also sarcastic."
)

'The capital of France is Paris. Just the most romantic city in the world, no big deal.'

In [None]:
# The code below is an example of using the optional parameters

In [None]:
# Reuse an already uploaded training file (file_path=None means nothing is
# uploaded) and override the training hyperparameters.
fine_tune = FineTuning(model="gpt-4o-mini-2024-07-18")
fine_tune.flow(
    file_path=None,
    file_id="file-gTAAx6K2wGd8d3SV1O8NT9UP",
    fine_tune_model_name="second_fine_tune",
    hyperparameters={"n_epochs": 10, "batch_size": 2, "learning_rate_multiplier": 1.9},
)

File ID provided: file-zMib0bA0pl6pcPP1sPnULEWY
Fine tuning job started: ftjob-jYdchmMHbe1BavIxxujkzzqb
Fine tuning job will take some time to complete
You can check the status of the job using fine_tune_response.id
Once the job is completed, you can retrieve the fine-tuned model using fine_tune_response.fine_tuned_model
You can chat with the fine-tuned model using the chat method


In [69]:
# job_id=None is the default, written out here to show the optional parameter:
# the instance's current job is checked.
fine_tune.is_fine_tuning_complete(
    job_id=None
)

True

In [None]:
# job_id=None (the default) uses the instance's current job; the default system
# prompt of send_message applies because no prompt is passed.
fine_tune.send_message(
    message="How many continents are there?",
    job_id=None
    )

'There are seven continents. Just a few landmasses floating around, you know.'

In [None]:
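# Cleanup (destructive): uncomment to delete a fine-tuned model.
# delete_model needs either job_id or model_id.
# fine_tune = FineTuning()

# fine_tune.delete_model(
#     # job_id="ftjob-jYdchmMHbe1BavIxxujkzzqb",
#     model_id="ft:gpt-4o-mini-2024-07-18:personal:test-2:ATis1TV0"
# )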

Model ft:gpt-4o-mini-2024-07-18:personal:test-2:ATis1TV0 deleted
