In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [35]:
api_key = os.getenv("OPENAI_KEY")
prompt = os.getenv("SYSTEM_PROMPT")

In [18]:
import pandas as pd
import json

def convert_csv_to_training_format(input_excel, output_file):
  system_message = {
      "role": "system",
      "content": prompt
  }

  dataset = pd.read_excel(input_excel)

  with open(output_file, "w", encoding="utf-8") as outfile:
    for row in dataset.itertuples():
      student_query = row.student_query
      expected_json = row.expected_json

      training_example = {
          "messages": [
              system_message,
              { "role": "user", "content": student_query },
              { "role": "assistant", "content": expected_json }
          ]
      }

      outfile.write(json.dumps(training_example) + "\n")

convert_csv_to_training_format("dataset.xlsx", "training_data.jsonl")

In [19]:
from openai import OpenAI
from time import sleep

# Initialize OpenAI client
client = OpenAI(api_key = api_key)

In [20]:
def upload_training_file(file_path):
    """Upload training file to OpenAI"""
    with open(file_path, "rb") as file:
        response = client.files.create(
            file=file,
            purpose="fine-tune"
        )
        return response.id

In [21]:
# Upload both training and validation files
training_file_id = upload_training_file("training_data.jsonl")

In [22]:
training_file_id

'file-KYxfexaegJzsvWTNzKc5b8'

In [23]:
def create_fine_tuning_job(training_file_id, validation_file_id=None, model="gpt-4o-mini-2024-07-18"):
    """Create a fine-tuning job"""
    response = client.fine_tuning.jobs.create(
        training_file=training_file_id,
        validation_file=validation_file_id,
        model=model
    )
    return response.id

# Start the fine-tuning job
job_id = create_fine_tuning_job(training_file_id)

In [24]:
job_id

'ftjob-dVSi8UTrF4mfdHtkAX1PYnWt'

In [26]:
def monitor_job(job_id):
    """Monitor fine-tuning job progress"""
    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        print(f"Status: {job.status}")

        if job.status in ["succeeded", "failed"]:
            return job

        # List latest events
        events = client.fine_tuning.jobs.list_events(
            fine_tuning_job_id=job_id,
            limit=5
        )
        for event in events.data:
            print(f"Event: {event.message}")

        sleep(30)  # Check every 30 seconds

In [27]:
# Monitor the job until completion
job = monitor_job(job_id)
if job.status == "succeeded":
    fine_tuned_model = job.fine_tuned_model
    print(f"Fine-tuned model ID: {fine_tuned_model}")
else:
    print("Fine-tuning failed.")

Status: validating_files
Event: Validating training file: file-KYxfexaegJzsvWTNzKc5b8
Event: Created fine-tuning job: ftjob-dVSi8UTrF4mfdHtkAX1PYnWt
Status: validating_files
Event: Validating training file: file-KYxfexaegJzsvWTNzKc5b8
Event: Created fine-tuning job: ftjob-dVSi8UTrF4mfdHtkAX1PYnWt
Status: running
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-KYxfexaegJzsvWTNzKc5b8
Event: Created fine-tuning job: ftjob-dVSi8UTrF4mfdHtkAX1PYnWt
Status: running
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-KYxfexaegJzsvWTNzKc5b8
Event: Created fine-tuning job: ftjob-dVSi8UTrF4mfdHtkAX1PYnWt
Status: running
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-KYxfexaegJzsvWTNzKc5b8
Event: Created fine-tuning job: ftjob-dVSi8UTrF4mfdHtkAX1PYnWt
Status: running
Event: Fine-tuning j

In [28]:
def test_model(model_id, test_input):
    """Test the fine-tuned model"""
    completion = client.chat.completions.create(
        model=model_id,
        messages=[
            {
                "role": "system",
                "content": prompt
            },
            {"role": "user", "content": test_input}
        ]
    )
    return completion.choices[0].message

In [29]:
test_query = "Do I need to complete Differential Equations before I can take System Dynamics?"
result = test_model(fine_tuned_model, test_query)

In [30]:
# Parse the JSON response
import json
extracted_data = json.loads(result.content)
print(json.dumps(extracted_data, indent=2))

{
  "message": "To take System Dynamics (EGM312), you need to have completed Kinematics of Machines (EGM311). While Differential Equations (MAT359) is not a direct prerequisite for System Dynamics, it is a fundamental mathematical tool used in many engineering analyses. If you haven't taken it yet, you may want to consider doing so concurrently or beforehand to support your understanding of the material.",
  "suggested_courses": [
    {
      "course": "MAT359 - Ordinary Differential Equations",
      "reason": "This course provides essential mathematical methods that are widely used in dynamic system analysis, which will be beneficial when studying System Dynamics."
    }
  ]
}


In [31]:
print(fine_tuned_model)

ft:gpt-4o-mini-2024-07-18:personal::B8FcZzfV
