In [34]:
# Load the pickled fine-tuning samples; a later cell converts them to JSONL.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only point this at a file you produced yourself.
import pickle

with open("finetune_part2.pkl", mode="rb") as pickle_file:
    data = pickle.load(pickle_file)

In [35]:
from src.datasets.MATH import MATHRecord,MATH
import json

# Reference MATH dataset; the training-file cell reads `mathDataset.records`
# to look up the correct solution for each sample.
mathDataset = MATH("./data/math.jsonl")


In [36]:
# System prompt used as the "system" message of every training example built
# in the training-file cell.
# NOTE(review): the variable is spelled `system_promt`; the spelling is kept
# because that cell references it by this name. The prompt text itself is
# training data and is intentionally left untouched.
system_promt = """
You are a chatbot that finds and classifies the error in the students solution.
There are only two possible outcome for the questions: LOGICAL and CONCENTRATION.
If the mistake is made in reasoning or in the application of a rule, even though the basic operations are performed correctly the error type is LOGICAL.
If the incorrect result comes from a lapse in attention leads to an oversight or a careless mistake in the calculations or steps, or not using basic preconditions given in the question properly error type is CONCENTRATION.
Do not repeat any given information.
Explain the error and determine the error type.
State the error type explicitly in the end.
"""


# Alternative system prompt: same classification rules as `system_promt`,
# followed by explicit "Step 1" / "Step 2" instructions.
# NOTE(review): no other cell visible in this notebook references
# `new_sys_prompt` — confirm whether it is still needed.
new_sys_prompt = """
You are a chatbot that finds and classifies the error in the students solution.

Use the following step-by-step instructions to respond to the user inputs.

There are only two possible outcome for the questions: LOGICAL and CONCENTRATION.
If the mistake is made in reasoning or in the application of a rule, even though the basic operations are performed correctly the error type is LOGICAL.
If the incorrect result comes from a lapse in attention leads to an oversight or a careless mistake in the calculations or steps, or not using basic preconditions given in the question properly error type is CONCENTRATION.
Do not repeat any given information.
Explain the error and determine the error type.
State the error type explicitly in the end.



Step 1:
By comparing the student´s solution and the real solution find the error made and explain what is made wrong.

Step 2:
Decide the type of error in students solutions in the set of (LOGICAL, CONCENTRATION) 
"""

In [41]:
# Convert every pickled sample into a chat-format training example and write
# the examples to data.jsonl, one JSON object per line.
def _chat_example(sample):
    """Build the system / user / assistant message dict for one sample.

    Reads the error explanation, the incorrect solution and the error type
    from `sample`, and the correct solution from `mathDataset` using the id
    of the sample's record.
    """
    explanation = sample["error_explanation"]
    student_answer = sample["incorrect_solution"]
    label = sample["error_type"]
    record = sample["record"]
    problem = record.question
    reference = mathDataset.records[record.id].solution
    if type(reference) is list:
        # A list-valued solution is flattened into a single string.
        reference = "".join(str(part) for part in reference)

    user_text = (
        "QUESTION: " + problem
        + "\nCORRECT SOLUTION: " + reference
        + "\nSTUDENT'S SOLUTION: " + student_answer
    )
    assistant_text = "ERROR EXPLANATION: " + explanation + "\nERROR TYPE: " + label
    return {
        "messages": [
            {"role": "system", "content": system_promt},
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ]
    }


dict_list = [_chat_example(sample) for sample in data]

with open("data.jsonl", 'w') as jsonl_file:
    jsonl_file.writelines(json.dumps(example) + "\n" for example in dict_list)

In [43]:
import time
import dotenv
import os

from openai import OpenAI

# Read a local .env file (if any) into the process environment before the
# client is created.
dotenv.load_dotenv()

# NOTE(review): API_KEY is read here but is not passed to OpenAI() below and
# is not used by any other cell visible in this notebook.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# The client is constructed without explicit arguments.
client = OpenAI()

In [None]:
# Upload the training file, then poll once a minute until the API reports it
# as processed.
with open("data.jsonl", "rb") as training_fh:
    # The context manager closes the local file handle once the upload call
    # returns; the original left it open for the lifetime of the kernel.
    fine_tune = client.files.create(file=training_fh, purpose="fine-tune")

while True:
    finetune_file_handle = client.files.retrieve(file_id=fine_tune.id)
    print(finetune_file_handle.status)
    if finetune_file_handle.status == "processed":
        break
    if finetune_file_handle.status == "error":
        # A rejected file never becomes "processed"; stop instead of polling
        # forever.
        raise RuntimeError(f"Processing of uploaded file {fine_tune.id} failed")
    time.sleep(60)

In [None]:
# Upload the validation file, then poll once a minute until the API reports
# it as processed.
# NOTE(review): validation.jsonl is not written by any cell visible in this
# notebook — it must already exist on disk.
with open("validation.jsonl", "rb") as validation_fh:
    # The context manager closes the local file handle once the upload call
    # returns; the original left it open for the lifetime of the kernel.
    validation = client.files.create(file=validation_fh, purpose="fine-tune")

while True:
    validation_file_handle = client.files.retrieve(file_id=validation.id)
    print(validation_file_handle.status)
    if validation_file_handle.status == "processed":
        break
    if validation_file_handle.status == "error":
        # A rejected file never becomes "processed"; stop instead of polling
        # forever.
        raise RuntimeError(f"Processing of uploaded file {validation.id} failed")
    time.sleep(60)

In [None]:
# Variant 1: fine-tune gpt-3.5-turbo for 3 epochs using both the uploaded
# training file and the uploaded validation file.
# NOTE(review): this cell and the two job cells after it all assign `job`;
# the later cells use whichever one ran last.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file=fine_tune.id,
    validation_file=validation.id,
    hyperparameters={"n_epochs": 3},
)
# The job id is the only handle needed to look the job up again later.
print(job.id, "DONT LOSE THIS", sep="\n")

In [None]:
# Variant 2: fine-tune gpt-3.5-turbo for 7 epochs on the training file only
# (no validation file is passed).
# NOTE(review): running this cell rebinds `job`, replacing the handle created
# by any previously executed job cell.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file=fine_tune.id,
    hyperparameters={"n_epochs": 7},
)
# The job id is the only handle needed to look the job up again later.
print(job.id, "DONT LOSE THIS", sep="\n")

In [None]:
# Variant 3: start a job whose base model is an existing fine-tuned model,
# with no hyperparameters specified.
# NOTE(review): running this cell rebinds `job`, replacing the handle created
# by any previously executed job cell.
job = client.fine_tuning.jobs.create(
    model="ft:gpt-3.5-turbo-0125:tum-sot-hctl::9VgFsUc6",
    training_file=fine_tune.id,
)
# The job id is the only handle needed to look the job up again later.
print(job.id, "DONT LOSE THIS", sep="\n")

In [None]:
# One-off status check: fetch the current state of `job` and print its status,
# the full job object, and the resulting model name.
job_handle = client.fine_tuning.jobs.retrieve(job.id)
print(f"Job status:  {job_handle.status}")
print(job_handle, job_handle.fine_tuned_model, sep="\n")

In [None]:
# Auto Checker: poll the fine-tuning job once a minute until it finishes.
while True:
    job_handle = client.fine_tuning.jobs.retrieve(job.id)
    if job_handle.status == "succeeded":
        break
    if job_handle.status in ("failed", "cancelled"):
        # These states are final; without this check the loop would keep
        # polling a job that can never succeed.
        raise RuntimeError(
            f"Fine-tuning job {job.id} ended with status {job_handle.status!r}"
        )
    print("Job status: ", job_handle.status)
    time.sleep(60)

In [None]:
# Re-fetch the job and print the name of the model it produced.
# `job_handle` is reused by the cell that saves the model name to disk.
job_handle = client.fine_tuning.jobs.retrieve(job.id)
print(job_handle.fine_tuned_model)

In [None]:
# Save model name
# Check for a missing model name before opening the file: mode "w" truncates
# model_id.txt immediately, so writing None would destroy a previously saved
# id and then fail.
if job_handle.fine_tuned_model is None:
    raise ValueError(
        "job_handle.fine_tuned_model is None; the job has not produced a model yet"
    )
with open("model_id.txt", "w") as file:
    file.write(job_handle.fine_tuned_model)

In [35]:
# Fetch the organisation's fine-tuning jobs; a later cell filters them by
# model name.
# NOTE(review): the list endpoint is paginated, so `.data` presumably holds
# only the first page of jobs — confirm against the client documentation.
fine_tuning_list = client.fine_tuning.jobs.list()

In [40]:
# Names of the fine-tuned models whose jobs should be looked up.
saved_models = [
    "ft:gpt-3.5-turbo-0125:tum-sot-hctl::9T5ZS7eP",
    "ft:gpt-3.5-turbo-0125:tum-sot-hctl::9Quieny6",
    "ft:gpt-3.5-turbo-0125:tum-sot-hctl::9MisNHwy",
]

In [41]:
# Collect the fine-tuning jobs whose resulting model is one of `saved_models`.
# Iterate the page object itself rather than `.data`: `.data` only contains
# the first page of results, while iterating the page fetches the following
# pages as needed, so older jobs are not silently skipped.
selected = [
    fine_tune_job
    for fine_tune_job in fine_tuning_list
    if fine_tune_job.fine_tuned_model in saved_models
]

In [None]:
# Get metrics: fetch the checkpoints recorded for `job`.
checkpoint_page = client.fine_tuning.jobs.checkpoints.list(job.id)
checkpoints = checkpoint_page.data
# Trailing expression so the notebook displays the first returned checkpoint.
checkpoints[0]

In [30]:
# Extract the training loss and step number from each checkpoint's metrics,
# in the reverse of the order in which `checkpoints` lists them.
ordered_checkpoints = list(reversed(checkpoints))
losses = [entry.metrics.train_loss for entry in ordered_checkpoints]
steps = [entry.metrics.step for entry in ordered_checkpoints]

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the checkpoint training loss against the training step.
plt.style.use({"lines.linestyle": "solid", "lines.linewidth": 1})

# Explicit figure/axes so labels and title are attached to this plot only.
fig, ax = plt.subplots()
ax.plot(steps, losses, color="#47c984")
# Label the figure so it can be read without the surrounding cells.
ax.set_xlabel("Step")
ax.set_ylabel("Training loss")
ax.set_title("Fine-tuning training loss per checkpoint")

plt.show()