### 1. Fine-tuning 

References:
- https://github.com/openai/openai-python
- https://platform.openai.com/docs/guides/fine-tuning

In [None]:
from pathlib import Path
from openai import OpenAI
import os
import csv
from tenacity import retry, stop_after_attempt, wait_random_exponential

#### API Key

In [None]:
# Shared OpenAI client used by every cell below. The API key is taken from the
# OPENAI_API_KEY environment variable (None is passed when it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

#### File Uploads

In [None]:
# Upload the training and validation JSONL files with purpose "fine-tune".
# The IDs of the uploaded files are kept for the fine-tuning job cell.

train_response = client.files.create(
    purpose="fine-tune",
    file=Path("data/examples/gpt_finetuning_train.jsonl"),
)
val_response = client.files.create(
    purpose="fine-tune",
    file=Path("data/examples/gpt_finetuning_val.jsonl"),
)

train_file_id, val_file_id = train_response.id, val_response.id

print(f"Training file uploaded successfully with ID: {train_file_id}")
print(f"Validation file uploaded successfully with ID: {val_file_id}")

#### Fine-tuning

In [None]:
# Create a fine-tuning job for the base model from the uploaded training and
# validation files, then keep the job ID so its events can be queried later.
train_file_id = train_response.id
val_file_id = val_response.id
model_name = "gpt-3.5-turbo-0125"

response = client.fine_tuning.jobs.create(
    model=model_name,
    training_file=train_file_id,
    validation_file=val_file_id,
    hyperparameters={"n_epochs": 3},
)
job_id = response.id
print(f"Fine-tuning job created successfully with ID: {job_id}")

In [None]:
# Show up to 10 events of the fine-tuning job. After the job has completed,
# the ID of your fine-tuned model (model_id) is reported in these events.
client.fine_tuning.jobs.list_events(
    limit=10,
    fine_tuning_job_id=job_id,
)

#### Inference

In [None]:
# Predict a skill (standard) for every problem in the input CSV with the
# fine-tuned model and write the rows, plus a 'pred_skill' column, to a new CSV.
csv_file_path = "Path to an input file containing the problems"
pred_csv_file_path = "Path to an output file containing the problems and skill predictions"
model_id = "Your fine-tuned model ID"


# Defined once, outside the loop: every request shares the same retry policy
# (randomized exponential backoff, wait capped at 60 s, at most 5 attempts).
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(5))
def completion_with_backoff(**kwargs):
    """Forward the keyword arguments to the chat completions endpoint."""
    return client.chat.completions.create(**kwargs)


# Read the input a single time. newline='' leaves line-ending handling to the
# csv module, so quoted problems with embedded newlines are parsed correctly.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    updated_rows = list(csv_reader)
    # fieldnames is None for an empty file; fall back to no input columns.
    fieldnames = list(csv_reader.fieldnames or []) + ['pred_skill']

# The input must have a 'problem' column; a KeyError is raised otherwise.
problems = [row['problem'] for row in updated_rows]

predicted_skills = []
for problem_text in problems:
    response = completion_with_backoff(
        model=model_id,
        messages=[
            {"role": "system", "content": "Which standard from the Common Core Standards for Mathematics is most closely related to the given problem?"},
            {"role": "user", "content": "Problem: " + problem_text}
        ],
        temperature=0.2
    )
    predicted_skills.append(response.choices[0].message.content)

# Attach each prediction to the row it was generated from (same order).
for row, predicted_skill in zip(updated_rows, predicted_skills):
    row['pred_skill'] = predicted_skill

with open(pred_csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()
    csv_writer.writerows(updated_rows)

### 2. Standards in the prompt

In [None]:
# Predict a skill (standard) for every problem in the input CSV with the base
# model, passing the list of standards in the prompt, and write the rows, plus
# a 'pred_skill' column, to a new CSV.
csv_file_path = "Path to an input file containing the problems"
pred_csv_file_path = "Path to an output file containing the problems and skill predictions"
standards = "A list of standards (skills)"


# Defined once, outside the loop: every request shares the same retry policy
# (randomized exponential backoff, wait capped at 60 s, at most 5 attempts).
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(5))
def completion_with_backoff(**kwargs):
    """Forward the keyword arguments to the chat completions endpoint."""
    return client.chat.completions.create(**kwargs)


# Read the input a single time. newline='' leaves line-ending handling to the
# csv module, so quoted problems with embedded newlines are parsed correctly.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    updated_rows = list(csv_reader)
    # fieldnames is None for an empty file; fall back to no input columns.
    fieldnames = list(csv_reader.fieldnames or []) + ['pred_skill']

# The input must have a 'problem' column; a KeyError is raised otherwise.
problems = [row['problem'] for row in updated_rows]

predicted_skills = []
for problem_text in problems:
    response = completion_with_backoff(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "Which standard from the Common Core Standards for Mathematics is most closely related to the given problem?"},
            {"role": "user", "content": f"Problem: {problem_text}, Standards: {standards}"}
        ],
        temperature=0.2
    )
    predicted_skills.append(response.choices[0].message.content)

# Attach each prediction to the row it was generated from (same order).
for row, predicted_skill in zip(updated_rows, predicted_skills):
    row['pred_skill'] = predicted_skill

with open(pred_csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()
    csv_writer.writerows(updated_rows)