# Upstage Fine-tuning API - Jeju Island AI-powered travel planner

Code authored by: Jonathan Siew Zunxian

Template by: Shawhin Talebi

1. https://github.com/ShawhinT/YouTube-Blog/blob/main/LLMs/fine-tuning/ft-example.ipynb
2. https://github.com/ShawhinT/YouTube-Blog/blob/main/LLMs/ai-assistant-openai/finetuning-api.ipynb

### Initial Set Up: (done)

In [1]:
pip install openai

Collecting openai
  Downloading openai-1.37.2-py3-none-any.whl.metadata (22 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.37.2-py3-none-any.whl (337 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.1/337.1 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading h11-0.14.0-py3-none-a

In [2]:
from openai import OpenAI
import csv
import json
import random

### Create client: (done)

In [3]:
from google.colab import userdata

# Fetch the API key stored under the name 'Upstage' via google.colab.userdata,
# so the key itself never appears in the notebook.
api_key_value = userdata.get('Upstage')

# OpenAI SDK client redirected to the Upstage Solar endpoint through base_url.
client = OpenAI(base_url="https://api.upstage.ai/v1/solar", api_key=api_key_value)

In [4]:
# NOTE(review): this cell repeats the client setup of the previous cell
# (same key lookup, same base_url); one of the two can probably be removed.
from google.colab import userdata

api_key_value = userdata.get('Upstage')

client_options = {
    "api_key": api_key_value,
    "base_url": "https://api.upstage.ai/v1/solar",
}
client = OpenAI(**client_options)

### Prepare training data

In [5]:
import csv

# Parallel lists: question_list[i] is answered by response_list[i].
question_list = []
response_list = []

# Use the correct path to your CSV file.
# newline='' is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly; the encoding is stated explicitly so the
# result does not depend on the platform default.
with open('Jeju Island Comprehensive FAQ.csv', mode='r', newline='', encoding='utf-8') as file:
    csv_reader = csv.reader(file)

    # The first row is always skipped; it is assumed to be the header row.
    next(csv_reader, None)

    for line in csv_reader:
        # Blank lines come back as [] and malformed rows may have a single
        # column; skip them instead of failing with IndexError.
        if len(line) < 2:
            continue
        question_list.append(line[0])
        response_list.append(line[1])

# Number of question/response pairs loaded.
len(question_list)

80

In [6]:
# System prompt shared by every training example: persona description followed
# by two worked question/answer examples.
# NOTE(review): the prompt asks for the sign-off '–AINewbiesTourismGPT'
# (en dash, no space) while the two examples end with '- AINewbiesTourismGPT'
# (hyphen and space); confirm which form the training responses use.
instructions_string_few_shot = """TourismGPT, acting as a virtual guide to Jeju Island, uses clear, accessible language, offering more technical depth upon request. \
It aptly reacts to feedback and ends its messages with '–AINewbiesTourismGPT'. Responses are tailored to the length and style of the viewer comments, ensuring a natural and engaging interaction.

Here are examples of TourismGPT responding to viewer comments.

Question: What is the size of Jeju Island?
AINewbiesTourismGPT: Jeju Island measures about 73 km by 31 km. - AINewbiesTourismGPT

Question: What is the climate like in Jeju?
AINewbiesTourismGPT: Jeju has a mild climate, rarely dropping below zero in the winter. - AINewbiesTourismGPT
"""

# One chat-format record per FAQ row: system prompt, then the question as the
# user turn, then the matching response as the assistant turn.
example_list = [
    {
        "messages": [
            {"role": "system", "content": instructions_string_few_shot},
            {"role": "user", "content": question},
            {"role": "assistant", "content": response_list[position]},
        ]
    }
    for position, question in enumerate(question_list)
]


In [7]:
# Hold out a fixed number of randomly chosen examples for validation; whatever
# remains in example_list afterwards is the training set.
VALIDATION_SIZE = 9
SPLIT_SEED = 42

# A dedicated seeded generator makes the split reproducible without touching
# the global random state.
split_rng = random.Random(SPLIT_SEED)

# range(len(example_list)) covers every index, so the last example can be
# selected too. sample() raises ValueError if there are fewer than
# VALIDATION_SIZE examples.
validation_index_list = split_rng.sample(range(len(example_list)), VALIDATION_SIZE)

validation_data_list = [example_list[index] for index in validation_index_list]

# Remove the held-out examples by position (not by equality) so that exactly
# the sampled entries leave the training set, even if two examples are equal.
# Slice assignment keeps the same list object.
# NOTE: re-running this cell on its own shrinks example_list again; re-run the
# cell that builds example_list first.
validation_index_set = set(validation_index_list)
example_list[:] = [
    example
    for index, example in enumerate(example_list)
    if index not in validation_index_set
]

In [8]:
# Serialise each split as JSON Lines: one JSON object per line.
with open('training-data.jsonl', 'w') as train_out:
    train_out.writelines(json.dumps(record) + '\n' for record in example_list)

with open('validation-data.jsonl', 'w') as validation_out:
    validation_out.writelines(
        json.dumps(record) + '\n' for record in validation_data_list
    )

### Upload training examples to Upstage API

In [17]:
import openai
from pathlib import Path

from google.colab import userdata
api_key_value=userdata.get('Upstage')

client = OpenAI(
    api_key=api_key_value,
    base_url="https://api.upstage.ai/v1/solar"
)

# NOTE(review): the recorded output of this cell is
# "NotFoundError: 404 page not found"; confirm that the API behind base_url
# exposes the Files endpoint before relying on this cell.

# Make sure the files exist in the specified path
training_file_path = "training-data.jsonl"
validation_file_path = "validation-data.jsonl"


def upload_fine_tune_file(path):
    """Upload one JSONL file with purpose "fine-tune" and return the API's file object.

    Args:
        path: Location of the local file to upload.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
    """
    path = Path(path)
    if not path.is_file():
        raise FileNotFoundError(f"Expected data file not found: {path}")
    # The context manager closes the handle once the upload call returns,
    # instead of leaving it open for the lifetime of the kernel.
    with path.open("rb") as handle:
        return client.files.create(file=handle, purpose="fine-tune")


# Create the training file
training_file = upload_fine_tune_file(training_file_path)

# Create the validation file
validation_file = upload_fine_tune_file(validation_file_path)

print(f"Training file uploaded: {training_file.id}")
print(f"Validation file uploaded: {validation_file.id}")

NotFoundError: 404 page not found

### Create a fine-tuned model

In [None]:
# Start a fine-tuning job on the base model using the two uploaded files; the
# suffix is passed along with the request. The returned job object is the
# cell's displayed output.
fine_tuning_job_params = {
    "model": "solar-1-mini-chat",
    "suffix": "AINewbiesTourismGPT",
    "training_file": training_file.id,
    "validation_file": validation_file.id,
}
client.fine_tuning.jobs.create(**fine_tuning_job_params)

### Use fine-tuned model

In [None]:
# Candidate prompts for a quick smoke test; only the last entry is sent.
test_comments = [
    "Great content, thank you!",
    "I am typing this after watching half of the video as I am already amazed with the clarity of explanation. exceptional.",
    "What is fat-tailedness?",
]
test_comment = test_comments[-1]

# NOTE(review): this id looks like a placeholder carried over from the
# template; replace it with the model id reported by the fine-tuning job.
fine_tuned_model_id = "ft:upstage-3.5-turbo-0613:personal:shawgpt:8mUeVreo"

response = client.chat.completions.create(
    model=fine_tuned_model_id,
    messages=[
        # Same system prompt that was used to build the training examples.
        {"role": "system", "content": instructions_string_few_shot},
        {"role": "user", "content": test_comment},
    ],
)

# The SDK returns typed objects, so read the reply through attributes;
# dict(response) only converts the top level and leaves each choice as an
# object that cannot be indexed with ['message'].
print(response.choices[0].message.content)

# delete file
client.files.delete(training_file.id)
client.files.delete(validation_file.id)

### More resources

- Upstage Guide: [Insert Upstage Guide URL]
- Fine-tuning doc: [Insert Fine-tuning Documentation URL]
- Fine-tuning data prep: [Insert Fine-tuning Data Preparation URL]