In [6]:
import sys
sys.path.append("..")

from toolset import Toolset
import os
import pandas as pd 

In [7]:
# Load the API documentation once; later cells index it as
# api_docs[tool_name][api_name]['description'].
toolset = Toolset()
api_docs = toolset.get_api_docs()

# Every prompt/response pair generated below is written under this directory
# (created on first run, left untouched if it already exists).
out_dir = "output/instruction_data/type-v/"
os.makedirs(out_dir, exist_ok=True)

### comb1

In [11]:
# Combination 1: the skyscanner80 hotel endpoints, written as "tool/api".
api_comb = [
    "skyscanner80/hotels_search",
    "skyscanner80/hotels_hotel_detail",
    "skyscanner80/hotels_prices",
    "skyscanner80/hotels_reviews",
    "skyscanner80/hotels_similar_hotels",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. This API is responsible for providing a list of hotels
2. This API is responsible for providing hotel details
3. This API is responsible for providing a price list
4. This API is responsible for providing a review list
5. This API is responsible for providing a list of similar hotels


In [30]:
# Seed instruction shown to the model as an example of the expected style.
# Alternative seeds are kept commented out.
# example = "I am going to travel to London for business. Please help check the high-rate hotels under 500 USD per night in the business district."
# example = "I am going to Dubai for holiday. Can you give me a table of hotels in the Palm Jumeirah. Include price, a few photos, and rate and other important info in the table"
example = "provide me with a list of hotels in the downtown area of Shenzhen, sorted by price."

# The prompt carries a hand-written summary of the five hotel APIs and the
# seed instruction interpolated at {example}.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

Tool: query hotels and flights in real-time data. By searching and comparing hotel options, the tool assists travelers in finding the most cost-effective option for their journey.
API-1. for providing a list of hotels
API-2. for providing hotel details
API-3. for checking hotel price
API-4. for providing a review list
API-5. for providing a list of similar hotels

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir, "comb1_group2.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb2

In [31]:
# Combination 2: the tourist_attraction endpoints, written as "tool/api".
api_comb = [
    "tourist_attraction/search",
    "tourist_attraction/detail",
    "tourist_attraction/photos",
    "tourist_attraction/reviews",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. Search tourist attraction
2. Get detail of tourist attraction
3. Get photos of tourist attraction
4. Get reviews of tourist attraction


In [34]:
# Seed instruction shown to the model as an example of the expected style.
# Alternative seeds are kept commented out.
# example = "I am planning a travel and I am considering Tokyo. Can you show me some tourist attractions there?"
# example = "Give me some tourist information about Fuji-san."
example = "I am writing an article about the must go places in China. Collect some materials for me please."

# The prompt carries the tool description, the four tourist-attraction APIs
# and the seed instruction interpolated at {example}.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

Tool description: Discover the world's wonders with ease using the Tourist Attraction API. Unleash a wealth of information about popular attractions worldwide, including historical sites, natural landmarks, museums, and more. Seamlessly integrate this API to provide users with detailed insights, captivating images, and essential details, enriching their travel experiences like never before. Whether you\u2019re crafting travel apps, tour guides, or adventure platforms, the Tourist Attraction API is your gateway to enhancing journeys and exploration.
API-1. Search tourist attraction
API-2. Get detail of tourist attraction
API-3. Get photos of tourist attraction
API-4. Get reviews of tourist attraction

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir, "comb2_group3.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb3

In [35]:
# Combination 3: the skyscanner80 flight endpoints, written as "tool/api".
api_comb = [
    "skyscanner80/flights_search_one_way",
    "skyscanner80/flights_search_roundtrip",
    "skyscanner80/flights_search_everywhere",
    "skyscanner80/flights_search_incomplete",
    "skyscanner80/flights_flight_detail",
    "skyscanner80/flights_price_calendar",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. This API helps to get the list of one-way flights. Note:- In the event that the status is incomplete (data->context->status=incomplete), you must utilize the api/v1/flights/search-incomplete endpoint to retrieve the complete data until it's complete (data->context->status=complete).
2. This API helps to get the list of round trip flights. Note:- In the event that the status is incomplete (data->context->status=incomplete), you must utilize the `flights_search_incomplete` endpoint to retrieve the complete data until it's complete (data->context->status=complete).
3. This API helps to find flights with uncertain destinations.
4. Obtain complete data for the endpoint of flights_search_one_way, flights_search_roundtrip. Until the item's status is complete (data->context->status=complete), you must call the API multiple times
5. Get all the details of the fights based on itineraryId and token.
6. This API will return a calendar for the given source and destination


In [127]:
# Seed instruction shown to the model as an example of the expected style.
# Alternative seeds are kept commented out.
# example = "Check the flight price between Hongkong and Brisbane for me. Depart from Hongkong on July 10 and return from Brisbane on July 20. Compare the total prices of two single-trip and one round-trip."
# example = "Check the prices of round-trip flights from Hongkong to Brisbane, with departure date July 10 and return date July 20. Direct flights only."
example = "I plan to travel to Sydney for 7 days in August. Can you check the flights and tell me the best departure and return dates?"

# The API list in the prompt is numbered 1, 2, 3, 5, 6: entry 4
# (flights_search_incomplete, a polling helper) is left out.
# NOTE(review): presumably intentional, but the gap in numbering and the
# hotel-oriented tool description are worth confirming.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

Tool description: query hotels and flights in real-time data. By searching and comparing hotel options, the tool assists travelers in finding the most cost-effective option for their journey.
1. This API helps to get the list of one-way flights. 
2. This API helps to get the list of round trip flights. 
3. This API helps to find flights with uncertain destinations.
5. Get all the details of the fights based on itineraryId and token.
6. This API will return a calendar for the given source and destination

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir,"comb3_group3.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb4

In [39]:

# Combination 4: web screenshot + OCR endpoints, written as "tool/api".
api_comb = [
    "web_capture/take_image_screenshot",
    "ocr_extract_text/extract_text_from_image_file",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. Get Screenshot of any website by providing URL and the image ratio (width, height)
2. Image must be a regular **JPEG** or **PNG** image (with or without transparency)


In [41]:
# Seed instruction shown to the model as an example of the expected style.
# Alternative seeds are kept commented out.
# example = "I want to save the article at https://lilianweng.github.io/posts/2024-02-05-human-data-quality/ . Save the screenshot of this webpage and extract text from it."
example = "save this article for me https://www.infoworld.com/article/3715457/generative-ai-agents-will-revolutionize-ai-architecture.html"
# example = ""

# The prompt names the two endpoints directly; only the screenshot endpoint
# is given a description, the OCR endpoint is listed by name alone.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

1. web_capture/take_image_screenshot: Get Screenshot of any website by providing URL
2. ocr_extract_text/extract_text_from_image_file

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir,"comb4_group2.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb5

In [42]:

# Combination 5: image manipulation endpoints, written as "tool/api".
api_comb = [
    "image_processing/compress_image",
    "image_processing/resize_image",
    "image_processing/convert_format",
    "image_remove_bg/remove_bg",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. Compress image size at the cost of lower quality.
2. Changing the size (i.e. width and height) of image.
3. Convert the format of an image.
4. Given the local path of an image, remove the background, and save the new image locally.


In [45]:

# Seed instruction shown to the model as an example of the expected style.
example = "I have one image at files/object_detection/birds.jpg. I want to remove its background, and save it to png format."
# example = ""

# The prompt lists the four image APIs by description only (no endpoint
# names); the fourth description is shortened relative to the API docs.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

1. Compress image size at the cost of lower quality.
2. Changing the size (i.e. width and height) of image.
3. Convert the format of an image.
4. remove the background of an image.

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir,"comb5_group2.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb7

In [47]:
# Combination 7: image search, compression, OCR-by-URL and resize endpoints.
api_comb = [
    "bing_search_apis/images_search",
    "image_processing/compress_image",
    "ocr_extract_text/extract_text_from_image_url",
    "image_processing/resize_image",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. Search images from the search engine for the most relevant keywords and images related to keywords and images.
2. Compress image size at the cost of lower quality.
3. Image must be a regular **JPEG** or **PNG** image (with or without transparency)
4. Changing the size (i.e. width and height) of image.


In [49]:
# Seed instruction shown to the model as an example of the expected style.
# The alternative seed is kept commented out.
# example = "Collect some proverb images and save the text in them."
example = "Curate a collection of classic book covers suitable for thumbnails"

# The prompt lists each API as "<endpoint name>: <description>" followed by
# the seed instruction interpolated at {example}.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

1. images_search: Search images from the search engine for the most relevant keywords and images related to keywords and images.
2. compress_image: Compress image size at the cost of lower quality.
3. extract_text_from_image_url: Image must be a regular **JPEG** or **PNG** image (with or without transparency)
4. resize_image: Changing the size (i.e. width and height) of image.

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir,"comb7_group2.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### comb9

In [50]:
# Combination 9: web, image, video and news search endpoints.
api_comb = [
    "bing/web_search",
    "bing_search_apis/images_search",
    "google_api/video_search",
    "news_api/search",
]

# Numbered (1-based) description of each endpoint, looked up in api_docs.
api_desc_list = [
    f"{pos}. " + api_docs[tool][endpoint]["description"]
    for pos, (tool, endpoint) in enumerate(
        (spec.split("/") for spec in api_comb), start=1
    )
]
api_desc_text = "\n".join(api_desc_list)
print(api_desc_text)

1. Web search.
2. Search images from the search engine for the most relevant keywords and images related to keywords and images.
3. Search videos over web.
4. Endpoint to search news by specific keywords.


In [53]:
# Seed instruction shown to the model as an example of the expected style.
# Alternative seeds are kept commented out.
# example = "I am writing a book about Bill Gates. Help me collect some materials about him. I need some inspiration."
# example = "I am a fan of Taylor Swift. Show me some recent videos and news about her."
example = "Any news about NBA finals?"

# The prompt lists each search API as "<endpoint name>: <description>"
# followed by the seed instruction interpolated at {example}.
prompt = f"""Please generate 5 high-level instructions which can be solved with the combination of some of the APIs below. 

1. web_search: Web search.
2. images_search: Search images from the search engine for the most relevant keywords and images related to keywords and images.
3. video_search: Search videos over web.
4. news_api/search: Endpoint to search news by specific keywords.

For example:
Instruction: {example}
"""

# NOTE(review): `GPT` is not imported in any cell visible in this notebook;
# confirm where it is defined before a Restart & Run All.
gpt = GPT(model_name="gpt-4-turbo")
resp = gpt.chat([{"role": "user", "content": prompt}])
# Store prompt and reply in one file; the "==== RESPONSE ====" separator is
# what the merge cell splits on to recover the reply.
with open(os.path.join(out_dir,"comb9_group3.txt"), "w") as file:
    file.write(prompt)
    file.write("\n\n==== RESPONSE ====\n")
    file.write(resp['content'])

### parse and merge data

In [None]:
import re

def parse_instructions(resp_text):
    """Split a model reply into instructions and their explanations.

    The reply is scanned for instruction markers, trying these formats in
    order and using the first one that occurs at least once:

    1. ``Instruction <number>:``
    2. ``<number><separator> Instruction:`` (e.g. ``3. Instruction:``)
    3. ``Instruction:``

    The text between one marker and the next (or the end of the reply) is one
    item: its first line is the instruction, any remaining lines are the
    explanation. Text before the first marker is discarded.

    Args:
        resp_text: Raw text of the model reply.

    Returns:
        A pair ``(instruction_list, explain_list)`` of equal-length lists of
        strings; ``explain_list[i]`` is ``""`` when item ``i`` is one line.

    Raises:
        ValueError: If none of the marker formats occurs in ``resp_text``.
    """
    # `\d+` (not `\d`) so that items numbered 10 and above are recognised as
    # whole markers instead of being split mid-number or merged into item 9.
    # The `.` in the second pattern is kept as "any separator character" so
    # every reply the previous single-digit pattern accepted still parses.
    marker_patterns = (
        r"Instruction \d+:",
        r"\d+. Instruction:",
        r"Instruction:",
    )
    for pattern in marker_patterns:
        markers = list(re.finditer(pattern, resp_text))
        if markers:
            break
    else:
        raise ValueError("no instruction found.")

    instruction_list = []
    explain_list = []
    for pos, marker in enumerate(markers):
        # An item runs from the end of its marker to the start of the next
        # marker, or to the end of the reply for the last item.
        if pos + 1 < len(markers):
            item_end = markers[pos + 1].start()
        else:
            item_end = len(resp_text)
        item_text = resp_text[marker.end():item_end].strip()
        instruction, _, explain = item_text.partition("\n")
        instruction_list.append(instruction)
        explain_list.append(explain.strip())
    return instruction_list, explain_list

In [129]:
# Merge every saved "<prompt> ==== RESPONSE ==== <reply>" file under out_dir
# into a single CSV with one row per parsed instruction.
data_dir = out_dir
# os.listdir() returns entries in arbitrary order; sort so the merged CSV has
# the same row order on every run and platform.
name_list = sorted(os.listdir(data_dir))

merge_instruction_list = []
merge_explain_list = []
merge_name_list = []
for name in name_list:
    if not name.endswith(".txt"):
        continue
    print(f"== {name} ==")
    fn = os.path.join(data_dir, name)
    # Read the whole file, then release the handle before parsing.
    with open(fn) as file:
        cont = file.read()
    # Everything after the last separator is the model reply.
    resp_text = cont.split("\n\n==== RESPONSE ====\n")[-1]
    try:
        instruction_list, explain_list = parse_instructions(resp_text)
    except Exception as e:
        # Best-effort merge: report the unparseable file (its name was
        # printed just above) and carry on with the rest.
        print(e)
        continue
    merge_instruction_list.extend(instruction_list)
    merge_explain_list.extend(explain_list)
    # Repeat the file name once per instruction so the columns stay aligned.
    merge_name_list.extend([name]*len(instruction_list))

df = pd.DataFrame(data={"fn": merge_name_list, "instruction": merge_instruction_list, "explain": merge_explain_list})
# The CSV is written one level above data_dir, next to the type-v folder.
df.to_csv(os.path.join(data_dir, "../merged_instructions_type-v_semi.csv"))

== comb1_group1.txt ==
== comb1_group2.txt ==
== comb2_group1.txt ==
== comb2_group2.txt ==
== comb2_group3.txt ==
== comb3_group1.txt ==
== comb3_group2.txt ==
== comb3_group3.txt ==
== comb4_group1.txt ==
== comb4_group2.txt ==
== comb5_group1.txt ==
== comb5_group2.txt ==
== comb7_group1.txt ==
== comb7_group2.txt ==
== comb9_group1.txt ==
== comb9_group2.txt ==
== comb9_group3.txt ==
