In [1]:
from utils import *
import pandas as pd

In [53]:
import os
import replicate
from openai import OpenAI

In [None]:
# IPython shell escape: hands `set REPLICATE_API_TOKEN=` to the system shell.
# NOTE(review): this looks like Windows cmd syntax, and an assignment made in the
# spawned shell presumably does not reach this kernel's os.environ - confirm
# whether this cell has any effect at all.
! set REPLICATE_API_TOKEN=

In [None]:
def llama2_70b_chat(prompt):
    """Run ``prompt`` through Llama-2-70B-chat on Replicate and return the text.

    The API token is read from the ``REPLICATE_API_TOKEN`` environment
    variable. The process environment is never modified, so a token exported
    before the notebook started stays intact across calls.

    Args:
        prompt: Text sent as the ``prompt`` input of the model.

    Returns:
        str: Every chunk produced by the model run, concatenated in order.

    Raises:
        RuntimeError: If ``REPLICATE_API_TOKEN`` is unset or empty.
    """
    api_token = os.environ.get("REPLICATE_API_TOKEN")
    if not api_token:
        # Fail before any network call instead of sending a blank credential.
        raise RuntimeError(
            "REPLICATE_API_TOKEN is not set; export it before calling llama2_70b_chat()."
        )

    client = replicate.Client(api_token=api_token)
    output = client.run(
        "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
        input={"prompt": prompt},
    )

    # The run result is iterated chunk by chunk; stitch the chunks back together.
    return "".join(output)


def llama3_70b_instruct(prompt):
    """Run ``prompt`` through Meta-Llama-3-70B-instruct on Replicate and return the text.

    The API token is read from the ``REPLICATE_API_TOKEN`` environment
    variable. The process environment is never modified, so a token exported
    before the notebook started stays intact across calls.

    Args:
        prompt: Text sent as the ``prompt`` input of the model.

    Returns:
        str: Every chunk produced by the model run, concatenated in order.

    Raises:
        RuntimeError: If ``REPLICATE_API_TOKEN`` is unset or empty.
    """
    api_token = os.environ.get("REPLICATE_API_TOKEN")
    if not api_token:
        # Fail before any network call instead of sending a blank credential.
        raise RuntimeError(
            "REPLICATE_API_TOKEN is not set; export it before calling llama3_70b_instruct()."
        )

    client = replicate.Client(api_token=api_token)
    output = client.run("meta/meta-llama-3-70b-instruct", input={"prompt": prompt})

    # The run result is iterated chunk by chunk; stitch the chunks back together.
    return "".join(output)


def mistral_7b_instruct_v3(prompt):
    """Send ``prompt`` to Mistral-7B-Instruct-v0.3 via DeepInfra's OpenAI-compatible API.

    The API key is read from the ``DEEPINFRA_TOKEN`` environment variable
    rather than being written into the notebook.

    Args:
        prompt: Text sent as the single ``user`` message of the chat.

    Returns:
        The ``content`` of the first choice's message in the completion.

    Raises:
        RuntimeError: If ``DEEPINFRA_TOKEN`` is unset or empty.
    """
    api_key = os.environ.get("DEEPINFRA_TOKEN")
    if not api_key:
        # Fail before any network call instead of sending a blank credential.
        raise RuntimeError(
            "DEEPINFRA_TOKEN is not set; export it before calling mistral_7b_instruct_v3()."
        )

    # Named `client` so the local does not shadow the `openai` package name.
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.deepinfra.com/v1/openai",
    )

    chat_completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
    )

    return chat_completion.choices[0].message.content

In [2]:
# Module-level LLM handle; the knowledge_* helpers in this notebook call
# gpt4.free_output(...) on it.
# NOTE(review): OpenAI_LLM is presumably supplied by `from utils import *` - confirm.
gpt4 = OpenAI_LLM("gpt-4o")

In [3]:
# Load the three data sets by name through read_json.
# NOTE(review): read_json is presumably supplied by `from utils import *`; how a
# name maps to a file on disk is not visible in this notebook - confirm.
api_to_post = read_json("api_to_post")
# generate_with_autodoc scans this for entries carrying "name" and "description".
benchmark = read_json("benchmark")
api_candidates = read_json("api_candidates")

In [4]:
def knowledge_extraction(
    knowledge_type,
    api,
    api_description,
    knowledge_type_description,
    example_knowledge,
    post,
    temperature,
):
    """Ask the shared ``gpt4`` handle to extract one kind of knowledge from a post.

    Reads the extraction instruction template from disk, fills its
    placeholders with the arguments, and submits the resulting prompt.

    Args:
        knowledge_type: Fills the ``{knowledge_type}`` placeholder.
        api: Fills the ``{api}`` placeholder.
        api_description: Fills the ``{api_description}`` placeholder.
        knowledge_type_description: Fills ``{knowledge_type_description}``.
        example_knowledge: Fills the ``{example_knowledge}`` placeholder.
        post: Fills the ``{post}`` placeholder.
        temperature: Forwarded to ``gpt4.free_output``.

    Returns:
        The first element of what ``gpt4.free_output`` returns.
    """
    template_path = "prompts/autodoc/knowledge_extraction/instruction.txt"
    with open(template_path, "r") as template_file:
        template = template_file.read()

    placeholders = {
        "knowledge_type": knowledge_type,
        "api": api,
        "api_description": api_description,
        "knowledge_type_description": knowledge_type_description,
        "example_knowledge": example_knowledge,
        "post": post,
    }
    user_prompt = template.format(**placeholders)

    outputs = gpt4.free_output(user_prompt, temperature=temperature)
    return outputs[0]


def knowledge_validation(api, api_description, extracted_knowledge, post, temperature):
    """Ask the shared ``gpt4`` handle to validate extracted knowledge against a post.

    Reads the validation instruction template from disk, fills its
    placeholders with the arguments, and submits the resulting prompt.

    Args:
        api: Fills the ``{api}`` placeholder.
        api_description: Fills the ``{api_description}`` placeholder.
        extracted_knowledge: Fills the ``{extracted_knowledge}`` placeholder.
        post: Fills the ``{post}`` placeholder.
        temperature: Forwarded to ``gpt4.free_output``.

    Returns:
        The first element of what ``gpt4.free_output`` returns.
    """
    template_path = "prompts/autodoc/knowledge_validation/instruction.txt"
    with open(template_path, "r") as template_file:
        template = template_file.read()

    placeholders = {
        "api": api,
        "api_description": api_description,
        "extracted_knowledge": extracted_knowledge,
        "post": post,
    }
    user_prompt = template.format(**placeholders)

    outputs = gpt4.free_output(user_prompt, temperature=temperature)
    return outputs[0]


def knowledge_summarization(api, knowledge_list, temperature):
    """Ask the shared ``gpt4`` handle to summarize a list of knowledge items for an API.

    Reads the summarization instruction template from disk, fills its
    placeholders with the arguments, and submits the resulting prompt.

    Args:
        api: Fills the ``{api}`` placeholder.
        knowledge_list: Fills the ``{knowledge_list}`` placeholder.
        temperature: Forwarded to ``gpt4.free_output``.

    Returns:
        The first element of what ``gpt4.free_output`` returns.
    """
    template_path = "prompts/autodoc/knowledge_summarization/instruction.txt"
    with open(template_path, "r") as template_file:
        template = template_file.read()

    user_prompt = template.format(api=api, knowledge_list=knowledge_list)

    outputs = gpt4.free_output(user_prompt, temperature=temperature)
    return outputs[0]

In [None]:
def generate_with_autodoc(api, model="gpt4o", temperature=0.8, ablate=[]):
    """Summarize what the retrieved posts say about ``api``.

    Three passes run over the rows of ``benchmark_csv/retrieved_post.csv``
    whose ``api`` column equals ``api``:

    1. ``knowledge_extraction`` is called for every post, using the post's
       own ``knowledge_type``.
    2. ``knowledge_validation`` is called for every post whose extraction is
       not exactly ``"No such knowledge"``, checking that post's own
       extraction against that post.
    3. Extractions whose validation is exactly ``"Yes"`` are concatenated and
       handed to ``knowledge_summarization``.

    Args:
        api: API name; matched against ``name`` in the module-level
            ``benchmark`` and against the ``api`` column of the CSV.
        model: Currently unused by this function; kept so existing calls
            keep working.
        temperature: Forwarded to every extraction, validation and
            summarization call.
        ablate: Currently unused by this function (never read or mutated).

    Returns:
        tuple: ``(summarization_result, knowledge_list, "")`` where
        ``knowledge_list`` is the newline-terminated text of validated
        knowledge that was summarized.

    Raises:
        ValueError: If ``api`` has no entry in ``benchmark``.
        KeyError: If a row's ``knowledge_type`` is not one of the seven
            types listed below.
    """
    knowledge_type_description = {
        "functionality": "describes the actions or operations an API can perform",
        "concept": "covers the foundational ideas and terminologies for understanding and effectively utilizing an API.",
        "performance": "refers to the time and memory efficiency of an API.",
        "directive": "is an essential type of knowledge that provides guidelines on the proper use of an API, including best practices to follow and actions to avoid.",
        "pattern": "illustrates common use cases for applying the API to solve specific problems or achieve certain outcomes.",
        "environment": "specifies the necessary conditions, system requirements, or configurations under which an API can function correctly.",
        "alternative": "suggests other APIs offering similar functionality, which can be considered as replacements or complementary options.",
    }

    knowledge_type_example = {
        "functionality": "tf.gather is used to select tensor elements at specific indices.",
        "concept": "Tensor is essentially a high-dimensional array.",
        "performance": "tf.gather has overhead when used on large tensors.",
        "directive": "When using tf.gather, ensure indices are within the shape of the input tensor.",
        "pattern": "tf.gather is commonly used in embedding lookup operations.",
        "environment": "tf.gather requires TensorFlow installed and supports both CPU and GPU execution.",
        "alternative": "Alternatives to tf.gather include tf.scatter_nd and tf.index_select.",
    }

    # Use the first benchmark entry for this API; an unknown API is reported
    # up front instead of surfacing later as an unbound local variable.
    for entry in benchmark:
        if entry["name"] == api:
            api_description = entry["description"]
            break
    else:
        raise ValueError(f"API {api!r} was not found in benchmark")

    df = pd.read_csv("benchmark_csv/retrieved_post.csv")
    api_df = df[df["api"] == api]

    records = [
        {
            "post_id": row["post_id"],
            "cleaned_post": row["post"],
            "knowledge_type": row["knowledge_type"],
        }
        for _, row in api_df.iterrows()
    ]

    # Pass 1: extract knowledge of the post's own type from every post.
    for record in records:
        kind = record["knowledge_type"]
        record["extraction_result"] = knowledge_extraction(
            kind,
            api,
            api_description,
            knowledge_type_description[kind],
            knowledge_type_example[kind],
            record["cleaned_post"],
            temperature,
        )

    # Pass 2: validate each post's OWN extraction. Reusing a variable left
    # over from pass 1 would check every post against the last extraction.
    for record in records:
        if record["extraction_result"] != "No such knowledge":
            record["validation_result"] = knowledge_validation(
                api,
                api_description,
                record["extraction_result"],
                record["cleaned_post"],
                temperature,
            )

    # Pass 3: keep only extractions the validator answered exactly "Yes" to.
    # NOTE(review): both sentinel comparisons are exact string matches, so
    # stray whitespace or punctuation in a model reply changes the outcome -
    # confirm the prompts pin the reply format.
    knowledge_list = "".join(
        f"{record['extraction_result']}. Knowledge type: {record['knowledge_type']}\n"
        for record in records
        if record.get("validation_result") == "Yes"
    )
    summarization_result = knowledge_summarization(api, knowledge_list, temperature)
    return summarization_result, knowledge_list, ""