# Using GPT-3 few-shot learning to extract Jarvix entities

In [13]:
import openai as ai

import os  # stdlib; only needed for the environment lookup below

# Read the API key from the OPENAI_API_KEY environment variable so that no
# secret has to be pasted into (and saved with) this file. When the variable
# is unset the key stays the empty string, exactly as before.
ai.api_key = os.environ.get("OPENAI_API_KEY", "")

def generate_gpt3_response(user_text, print_output=False):
    """
    Send a prompt to the OpenAI completion endpoint and return the generated text
    :type user_text: str prompt forwarded verbatim to the model
    :type print_output: boolean when true, print the raw response object before returning
    :return: the ``text`` field of the first choice in the response
    """
    request = {
        "engine": 'text-davinci-003',  # Model used for the completion
        "temperature": 0.5,            # Sampling temperature
        "prompt": user_text,           # Prompt text, passed through unchanged
        "max_tokens": 100,             # Upper bound on tokens generated for the completion
        "n": 1,                        # Ask for a single completion
        "stop": None,                  # No stop sequence configured
    }
    completions = ai.Completion.create(**request)

    # Dumping the raw response is handy when debugging a bad answer
    if print_output:
        print(completions)

    # Only one completion was requested, so take the first choice
    first_choice = completions.choices[0]
    return first_choice.text

In [20]:
import pandas as pd

def get_ner_prompt(few_shot: int, data_path = "data/chunk_question_v1_1.csv"):
    """
    Build a few-shot NER prompt from a CSV of whitespace-tokenised, tagged rows.

    The first ``few_shot`` rows become worked examples: a ``[Text]:`` line
    followed by one ``[<label>]:<chunk>`` line per token and a closing
    ``###``. The row right after them is appended as an unanswered
    ``[Text]:`` line for the model to complete.

    :type few_shot: int number of worked examples to put in the prompt
    :type data_path: str path of a CSV with a header row and the columns
        "text" (space-separated chunks) and "ner_tags" (space-separated tag ids)
    :return: tuple ``(prompt, question_text)``; ``question_text`` is the
        trailing unanswered ``[Text]:`` line, or "" when the CSV has no row
        left after the examples
    """
    # Tag id as stored in the "ner_tags" column -> readable label.
    ner_remapping = {"0":"O",  "1":"I-Column", "2":"B-Column", "3":"I-Operation", "4":"B-Operation", "5":"I-Restriction", "6":"B-Restriction", "7":"I-Pattern", "8":"B-Pattern", "9":"I-Others", "10":"B-Others"}

    dataframe = pd.read_csv(data_path, header=0)

    # Prompt layout: [Text]:...\n[<label>]:<chunk>\n...###[Text]:<new text>\n
    pieces = []
    question_text = ""
    # Count rows by position (not by index label) so that exactly `few_shot`
    # examples are emitted; the previous `index > few_shot` test let one
    # extra example through.
    for position, (_, row) in enumerate(dataframe.iterrows()):
        chunks = row["text"].split()
        # The sentence is rebuilt by concatenating the chunks without a separator.
        text_line = "[Text]:" + "".join(chunks) + "\n"

        if position >= few_shot:
            # First row after the examples: leave it unanswered and stop.
            question_text = text_line
            pieces.append(text_line)
            break

        # zip() pairs each chunk with its tag id; surplus items on either
        # side are ignored.
        tag_lines = "".join(
            f"[{ner_remapping[str(ner)]}]:{chunk}\n"
            for chunk, ner in zip(chunks, row["ner_tags"].split())
        )
        pieces.append(text_line + tag_lines + "###")

    return "".join(pieces), question_text



In [21]:
import pandas as pd

def get_conv_prompt(few_shot: int, data_path = "data/Jarvix_qa_label_v1_3.csv"):
    """
    Build a few-shot question-rewriting prompt from a CSV of labelled rows.

    Each of the first ``few_shot`` rows contributes two examples, one for
    its "conversation" question and one for its "conversation2" question.
    Both share the row's "column" and "text" values and are laid out as
    ``[Question]:``, ``[Columns]:``, ``[Text]:`` lines followed by ``###``.

    :type few_shot: int number of CSV rows to turn into examples
    :type data_path: str path of a CSV with a header row and the columns
        "conversation", "conversation2", "column" and "text"
    :return: tuple ``(prompt, columns)``; ``columns`` is the ``[Columns]:``
        line of the last row that was read, i.e. the first row after the
        examples, or the final row when the CSV has no more than
        ``few_shot`` rows ("" for an empty CSV)
    """
    dataframe = pd.read_csv(data_path, header=0)

    examples = []
    columns = ""
    # Count rows by position (not by index label) so that exactly `few_shot`
    # rows are used; the previous `index > few_shot` test used one row too many.
    for position, (_, row) in enumerate(dataframe.iterrows()):
        # Updated before the cut-off check on purpose: the row that ends the
        # loop still supplies the returned columns line.
        columns = "[Columns]:" + row["column"] + "\n"
        if position >= few_shot:
            break

        text_line = "[Text]:" + row["text"] + "\n"
        # Two phrasings of the question map to the same columns and text.
        for question in (row["conversation"], row["conversation2"]):
            examples.append(
                "[Question]:" + question + "\n" + columns + text_line + "###\n"
            )

    return "".join(examples), columns



## Test GPT 3

In [22]:
# Build the few-shot context (few_shot=10) and keep the columns line it returns.
prompt, columns = get_conv_prompt(10)

# New question to rewrite; the trailing "[Text]:" is left open for the model.
question = "[Question]:每個產品有哪些客戶編號\n"
prompt = "".join([prompt, question, columns, "[Text]:"])

# Expected rewrite, printed next to the model output for a manual comparison.
answer = "各 產品 的 客戶編號"
response = generate_gpt3_response(prompt)
print(f"response:{response}")
print(f"answer:{answer}")

response:各 產品 的 客戶編號
answer:各 產品 的 客戶編號


## Test GPT-NeoX
