In [93]:
# imports
import ast  # for converting embeddings saved as strings back to arrays
import os  # for reading the OpenAI API key from the environment

import openai  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
from scipy import spatial  # for calculating vector similarities for search


# models
EMBEDDING_MODEL = "text-embedding-ada-002"  # model used to embed search queries
GPT_MODEL = "gpt-3.5-turbo"  # chat model; this is the name the later cells reference
GET_MODEL = GPT_MODEL  # backward-compatible alias for the original (misspelled) constant

# Never store API keys in a notebook: they leak through version control and sharing.
# The key is read from the OPENAI_API_KEY environment variable instead; any key that
# was previously hardcoded here must be considered exposed and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [83]:
# Load the texts and their pre-computed embeddings, dropping the 'Unnamed: 0'
# column (presumably the index that was written when the CSV was saved).
df = (
    pd.read_csv(r"C:\Users\IsmailOKIEHOMAR\OneDrive - ABYS MEDICAL\Bureau\text_note_book_embedded.csv")
    .drop(columns=['Unnamed: 0'])
)
df

Unnamed: 0,content,embedding
0,Abys® Medical Cysware® 4H web platform is a me...,"[-0.007212141994386911, 0.003152686171233654, ..."
1,The Abys® Medical Cysware® 4H web platform is ...,"[-0.003898132359609008, 0.004284433554857969, ..."
2,The Abys® Medical Cysware® 4H web platform is ...,"[-0.0034425745252519846, 0.0047572036273777485..."
3,Abys® Medical Cysware® 4H web platform is a me...,"[-0.0055861095897853374, 0.003452208824455738,..."
4,Abys® Medical Cysware® 4H allows the user to l...,"[-0.012136408127844334, 0.0065508619882166386,..."
...,...,...
89,This section allows the user to enrich his PAF...,"[-0.012685165740549564, 0.011060431599617004, ..."
90,It is possible to share each PAF with any Cysw...,"[-0.00435180077329278, -0.0074991160072386265,..."
91,The Validation tab allows you to manage the st...,"[0.002673330018296838, 0.0018990215612575412, ..."
92,In order to have the best possible collaborati...,"[-0.011402417905628681, 0.0028847637586295605,..."


In [84]:
# The CSV stores each embedding as the string form of a list; parse it back
# into a real Python list. Only string cells are parsed, so re-running this
# cell after the conversion is a no-op instead of raising ValueError
# (ast.literal_eval rejects values that are already lists).
df['embedding'] = df['embedding'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)
df

Unnamed: 0,content,embedding
0,Abys® Medical Cysware® 4H web platform is a me...,"[-0.007212141994386911, 0.003152686171233654, ..."
1,The Abys® Medical Cysware® 4H web platform is ...,"[-0.003898132359609008, 0.004284433554857969, ..."
2,The Abys® Medical Cysware® 4H web platform is ...,"[-0.0034425745252519846, 0.0047572036273777485..."
3,Abys® Medical Cysware® 4H web platform is a me...,"[-0.0055861095897853374, 0.003452208824455738,..."
4,Abys® Medical Cysware® 4H allows the user to l...,"[-0.012136408127844334, 0.0065508619882166386,..."
...,...,...
89,This section allows the user to enrich his PAF...,"[-0.012685165740549564, 0.011060431599617004, ..."
90,It is possible to share each PAF with any Cysw...,"[-0.00435180077329278, -0.0074991160072386265,..."
91,The Validation tab allows you to manage the st...,"[0.002673330018296838, 0.0018990215612575412, ..."
92,In order to have the best possible collaborati...,"[-0.011402417905628681, 0.0028847637586295605,..."


In [85]:
# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 100
) -> tuple[list[str], list[float]]:
    """Rank the texts in ``df`` by relatedness to ``query``.

    Args:
        query: Text to embed with ``EMBEDDING_MODEL`` and compare against the rows.
        df: DataFrame with a ``content`` column (text) and an ``embedding``
            column (one vector per row).
        relatedness_fn: Callable taking (query_embedding, row_embedding) and
            returning a score where larger means more related. Defaults to
            cosine similarity.
        top_n: Maximum number of results to return.

    Returns:
        Two parallel lists ``(strings, relatednesses)``, sorted from most
        related to least and truncated to ``top_n`` entries. Both lists are
        empty when ``df`` has no rows.
    """
    # Nothing to rank: return early rather than failing on the unpacking of an
    # empty zip (and avoid a pointless embedding request).
    if df.empty:
        return [], []

    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]

    # Iterate the two columns directly; this avoids building a Series per row
    # as DataFrame.iterrows() does.
    scored = [
        (content, relatedness_fn(query_embedding, embedding))
        for content, embedding in zip(df["content"], df["embedding"])
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    top = scored[:top_n]
    # Return real lists, matching the declared return type.
    return [content for content, _ in top], [score for _, score in top]


In [86]:
# examples: print the relatedness score and text of the five best matches
strings, relatednesses = strings_ranked_by_relatedness(
    "curling gold medal",
    df,
    top_n=5,
)
for rank in range(min(len(strings), len(relatednesses))):
    relatedness = relatednesses[rank]
    print(f"{relatedness=:.3f}")
    display(strings[rank])

relatedness=0.741


'170'

relatedness=0.720


'Measurements taken from the Abys® Medical Cysware® 4H web platform are indicative with an accuracy of 1.25mm for distance measurements and 3° for angle measurements'

relatedness=0.715


'Abys® Medical Cysware® 4H provides measurements on its web platform that should be considered as approximate and indicative. Distance measurements have an accuracy of 1.25mm, which means that the measured value may differ by up to 1.25mm from the actual value. Similarly, angle measurements have an accuracy of 3°, which means that the measured value may differ by up to 3° from the actual value. It is important to keep in mind that these measurements are intended to be used as a tool to assist in planning and decision-making, and should not be solely relied upon for precise measurements.'

relatedness=0.714


"It's important to note that Abys® Medical Cysware® 4H is only compatible with the latest versions of web browsers like Chrome, Firefox, Safari, or Edge."

relatedness=0.708


"Abys® Medical's liability is limited exclusively to the applications and uses indicated in these instructions for use."

In [102]:
def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string.

    Args:
        text: The text to tokenize.
        model: Model name used to select the tiktoken encoding.

    Returns:
        The number of tokens ``text`` encodes to.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to an
        # encoding; fall back to cl100k_base (the encoding used by the
        # gpt-3.5-turbo family) so the count is still usable as an estimate.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int
) -> str:
    """Build the user prompt for GPT: instructions, source texts, then the question.

    Source texts are appended in ranked order (most related first) until adding
    the next one would push the full message past ``token_budget`` tokens, at
    which point no further texts are considered.
    """
    ranked_texts, _ = strings_ranked_by_relatedness(query, df)
    header = 'Use the below document on the Abys Medical Cysware 4H. If the answer cannot be found in the document, write "I could not find an answer."'
    footer = f"\n\nQuestion: {query}"

    body = header
    for text in ranked_texts:
        candidate = body + f'\n\nAbys medical document:\n"""\n{text}\n"""'
        # The budget applies to the complete message, question included.
        if num_tokens(candidate + footer, model=model) > token_budget:
            break
        body = candidate
    return body + footer


def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
) -> str:
    """Answer ``query`` with a chat model, grounding it in texts retrieved from ``df``.

    The prompt is assembled by ``query_message`` within ``token_budget`` tokens,
    optionally printed, and sent together with a fixed system message. The
    content of the first returned choice is the result.
    """
    prompt = query_message(query, df, model=model, token_budget=token_budget)
    if print_message:
        print(prompt)

    system_turn = {
        "role": "system",
        "content": "You answer questions about Surgiverse, Cysware, Cysart, Terms of use and Abys Medical.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.ChatCompletion.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=0
    )
    return completion["choices"][0]["message"]["content"]

In [103]:
# Example question using ask()'s defaults; the text is passed to ask() exactly as written.
ask('How can i do a mesurement?') 

'To make a distance measurement, the user must click twice: a first click on the starting point of the distance you wish to measure and then a 2nd click on the end point. The distance appears in white until the user clicks on the end point. A name must be given to the measurement. Once these steps are completed, the measurement appears in green and its value is annotated next to it in millimeters. To make an angle measurement, the user must click 3 times: a first click on the starting point of the angle you want to measure, then a 2nd click on the vertex and finally a 3rd click on the end point of the angle. The measurement appears in white until the user clicks on the end point. A name must be given to the measurement. Once these steps are completed, the measurement appears in green and its value is annotated next to it in degrees °.'

In [112]:
import os
import openai
import gradio
# Placeholder text for the chat input box (passed to gradio.Textbox below).
prompt = "hello, How can i help you ?"

def message_and_history(input, history):
    """Answer a chat message in the context of the conversation so far.

    Args:
        input: The new user message.
        history: List of (user_message, answer) pairs from earlier turns, or
            a falsy value (e.g. None) when the conversation is new.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the state that is fed back in on the next call.
    """
    from itertools import chain  # local import keeps this cell self-contained

    history = history or []
    # Flatten the pairs into one sequence of utterances. chain.from_iterable is
    # linear and accepts pairs stored as tuples or lists, whereas
    # sum(history, ()) is quadratic and raises TypeError on list pairs.
    utterances = list(chain.from_iterable(history))
    utterances.append(input)
    # The whole conversation, joined by spaces, is used as the query.
    output = ask(' '.join(utterances))
    history.append((input, output))
    return history, history
# Build the chat UI. Components are created inside the `with block:` context
# in this order: heading, chat display, text input, state holder, SEND button.
block = gradio.Blocks(theme=gradio.themes.Monochrome())
with block:
    gradio.Markdown("""<h1><center>SurgiverseGPT</center></h1>
    """)
    chatbot = gradio.Chatbot()
    message = gradio.Textbox(placeholder=prompt)
    # Holds the conversation history passed to and returned by message_and_history.
    state = gradio.State()
    submit = gradio.Button("SEND")
    # On click: call message_and_history(textbox value, state) and route its
    # two return values to the chat display and back into the state.
    submit.click(message_and_history, 
                 inputs=[message, state], 
                 outputs=[chatbot, state])
# Start the app (the recorded output below shows it serving on a local URL).
block.launch(debug = True)

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.
