In [None]:
import openai
from openai.embeddings_utils import distances_from_embeddings
import numpy as np  # Ensure numpy is imported
import pandas as pd



# Create a context for a question by finding the most similar context from the dataframe
def create_context(
    question, df, max_len, n, size="ada"
):
    """
    Build a context string for a question from the most similar rows of the dataframe.

    The question is embedded with the 'text-embedding-ada-002' engine, every entry of
    ``df['embeddings']`` is scored by cosine distance to that embedding, and the ``n``
    closest rows are visited in order of increasing distance.

    Args:
        question: Input sent to the embedding endpoint.
        df: DataFrame providing an 'embeddings' column and a 'text' column.
            It is left unmodified; ranking happens on a derived dataframe.
        max_len: Budget for the combined ``len()`` of the selected texts. A text is
            only added while the running total stays strictly below this value.
            The separators inserted between texts are not counted.
        n: Number of closest rows to consider.
        size: Unused by this function; kept so existing callers that pass it keep working.

    Returns:
        The selected texts (each cut to at most 1500 characters) joined by a '###'
        line surrounded by blank lines, or an empty string when nothing was selected.
    """

    # No rows means no possible context, so skip the embedding request altogether.
    if len(df) == 0:
        return ""

    # Get the embeddings for the question
    q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']

    # Get the distances from the embeddings
    distances = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')

    # assign() returns a new dataframe, so the caller's df does not gain a
    # 'distances' column as a side effect of asking a question.
    top_n_df = df.assign(distances=distances).sort_values('distances', ascending=True).head(n)

    returns = []
    cur_len = 0

    for t in top_n_df['text'].values:
        snippet = t[:1500]  # cap how much a single row can contribute
        # Stop at the first snippet that does not fit; later (less similar)
        # rows are not considered even if they would be short enough.
        if cur_len + len(snippet) >= max_len:
            break
        returns.append(snippet)
        cur_len += len(snippet)

    return "\n\n###\n\n".join(returns)



def answer_question(
    df,
    model="text-davinci-003",
    question="describe the course content of QST SM 131",
    max_len=7000,
    n=3,
    size="ada",
    debug=False,
    max_tokens=2000,
    stop_sequence=None
):
    """
    Answer a question using context retrieved from the dataframe texts.

    The context is produced by ``create_context`` and embedded, together with the
    question, in a prompt that tells the model to reply "I don't know" when the
    context does not contain the answer.

    Args:
        df: Dataframe handed to ``create_context``.
        model: Completion model name.
        question: The question to answer.
        max_len: Forwarded to ``create_context`` as its length budget.
        n: Forwarded to ``create_context`` as the number of rows to consider.
        size: Forwarded to ``create_context``.
        debug: When true, print the retrieved context before querying the model.
        max_tokens: Upper bound on generated tokens for the completion request.
        stop_sequence: Passed as ``stop`` to the completion request.

    Returns:
        The stripped text of the first completion choice. If the completion
        request (or reading its result) raises, the exception is printed and an
        empty string is returned. Exceptions raised while building the context
        are not caught here.
    """
    context = create_context(question, df, max_len=max_len, n=n, size=size)

    # In debug mode, show the context that will be sent to the model
    if debug:
        print("Context:\n" + context)
        print("\n\n")

    try:
        prompt = f"Answer the question based on the context below, and if the question can't be answered based on the context, say \"I don't know\"\n\nContext: {context}\n\n---\n\nQuestion: {question}\nAnswer:"
        # temperature=0 with top_p=1 and no penalties keeps answers as repeatable as the API allows
        completion = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
            stop=stop_sequence,
            temperature=0,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        first_choice = completion["choices"][0]
        return first_choice["text"].strip()
    except Exception as err:
        # Best effort: report the failure and fall back to an empty answer
        print(err)
        return ""
    

In [None]:
import ast
import os

# SECURITY: the API key must not live in source control. It is read from the
# OPENAI_API_KEY environment variable instead; the key that was previously
# committed here should be treated as leaked and revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

df = pd.read_csv('/workspaces/BU_Chatbot/strawman-end-to-end-implementation/embeddings.csv', index_col=0)
# Each 'embeddings' cell is expected to hold the text of a Python list literal.
# ast.literal_eval parses literals only, so unlike eval() it cannot execute
# arbitrary code that might be embedded in the CSV file.
df['embeddings'] = df['embeddings'].apply(ast.literal_eval).apply(np.array)


print(answer_question(df, question="what are the instructors for the course sm 132", n=3, debug=False))