In [4]:
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment; later
# cells read PINECONE_INDEX_NAME and LLM_MAX_TOKENS through os.getenv.
load_dotenv()


  from tqdm.autonotebook import tqdm


True

In [6]:
# Connect to the Pinecone index named by the PINECONE_INDEX_NAME environment
# variable and wrap it in a LlamaIndex vector store.
pc = Pinecone()
index_name = os.getenv("PINECONE_INDEX_NAME")
if not index_name:
    # Fail here with an actionable message instead of handing None/"" to the client.
    raise RuntimeError(
        "PINECONE_INDEX_NAME is not set; define it in the environment or the .env file."
    )
index = pc.Index(index_name)
# The returned stats are not displayed (this is not the cell's last expression);
# the call presumably serves as an early connectivity check — TODO confirm.
index.describe_index_stats()
vector_store = PineconeVectorStore(
    pinecone_index=index,
)


In [None]:
import pandas as pd

# Read the question/answer sheet from the working directory and preview the
# first five rows as the cell output.
qa_df = pd.read_csv(filepath_or_buffer="qa_current.csv")
qa_df.head(n=5)


In [8]:
from app.engine import get_chat_engine


In [1]:
from llama_index.core.settings import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

# Name of the OpenAI embedding model registered on LlamaIndex's global Settings.
embed_model_name = "text-embedding-3-large"

# Install the embedding model globally; every option is passed by keyword, so
# the order below carries no meaning.
Settings.embed_model = OpenAIEmbedding(
    dimensions=3072,
    reuse_client=True,
    timeout=180,
    max_retries=25,
    embed_batch_size=100,
    model=embed_model_name,
)


In [6]:
# Show which model the currently configured LLM uses, before Settings.llm is
# replaced further down (the recorded output below is 'gpt-3.5-turbo').
Settings.llm.model

'gpt-3.5-turbo'

In [7]:
from llama_index.llms.openai import OpenAI

# Configure the global LLM. LLM_MAX_TOKENS is optional: when it is unset or
# blank, max_tokens is passed as None.
max_tokens = os.getenv("LLM_MAX_TOKENS")

Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.0,
    # A blank value (e.g. "LLM_MAX_TOKENS=" in .env) would make int("") raise
    # ValueError, so it is treated the same as an unset variable.
    max_tokens=int(max_tokens) if max_tokens and max_tokens.strip() else None,
)

In [8]:
# Re-check the configured model after assigning Settings.llm (the recorded
# output below is 'gpt-4o-mini').
Settings.llm.model

'gpt-4o-mini'

In [7]:
# messages = ''
# chat_engine = get_chat_engine()


In [8]:
# response = await chat_engine.achat("Can a friend of the church become a mentor?")
# print(response)


In [None]:
from test_tools import evaluate_response

messages = ""
chat_engine = get_chat_engine()
new_data = []
for index, row in qa_df.iterrows():
    question = row["Question"]
    ideal_answer = row["Ideal Answer"]
    chat_engine.reset()
    response = await chat_engine.achat(question)
    retrieved = "\n\n".join(
        [
            f"node_id: {idx+1}\n{node.metadata('url')}\n{node.text}"
            for idx, node in enumerate(response.source_nodes)
        ]
    )
    ia_evaluation = evaluate_response(
        question=question,
        ideal=ideal_answer,
        generated=response.response,
        nodes=retrieved,
    )
    ia_score, ia_explanation = ia_evaluation.split(" - ")
    # get only the number inside ()
    ia_score = ia_score.split("(")[1].split(")")[0]

    updated_row = row.to_dict()
    updated_row.update(
        {
            "Question": question,
            "Ideal Answer": ideal_answer,
            "Query Engine Answer": response.response,
            "Retrieved Content": retrieved,
            "ia_score": ia_score,
            "ia_explanation": ia_explanation,
        }
    )
    new_data.append(updated_row)


In [10]:
# Build the results table from the evaluated rows, drop rows that have no
# "Ideal Answer", and write the table (including its index) to CSV.
result_df = pd.DataFrame(new_data).dropna(subset=["Ideal Answer"])
result_df.to_csv("chatbot_answer.csv")


In [None]:
# Display the results table (rows with a missing "Ideal Answer" were already
# dropped before it was written to chatbot_answer.csv).
result_df
