In [9]:
import math
import os
import re
from dotenv import load_dotenv
# ✅ Step 1: Load environment variables
load_dotenv()
# OPENAI_API_KEY is read from the environment further down (via os.getenv); set it in your
# .env file rather than hardcoding the key in the notebook.

# ✅ Step 2: Import libraries
from langchain.embeddings import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain import hub
import openai
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel

# Read the key from the environment and stop right away with a clear message when it is
# missing, instead of handing None to the embedding client.
openai.api_key = os.getenv("OPENAI_API_KEY")  # Ensure you have set this in your .env file
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it in the environment."
    )

# Embedding client handed to FAISS.load_local below as the index's embedding function.
embedding = OpenAIEmbeddings(openai_api_key=openai.api_key)

# SECURITY NOTE: allow_dangerous_deserialization=True opts in to loading pickled data from
# the index directory. Only load an index you built yourself or otherwise fully trust.
vectorstore = FAISS.load_local("nba_vector_db", embeddings=embedding, allow_dangerous_deserialization=True)

In [10]:
## Define the Retriever
# k=5: number of documents requested from the vector store per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

## Define the prompt
# The RAG answer prompt is pulled from the LangChain Hub by its repo handle.
prompt = hub.pull("jclemens24/rag-prompt")



In [11]:
# Grader prompt text: asks for a 1-5 relevance rating of the retrieved context against the
# question, returned as a bare number. Placeholders: {question}, {retrieved_context}.
RELEVANCE_PROMPT_TEXT = """
    Given the following question and retrieved context, determine if the context is relevant to the question.
    Provide a score from 1 to 5, where 1 is not at all relevant and 5 is highly relevant.
    Return ONLY the numeric score, without any additional text or explanation.

    Question: {question}
    Retrieved Context: {retrieved_context}

    Relevance Score:"""

relevance_prompt_template = PromptTemplate.from_template(RELEVANCE_PROMPT_TEXT)

In [12]:
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        str: The page contents in input order, separated by a blank line.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [13]:
def extract_score(llm_output):
    """Parse the numeric relevance score out of the grader's text reply.

    Args:
        llm_output: Text returned by the relevance-grading call. A bare number
            (e.g. ``"4"``) is parsed directly; if the reply wraps the number in
            other text (e.g. ``"Relevance Score: 4"``), the first numeric token
            is used, provided it lies on the prompt's 1-5 scale.

    Returns:
        float: The parsed score, or ``0.0`` when the input is not a string,
        holds no usable number, or parses to a non-finite value (nan/inf).
    """
    if not isinstance(llm_output, str):
        return 0.0

    text = llm_output.strip()
    try:
        score = float(text)
    except ValueError:
        # Not a bare number: fall back to the first numeric token in the reply.
        match = re.search(r"\d+(?:\.\d+)?", text)
        if match is None:
            return 0.0
        score = float(match.group())
        # The fallback is a heuristic, so only trust values on the 1-5 scale the
        # prompt asks for (rejects stray numbers such as years quoted in the reply).
        if not 1 <= score <= 5:
            return 0.0

    # float() accepts "nan" and "inf"; nan would slip past a `score < 4` check because
    # every comparison with nan is False, so treat non-finite values as unparseable.
    return score if math.isfinite(score) else 0.0

# Gate the generated answer on the relevance score (used as the final step of the chain)
def conditional_answer(x):
    """Pick the final reply based on the graded relevance.

    Args:
        x: Mapping holding the grader's raw reply under ``'relevance_score'``
            and the generated answer under ``'answer'``.

    Returns:
        The generated answer, unless the parsed relevance score is below 4,
        in which case the fixed fallback "I don't know." is returned.
    """
    parsed_score = extract_score(x['relevance_score'])
    if not (parsed_score < 4):
        return x['answer']
    return "I don't know."

In [14]:
## Define the LLM
CHAT_MODEL_NAME = "gpt-4o-mini"
llm = ChatOpenAI(model_name=CHAT_MODEL_NAME)

# Parser placed after the LLM in each sub-chain so downstream steps receive plain text.
str_output_parser = StrOutputParser()

In [15]:
## Define the chain
def _render_relevance_prompt(inputs):
    """Fill the relevance-grading prompt from the chain's running input dict."""
    return relevance_prompt_template.format(
        question=inputs['question'],
        retrieved_context=inputs['context'],
    )


# Branch 1: grade how relevant the (already formatted) context is to the question.
relevance_score_chain = RunnablePassthrough() | _render_relevance_prompt | llm | str_output_parser

# Branch 2: generate the answer from the same inputs using the hub prompt.
answer_chain = RunnablePassthrough() | prompt | llm | str_output_parser

rag_chain_from_docs = (
    # Replace the list of retrieved documents with one formatted text block.
    RunnablePassthrough.assign(context=lambda x: format_docs(x["context"]))
    # Run both branches on the same input; the output dict has exactly these two keys.
    | RunnableParallel({"relevance_score": relevance_score_chain, "answer": answer_chain})
    # Add the gated reply alongside the raw score and raw answer.
    | RunnablePassthrough.assign(final_answer=conditional_answer)
)

In [16]:
# Fan the incoming question out to the retriever ("context") and pass it through
# unchanged ("question"); then attach the graded answer dict under "answer".
retrieval_inputs = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
rag_chain_with_source = retrieval_inputs.assign(answer=rag_chain_from_docs)

In [20]:
# Run one sample question end to end and show the relevance grade next to the gated answer.
question = "Who is the GOAT of NBA?"
result = rag_chain_with_source.invoke(question)

graded = result['answer']
relevance_score = graded['relevance_score']
final_answer = graded['final_answer']

print(f"Relevance Score: {relevance_score}")
print(f"Final Answer:\n{final_answer}")

Relevance Score: 5
Final Answer:
The GOAT (Greatest of All Time) of the NBA is widely considered to be Michael Jordan, given his numerous achievements, including six NBA championships, five regular season MVPs, and being named the greatest basketball player of the 20th century by the Associated Press. While LeBron James is also frequently mentioned in this debate, particularly for his all-around contributions and being the NBA's all-time leading scorer, many analysts and fans still favor Jordan in discussions about the greatest player in history.
