In [1]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from pathlib import Path
from langchain_openai import ChatOpenAI
from langchain_ollama import OllamaEmbeddings
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [2]:
load_dotenv()

True

In [3]:
class ExplainableRetriever:
    """Retrieve the passages most similar to a query and explain each match.

    The supplied texts are embedded with an Ollama embedding model and indexed
    in a FAISS vector store. For every passage the retriever returns, a chat
    model is prompted to describe why that passage is relevant to the query.
    """

    def __init__(
        self,
        texts,
        *,
        k=5,
        embedding_model="nomic-embed-text",
        llm_model="meta-llama/llama-3.3-70b-instruct",
        max_tokens=4000,
    ):
        """Build the vector store, the retriever and the explanation chain.

        Args:
            texts: Non-empty iterable of strings to index.
            k: Number of passages requested from the vector store per query.
            embedding_model: Model name passed to ``OllamaEmbeddings``.
            llm_model: Model name passed to ``ChatOpenAI``.
            max_tokens: ``max_tokens`` value passed to ``ChatOpenAI``.

        Raises:
            ValueError: If ``texts`` is empty or ``k`` is smaller than 1.
        """
        # Validate before any embedding/LLM client is created so that a bad
        # call fails immediately with a clear message.
        texts = list(texts)
        if not texts:
            raise ValueError("texts must contain at least one document")
        if k < 1:
            raise ValueError("k must be a positive integer")

        self.embeddings = OllamaEmbeddings(model=embedding_model)
        self.vectorstore = FAISS.from_texts(texts, self.embeddings)

        # temperature=0 is kept fixed, as in the original configuration.
        self.llm = ChatOpenAI(temperature=0, model=llm_model, max_tokens=max_tokens)

        self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": k})

        explain_prompt = PromptTemplate(
            input_variables=["query", "context"],
            template="""
            Analyze the relationship between the following query and the retrieved context.
            Explain why this context is relevant to the query and how it might help answer the query.
            
            Query: {query}
            
            Context: {context}
            
            Explanation:
            """
        )
        # Prompt piped into the chat model; invoked once per retrieved passage.
        self.explain_chain = explain_prompt | self.llm

    def retrieve_and_explain(self, query):
        """Retrieve passages for ``query`` and explain each one.

        Args:
            query: The query passed to the retriever and to the explanation
                prompt.

        Returns:
            A list with one dict per retrieved document, in retrieval order.
            Each dict has the keys ``"content"`` (the document's
            ``page_content``) and ``"explanation"`` (the ``content`` of the
            chain's response for that document). The list is empty when the
            retriever returns no documents.
        """
        docs = self.retriever.invoke(query)

        return [
            {
                "content": doc.page_content,
                "explanation": self.explain_chain.invoke(
                    {"query": query, "context": doc.page_content}
                ).content,
            }
            for doc in docs
        ]

In [4]:
# Three one-sentence facts used as a toy corpus for the retriever demo.
texts = [
    "The sky is blue because of the way sunlight interacts with the atmosphere.",
    "Photosynthesis is the process by which plants use sunlight to produce energy.",
    "Global warming is caused by the increase of greenhouse gases in Earth's atmosphere.",
]

# Constructing the retriever embeds and indexes the corpus right away.
explainable_retriever = ExplainableRetriever(texts=texts)

In [5]:
# Run one query through the retriever and show each hit with its explanation.
query = "Why is the sky blue?"
results = explainable_retriever.retrieve_and_explain(query)

for i, result in enumerate(results, start=1):
    # One print per hit; the empty final item yields the blank separator line.
    print(
        f"Result {i}:",
        f"Content: {result['content']}",
        f"Explanation: {result['explanation']}",
        "",
        sep="\n",
    )

Result 1:
Content: The sky is blue because of the way sunlight interacts with the atmosphere.
Explanation: The query "Why is the sky blue?" is a question that seeks to understand the underlying reason for the sky's blue color. The retrieved context "The sky is blue because of the way sunlight interacts with the atmosphere" is directly relevant to the query because it provides a concise and accurate explanation for the phenomenon.

This context is relevant to the query for several reasons:

1. **Direct Answer**: The context provides a direct answer to the query, stating that the interaction between sunlight and the atmosphere is the reason for the sky's blue color.
2. **Scientific Explanation**: The context offers a scientific explanation, which is likely to satisfy the query's demand for a rational and evidence-based answer.
3. **Brevity and Clarity**: The context is brief and easy to understand, making it accessible to a wide range of audiences, from non-experts to experts in the fiel