In [1]:
import os
from langchain_chroma import Chroma

CHROMA_PATH = "chroma"

if "GOOGLE_API_KEY" not in os.environ:
    with open("./.env", "r") as mykey:        
        os.environ["GOOGLE_API_KEY"] = mykey.read().strip()

# from langchain_community.embeddings.ollama import OllamaEmbeddings
# from langchain_community.embeddings.bedrock import BedrockEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding Function: Used when creating the DB, or making a query.
def get_embedding_function():
    # Bedrock Embeddings for AWS Deploy
    # embeddings = BedrockEmbeddings(
    #    credentials_profile_name="default", region_name="us-east-1"
    #)
    # Ollama Embeddings for Local Run
    # Install and 'ollama pull llama2|mistral' to deploy.
    # Use 'ollama serve' for restful API
    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
    # 
    embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
    return embeddings

from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
# from langchain_community.llms.ollama import Ollama
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Prepare the DB.
embedding_function = get_embedding_function()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
retriever = db.as_retriever(search_kwargs={"k" : 5})
# Generate query with the info augmented prompt.
# context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
# prompt = prompt_template.format(context=context_text, question=query_text)
# print(prompt)

# model = Ollama(model="mistral")
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
# response_text = model.invoke(prompt)
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | model
    | StrOutputParser()
)


def run(user_message: str):
    response = rag_chain.invoke(user_message)
    
    contexts = db.similarity_search_with_score(user_message, k=5)
    sources = [doc.metadata.get("id", None) for doc, _score in contexts]
    
    return {"answer": response, "contexts": sources}

  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)


In [2]:
from tqdm import tqdm

data_samples = {
    'question': [
        'How to write a lambda in Racket?', 
        'What are First Order Functions, Higher Order Functions, and First Class Functions?'
    ],
    'answer': [],
    'contexts': [],
    'ground_truth': [
        """
         In Racket, you can use a lambda expression to produce a function directly.
         The lambda form is followed by identifiers for the function’s arguments, and then the function’s body expressions: ( lambda ( xidy* ) xexpry+ ).
         A lambda expression can also have the form (lambda rest-id body ...+)
         That is, a lambda expression can have a single rest-id that is not surrounded by parentheses.
         The resulting function accepts any number of arguments, and the arguments are put into a list bound to rest-id.""",
        """
         First order: functions are not real values. They cannot be used or returned as values by other functions.
         This means that they cannot be stored in data structures. This is what most “conventional” languages used to have in the past. (You will be implementing such a language in homework 4.)
         An example of such a language is the Beginner Student language that is used in HtDP, where the language is intentionally first-order to help students write correct code (at the early stages where using a function as a value is usually an error).
         It’s hard to find practical modern languages that fall in this category.
         Higher order: functions can receive and return other functions as values. This is what you get with C and modern Fortran.
         First class: functions are values with all the rights of other values.
         In particular, they can be supplied to other functions, returned from functions, stored in data structures, and new functions can be created at run-time. (And most modern languages have first class functions.)"""
    ]
}

for question in tqdm(data_samples['question']):
    result = run(question)
    data_samples['answer'].append(result['answer'])
    data_samples['contexts'].append(result['contexts'])


00%|█████████████████████████████████████████████| 2/2 [00:03<00:00,  1.52s/it]

In [5]:
from datasets import Dataset 
import os
from ragas import evaluate
from ragas.llms import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics import faithfulness, answer_correctness
from langchain.schema import LLMResult, Generation

class Gemini(BaseRagasLLM):
    def generate_text(
        self,
        prompt,
        n: int = 1,
        temperature: float = 1e-8,
        stop = None,
        callbacks = None,
    ):
        print(f'#################################Asking gemini len={len(prompt.to_string())}')
        # print(prompt)
        ai_message = model.invoke(prompt)
        print(f'#################################Got response from gemini len={len(ai_message.content)}')
        # print(ai_message.content);
        generation = Generation(text=ai_message.content)
        return LLMResult(generations=[[generation]])
        
    async def agenerate_text(
        self,
        prompt,
        n: int = 1,
        temperature = None,
        stop = None,
        callbacks = None,
    ):
        return self.generate_text(prompt,n,temperature,stop,callbacks)

# Create a custom is_finished_parser to capture Gemini generation completion signals
def gemini_is_finished_parser(response: LLMResult) -> bool:
    is_finished_list = []
    for g in response.flatten():
        resp = g.generations[0][0]

        # Check generation_info first
        if resp.generation_info is not None:
            finish_reason = resp.generation_info.get("finish_reason")
            if finish_reason is not None:
                is_finished_list.append(
                    finish_reason in ["STOP", "MAX_TOKENS"]
                )
                continue

        # Check response_metadata as fallback
        if isinstance(resp, ChatGeneration) and resp.message is not None:
            metadata = resp.message.response_metadata
            if metadata.get("finish_reason"):
                is_finished_list.append(
                    metadata["finish_reason"] in ["STOP", "MAX_TOKENS"]
                )
            elif metadata.get("stop_reason"):
                is_finished_list.append(
                    metadata["stop_reason"] in ["STOP", "MAX_TOKENS"] 
                )

        # If no finish reason found, default to True
        if not is_finished_list:
            is_finished_list.append(True)

    return all(is_finished_list)

dataset = Dataset.from_dict(data_samples)
g = LangchainLLMWrapper(model, is_finished_parser=gemini_is_finished_parser)
score = evaluate(dataset, metrics=[faithfulness, answer_correctness], llm=g, embeddings=embedding_function)
df = score.to_pandas()
df.to_csv('score.csv', index=False)

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Exception raised in Job[2]: TypeError(generate_content() got an unexpected keyword argument 'temperature')
Exception raised in Job[0]: TypeError(generate_content() got an unexpected keyword argument 'temperature')
Exception raised in Job[1]: TypeError(generate_content() got an unexpected keyword argument 'temperature')
Exception raised in Job[3]: TimeoutError()


KeyError: 0