In [2]:
from typing import Optional
import promptquality as pq
from promptquality import Scorers

all_metrics =[
    Scorers.latency,
    Scorers.pii,
    Scorers.toxicity,
    Scorers.tone,
    #rag metrics below
    Scorers.context_adherence, 
    Scorers.completeness_gpt,
    Scorers.chunk_attribution_utilization_gpt,]

# Uncertainty, BLEU and ROUGE are automatically included


#Custom scorer for response Length 
def executor(row) -> Optional[float]: 
    if row.response:
        return len(row.response)
    else:
        return 0

def aggregator(scores, indices) -> dict:
    return {'Response Length': sum(scores)/len(scores)}

length_scorer = pq.CustomScorer(name='Response Length', executor=executor, aggregator=aggregator)
all_metrics.append(length_scorer)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os
from langchain_openai import ChatOpenAI 
from langchain.prompts import ChatPromptTemplate
from langchain. schema.runnable import RunnablePassthrough 
from langchain. schema import StrOutputParseri
from langchain_community.vectorstores import Pinecone as langchain_pinecone
from pinecone import Pinecone

def get_qa_chain(embeddings, index_name, k, llm_model_name, temperature):
    # setup retriever
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    index = pc.Index(index_name)
    vectorstore = langchain_pinecone(index, embeddings.embed_query, "text")
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

    # setup prompt
    rag_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Answer the question based only on the provided context."
            ),
            (
                "human",
                "Context: '{context}' \n\n Question: '{question}'"
                ),
        ]
    )

    # setup LLm
    llm = ChatOpenAI(model_name= llm_model_name, temperature=temperature)
    
    # helper function to format docs
    def format_docs(docs):
        return "\n\n".join([d.page_content for d in docs])
    
    # setup chain
    rag_chain =({"context": retriever | format_docs, "question": RunnablePassthrough()},rag prompt, llm, StrOutputParser())
    
    return rag_chain
    

SyntaxError: invalid syntax (3620504337.py, line 38)

In [10]:
from langchain_openai import OpenAIEmbeddings 
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone as langchain_pinecone
from pinecone import Pinecone, ServerlessSpec 
import nest_asyncio
nest_asyncio.apply()
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.core.evaluation import DatasetGenerator, FaithfulnessEvaluator, RelevancyEvaluator
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
import time
import os
from llama_index.core import (ServiceContext,SimpleDirectoryReader,StorageContext,VectorStoreIndex,set_global_service_context)
from langchain_community.document_loaders import WebBaseLoader

from langchain_community.vectorstores import Chroma

from langchain_community import embeddings

from langchain_community.llms import Ollama

from langchain_core.runnables import RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser

from langchain_core.prompts import ChatPromptTemplate

from langchain.text_splitter import CharacterTextSplitter 

# Initialize Ollama embeddings.
embeddings = OllamaEmbeddings(model="llama:7b")


In [11]:
load_dotenv()

True

In [8]:
emb_model_name = "llama"
dimensions= "7b"
index_name = f"{emb_model_name}-{dimensions}".lower()

In [None]:
# setup retriever - ChromaDB
vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name=index_name,
        embedding=embeddings.ollama.OllamaEmbeddings(model='mistral'),
    )
retriever = vectorstore.as_retriever()

# perform the RAG 

# Setup prompt
rag_template = """Answer the question based only on the following context: {context}
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)

# Setup LLM
llm = Ollama(model="llama2")

# Setup chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke(question) 

In [None]:
# Initialize Ollama embeddings.
embeddings = OllamaEmbeddings(model="llama:7b")

In [None]:
# First, check if our index already exists and delete stale index
# Here I guess it is collection name

# create a new index

# index the documents

# Load qa chain
# Pass - embeddings, index_name, k, llm_model_name, temperature

In [None]:


index = pc.Index(index_name)
vectorstore = langchain_pinecone(index, embeddings.embed_query, "text")
retriever = vectorstore.as_retriever(search_kwargs={"k": k})


In [None]:

def rag_chain_executor(emb_model_name: str, dimensions: int, llm_model_name: str, k: int) -> None:
    # # initialise embedding model
    # if "text-embedding-3" in emb_model_name:
    #     embeddings = OpenAIEmbeddings(model=emb_model_name, 
    #                                   dimensions=dimensions)
    # else:
    #     embeddings = HuggingFaceEmbeddings(
    #         model_name=emb_model_name, 
    #         encode_kwargs = {'normalize_embeddings': True})
    
    index_name = f"{emb_model_name}-{dimensions}".lower()

    # First, check if our index already exists and delete stale index
    if index_name in [index_info['name'] for index_info in pc.list_indexes()]:
        pc.delete_index(index_name)
        
    # create a new index
    pc.create_index(name=index_name, 
                    metric="cosine", 
                    dimension=dimensions,
                    spec=ServerlessSpec(
                        cloud= "aws",
                        region="us-west-2"))

    time.sleep(10)

    # index the documents
    _ = langchain_pinecone.from_documents(documents, embeddings, index_name=index_name)
    time.sleep(10)

    # Load qa chain
    qa = get_qa_chain(embeddings, index_name, k, llm_model_name, temperature)
    # tags to be kept in galileo run
    run_name = f"{index_name}"
    index_name_tag = pq.RunTag(key="Index config", value=index_name, tag_type=pq.TagType.RAG)
    emb_model_name_tag = pq.RunTag(key="Emb", value=emb_model_name, tag_type=pq.TagType.RAG)
    llm_model_name_tag = pq.RunTag(key="LLM", value=llm_model_name, tag_type=pq.TagType.RAG)
    dimension_tag = pq.RunTag(key="Dimension", value=str(dimensions), tag_type=pq.TagType.RAG)
    topk_tag = pq.RunTag(key="Top k", value=str(k), tag_type=pq.TagType.RAG)

    evaluate_handler = pq.GalileoPromptCallback(
        project_name=project_name, 
        run_name=run_name, 
        scorers = all_metrics, 
        run_tags=[emb_model_name_tag,llm_model_name_tag,index_name_tag,dimension_tag,topk_tag]
        )

    # run chain with questions to generate the answers
    print("Ready to ask!")
    for i, q in enumerate(tqdm(questions)):
        print(f"Question {i}: ", q)
        print(qa.invoke(q, config=dict(callbacks=[evaluate_handler])))
        print("\n\n")
        
    evaluate_handler. finish()