In [None]:
from langchain.vectorstores import Chroma
from dotenv import load_dotenv, find_dotenv
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
import openai
from langchain.chains import RetrievalQA
from langchain.retrievers.multi_query import MultiQueryRetriever
import os
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.prompts import PromptTemplate

_ = load_dotenv(find_dotenv()) # read local .env file
print(os.environ['LANGCHAIN_API_KEY'])

In [None]:
embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = OpenAIEmbeddings()

#embeddings_model_name = 'thenlper/gte-base'
#embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

In [None]:
llm_model = "gpt-3.5-turbo-16k"
llm = ChatOpenAI(model_name=llm_model, temperature=0)

In [None]:
persist_directory = '../chroma_clean_ada/'
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

print(vectordb._collection.count())


In [None]:
#retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k":5})

In [None]:
retriever = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm
)

In [None]:
meta_question = "I will give you a multiple choice question and \
you will pick the right answer based on your knowledge and the given context. \
There will always be 5 questions, going from levels low to high. \
The sixth answer is always I don't know \
You will pick the answer from the multiple choices presented \
Also, after the answer, you will explain how you got to the answer, referrring to the pieces \
of context that gave you the answer: \
\
<question> \
{question} \
\
<possible answers> \
{answers}\
"

meta_prompt = PromptTemplate(
    template = meta_question, input_variables=['question', 'answers']
)

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever
)


In [None]:
# Helper function for printing docs

def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))

In [None]:
question = "To what extent is the relationship between climate hazards and \
social vulnerability/inequity understood among city leaders and staff?"

answers = " \
1 (Low) The relationship between climate hazards and social inequity has not been explored by staff or elected officials\
2 (Medium) The relationship between climate hazards and social inequity is familiar to select city staff or elected \
officials \
3 (High) City staff and elected officials are well-versed in the concepts and taxonomy of the relationship between climate hazards and social inequity \
4 I dont know \
"

query = meta_prompt.format(question=question, answers=answers)
query


In [None]:

result = qa_chain({"query": query})
print(result["result"])

In [None]:
question = "Are there programs in place to support education and knowledge transfer of Equitable Climate Resilience issues?"

result = qa_chain({"query": question})
print(result["result"])

In [None]:
vectordb.similarity_search(
    question,  # our search query
    k=5
    # return 3 most relevant docs
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

retriever = vectordb.as_retriever()

In [None]:
from langchain import hub

prompt = hub.pull('langchain-ai/rag-fusion-query-generation')

In [None]:
# prompt = ChatPromptTemplate.from_messages([
#     ("system", "You are a helpful assistant that generates multiple search queries based on a single input query."),
#     ("user", "Generate multiple search queries related to: {original_query}"),
#     ("user", "OUTPUT (4 queries):")
# ])

In [None]:
generate_queries = prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split("\n"))

In [None]:
from langchain.load import dumps, loads
def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1 / (rank + k)
            
    reranked_results = [(loads(doc), score) for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)]
    return reranked_results 

In [None]:
chain = generate_queries | retriever.map() | reciprocal_rank_fusion

question = "Are there programs in place to support education and knowledge transfer of Equitable Climate Resilience issues?"

da_context = chain.invoke({"original_query": query})

da_context

In [None]:
concatenated_content = ""
for d in da_context:
    concatenated_content += d[0].page_content + "\n\n"

concatenated_content

In [55]:
result = qa_chain({"query": query, "context": concatenated_content})
print(result["result"])

Based on the given context, it is not clear how well city leaders and staff understand the relationship between climate hazards and social vulnerability/inequity. Therefore, the answer would be "I don't know" (option 4). The context does not provide any information about the level of understanding among city leaders and staff regarding this relationship.
