In [1]:
from IPython.display import display, Markdown
import os
from LS_AMG_RAG.data_snythesis import prompt_utils
import google.generativeai as palm

import chromadb
import chromadb.utils.embedding_functions as embedding_functions
# chroma_client = chromadb.Client()
chroma_client = chromadb.PersistentClient(path="./")
gemini = prompt_utils.Gemini()
google_ef  = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=os.environ['GEMINI_API_KEY'])

In [3]:
documents = []
metadata = []
ids = []
# for idx, file_name in enumerate(os.listdir("../data/business_docs")):
#     with open(f"../data/business_docs/{file_name}", "r") as f:
#         file_contents = f.read()
#         documents.append(file_contents)
#         metadata.append({
#             "type": "business document",
#         })
#         ids.append(f"business_doc_{idx}")

# add all files within the 'data/' directory to chromadb using os.walk
for root, dirs, files in os.walk("../data"):
    for file in files:
        if file.endswith(".md"):
            category = root.split('\\')[-1]
            with open(os.path.join(root, file), "r") as f:
                file_contents = f.read()
                documents.append(file_contents)
                metadata.append({
                    "type": category,
                })
                ids.append(f"{category}_{file}")

In [4]:
len(documents)

74

In [5]:
for m in palm.list_models():
    print(m.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision
models/embedding-001
models/aqa


In [6]:
collection = chroma_client.create_collection(name="my_collection", embedding_function=google_ef)

In [7]:
collection.add(
    documents=documents,
    metadatas=metadata,
    ids=ids,
)

In [8]:
len(collection.get()['ids'])

74

In [11]:
queries = [
    "Who are the members of Instagram's board of directors?",
]
import time
total_start_time = time.time()
start_time = time.time()
results = collection.query(
    query_texts=queries,
    n_results=3
)
end_time = time.time()

print(f"Document retrieved: {results['ids'][0][-1]}")

print(f"Retrieval time: {end_time - start_time} seconds")

metaprompt = """You are a helpful and informative bot that answers questions using text from the reference document included below. \
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
strike a friendly and converstional tone. \
Use your own knowledge base in addition to the information provided in the document to answer the question. \
Make relevant assumptions and use your best judgement to answer the question. \
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

  ANSWER:
"""

start_time = time.time()
gemini_result = gemini.send_message(message=metaprompt.format(query=queries[0], relevant_passage=results['documents'][0][-1])).text
end_time = time.time()
total_end_time = time.time()
display(Markdown(gemini_result))
print(f"Gemini time: {end_time - start_time} seconds")
print(f"Total time: {total_end_time - total_start_time} seconds")

Document retrieved: company_bylaws_About Instagram.md
Retrieval time: 0.4795975685119629 seconds


The reference document you provided does not include any information about Instagram's board of directors. Therefore, I cannot answer this question.

Gemini time: 2.6514804363250732 seconds
Total time: 3.131078004837036 seconds


In [41]:
metaprompt = """You are a helpful and informative bot that answers questions using text from the reference passage included below. \
  Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
  However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
  strike a friendly and converstional tone. \
  If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

  ANSWER:
"""

start_time = time.time()
gemini_result = gemini.send_message(message=metaprompt.format(query=queries[0], relevant_passage=results['documents'][0][-1])).text
end_time = time.time()
display(Markdown(gemini_result))
print(f"Query time: {end_time - start_time} seconds")

I apologize, but I cannot answer your question as the provided text does not contain any information about the Direct Messaging update or any challenges associated with it.

Query time: 2.1806864738464355 seconds
