In [1]:
import os
os.chdir("..")

In [2]:
import streamlit as st  
from langchain.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
import pinecone

# Load resume
from toddbo.loader_utils import unzip, fetch_load_split

unzip()

index_name = st.secrets.pinecone.index
OPENAI_API_KEY = st.secrets.openai.OPENAI_API_KEY

# Load Pinecone
@st.cache_resource
def load_pinecone(_documents, embeddings=OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)):
    pc = Pinecone(api_key=st.secrets.pinecone.api_key)
    index = pc.Index(st.secrets.pinecone.index)
    docsearch = Pinecone.from_documents(_documents, embeddings, index_name=st.secrets.pinecone.index)
    return docsearch

def build_retriever(search_type="mmr"):
    documents = fetch_load_split()
    vectordb = load_pinecone(documents)
    if vectordb is not None:
        retriever = vectordb.as_retriever(search_type=search_type)
        return retriever

  from tqdm.autonotebook import tqdm


In [4]:
import streamlit as st 
import time
import openai
from typing import Dict, List, Union
from langchain.retrievers.multi_query import MultiQueryRetriever
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def make_synchronous_openai_call(
    *,
    openai_api_key: str,
    model: str,
    temperature: float,
    messages: List[Dict[str, Union[str, Dict[str, str]]]],
    timeout_seconds: int,
):
    return openai.ChatCompletion.create(
        api_key=openai_api_key,
        model=model,
        messages=messages,
        top_p=1,
        n=1,
        max_tokens=st.secrets.openai.MAX_TOKENS,
        temperature=temperature,
        presence_penalty=0,
        frequency_penalty=0,
        logit_bias={},
        stream=False,
        request_timeout=timeout_seconds,
    )

def retrieve_resume_documents(
        llm, 
        user_prompt, 
        retriever) -> list:
    retriever_from_llm = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
    unique_docs = retriever_from_llm.get_relevant_documents(query=user_prompt)
    return unique_docs


def retrieve_chroma_documents(
    client,
    prompt: str,
):
    retriever = generate_context(
        client,
        embedding_function=OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY),
        collection_name=st.secrets.chroma.COLLECTION,
    )
    llm = ChatOpenAI(temperature=st.secrets.openai.temperature)
    retriever_from_llm = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)

    unique_docs = retriever_from_llm.get_relevant_documents(query=prompt)
    return unique_docs

def generate_search_results(
    *,
    llm,
    user_prompt: str,
    timeout_seconds: int=90,
) -> str:
    
    start_time = time.time()
    
    documents = build_chroma_retriever(llm, user_prompt)
    
    messages = [
        {
            "role": "system",
            "content": (
                "You're an personal assistant tasked with helping recruiters find relevant experience from Todd's resume. "
                "Your task is to provide as many relevant documents as possible. "
                "Lastly, generating results swiftly should be prioritized over achieving perfection."
            ),
        },
        {
            "role": "user",
            "content": "I'll provide input as text of a list of Documents in content that follows '!!!. "
            "Each item in the list contains page_content and metadata."
            "Provide a brief summary of all the documents."
            "Give the section from the metadata and the related content." 
            "Provide the information in short bullet points and provide the metadata with each document laid as such:"
            "if a word is between * and *, make the word appear bold."
            "*Summary*: "
            "*Section*: "
            "*Supporting Details*:"
            "Do not make stuff up. If a document has no valuable information, skip it."
            f"Here is the input !!!\n{str(documents)}",
        },
    ]
    start_time = time.time()
    
    openai_response = make_synchronous_openai_call(
        openai_api_key=st.secrets.openai.OPENAI_API_KEY,
        model=st.secrets.openai.OPENAI_MODEL,
        temperature=st.secrets.openai.temperature,
        messages=messages,
        timeout_seconds=timeout_seconds,
    )
    spent_time = time.time() - start_time
    print(f"Search took {spent_time} seconds")
    return openai_response["choices"][0]["message"]["content"]


In [10]:
# PREDEFINED
from toddbo import connect_to_chroma, connect_to_collection
client = connect_to_chroma(chroma_host=st.secrets.chroma.CHROMA_HOST, chroma_port=st.secrets.chroma.CHROMA_PORT)
collection = connect_to_collection(client, st.secrets.chroma.COLLECTION)

AttributeError: st.secrets has no attribute "OPENAI_API_KEY". Did you forget to add it to secrets.toml or the app settings on Streamlit Cloud? More info: https://docs.streamlit.io/streamlit-cloud/get-started/deploy-an-app/connect-to-data-sources/secrets-management

In [5]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(temperature=st.secrets.openai.temperature, model_name=st.secrets.openai.generation_model)

# Build the retriever
retriever = build_chroma_retriever()

TypeError: build_chroma_retriever() missing 1 required positional argument: 'prompt'

In [8]:
user_prompt = "Where did Todd work in 2021?"
documents = build_chroma_retriever(user_prompt)

NameError: name 'connect_to_chroma' is not defined

In [6]:
documents = fetch_load_split()

In [7]:
vectordb = load_pinecone(documents)

AttributeError: init is no longer a top-level attribute of the pinecone package.

Please create an instance of the Pinecone class instead.

Example:

    import os
    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    # Now do stuff
    if 'my_index' not in pc.list_indexes().names():
        pc.create_index(
            name='my_index', 
            dimension=1536, 
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-west-2'
            )
        )



In [8]:
from pinecone import Pinecone

pc = Pinecone(api_key=st.secrets.pinecone.api_key)

In [9]:
index = pc.Index(st.secrets.pinecone.index)

In [12]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.00032,
 'namespaces': {'v1': {'vector_count': 32}},
 'total_vector_count': 32}