# Pre-Reqs

In [None]:
%pip install langchain_openai langchain langchain-community langchain_chroma langchain_cohere pinecone langgraph

In [None]:
!pip install langfuse

# Graph

## vectordb

In [None]:
from chromadb.utils.embedding_functions import OllamaEmbeddingFunction

class LangChainOllamaEmbeddings:
    def __init__(self, model_name="nomic-embed-text", url="http://localhost:11434/api/embeddings"):
        self.ef = OllamaEmbeddingFunction(model_name=model_name, url=url)
    
    def embed_documents(self, texts):
        return self.ef(texts)
    
    def embed_query(self, text):
        return self.ef([text])[0]

In [22]:
import chromadb
from chromadb.utils.embedding_functions import OllamaEmbeddingFunction
from langchain_chroma import Chroma
from chromadb.config import Settings


 
ollama_embedding_function = OllamaEmbeddingFunction(
    model_name="nomic-embed-text",
    url="http://localhost:11434/api/embeded"
)


# ChromaDB client settings for REST API
chroma_settings = Settings(
    chroma_server_host="localhost",
    chroma_server_http_port=8000,
)

# Create Chroma vector store with Ollama embeddings
vectorstore = Chroma(
    collection_name="example_collection",
    embedding_function=ollama_embedding_function,
    persist_directory="./chroma_langchain_db",
    client_settings=chroma_settings
)

In [23]:
# Example: Add a document
vectorstore.add_texts(["Hello world!"], metadatas=[{"source": "test"}])

# Example: Query similar documents
results = vectorstore.similarity_search("Hello", k=1)
print(results)

AttributeError: 'OllamaEmbeddingFunction' object has no attribute 'embed_documents'

### loading dummy data

In [None]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
    id=1,
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
    id=2,
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
    id=3,
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
    id=4,
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
    id=5,
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
    id=6,
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
    id=7,
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
    id=8,
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
    id=9,
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
    id=10,
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

## initialize

In [None]:
from langgraph.graph import END, StateGraph
from langgraph.graph.message import MessagesState

class GraphState(MessagesState):
    question: str
    documents: list[str] | None
    generated: str


workflow = StateGraph(GraphState)

## retrieve node

In [None]:
from langchain_core.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    format_document,
)

In [None]:
blog_prompt = PromptTemplate.from_template(
    "{page_content}\nSource: {source}"
)

In [None]:
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5}
)

@workflow.add_node
def retrieve(state: GraphState):
  messages = state["messages"]
  question = messages[-1].content
  docs = retriever.get_relevant_documents(question)
  documents_list = []
  for doc in docs:
    content = format_document(doc,blog_prompt)
    documents_list.append(content)
  return {"documents":documents_list}

## generate node

In [None]:
from langchain_core.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    format_document,
)
from langchain_core.prompts import MessagesPlaceholder

INSTRUCTIONS_TEMPLATE = """You are a helpful and thoughtful HR AI assistant employed by OffSec tasked with answering user questions related to Offsec HR.

Anything between the following `context` html blocks is retrieved from a OffSec knowledge bank, not part of the conversation with the user. \
<context>
{context}
<context/>

Remember: Only answer from the context above, providing the url when a source is used. If you cannot find an answer, politely tell the user you don't know the answer. """


ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", INSTRUCTIONS_TEMPLATE),
        MessagesPlaceholder(variable_name="chat_history"),
    ]
)


In [None]:
from langchain_openai import ChatOpenAI

base_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage

@workflow.add_node
def generate_response(state: GraphState):
    """
    Generate response

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    documents = state["documents"]
    chat_history = state["messages"]


    rag_chain = (
        ANSWER_PROMPT
        | base_llm
        | StrOutputParser()
    ).with_config(run_name="generated_response")
    response = rag_chain.invoke(
        {
            "context": "\n\n".join(documents),
            "chat_history": chat_history,
        }
    )

    return {"messages": [AIMessage(content=response)]}

## compiling

In [None]:
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve","generate_response")
workflow.add_edge("generate_response",END)

In [None]:
from langgraph.checkpoint.memory import MemorySaver
memory = MemorySaver()
graph = workflow.compile(checkpointer=memory)

# Testing

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

inputs = {"messages":[HumanMessage(content="Howdy sir!")]}

In [None]:
from langfuse.callback import CallbackHandler

langfuse_handler = CallbackHandler(
  secret_key="sk-lf-b9f1e3a9-2a8b-4c07-9faa-2630da389829",
  public_key="pk-lf-fe18fc16-819c-4423-a8c7-586fcbddecc5",
  host="https://langfuse.offsec.com"
)


In [None]:
config = {
        "configurable": {
            "thread_id": "75755",
            "tags": ["pops-dev"],
        },
        "callbacks": [langfuse_handler],
    }

In [None]:
graph.invoke(input=inputs,config=config)