## Environment Setup:

In [1]:
from functools import partial
from rich.console import Console
from rich.style import Style
from rich.theme import Theme

console = Console()
base_style = Style(color="#60B000", bold=True)
pprint = partial(console.print, style=base_style)

In [2]:
import os

model_name = os.environ['OLLAMA_MODEL']
embed_model_name = os.environ['OLLAMA_EMBEDDING_MODEL']
ollama_address = os.environ['OLLAMA_ADDRESS']
ollama_port = os.environ['OLLAMA_PORT']

print(f'models: {model_name}, {embed_model_name}')
print(f'ollama hosted at: {ollama_address}:{ollama_port}')

models: mistral:7b, mxbai-embed-large
ollama hosted at: http://ollama:11434


In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

template = """Question: {question}

Answer: Let's think step by step."""

prompt = ChatPromptTemplate.from_template(template)

print(ollama_address +":"+ ollama_port)

model = OllamaLLM(url=ollama_address +":"+ ollama_port ,model=model_name)

chain = prompt | model

def invoke(question):
    global chain
    return chain.invoke({"question": question})

# invoke("What is LangChain? and why should I use it?")

http://ollama:11434


In [4]:
from langchain_ollama import OllamaEmbeddings

embed = OllamaEmbeddings(
    base_url=ollama_address+":"+ollama_port,
    model=embed_model_name
)

def embed_text(text):
    global embed
    return embed.embed_query(text)

input_text = "The meaning of life is 42"
# print(embed_text(input_text)[:10])

# setup llm pipeline

In [5]:
from langchain_core.documents import Document
from langchain_chroma import Chroma
import chromadb

database_address = os.environ['IP_ADDRESS']
database_port = os.environ['DATABASE_PORT']

chroma_client = chromadb.HttpClient(host=database_address, port=database_port)
collection = chroma_client.get_or_create_collection(name="data")

vector_store_client = Chroma(
    client = chroma_client,
    collection_name="data",
    embedding_function=embed
)

In [6]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Annotated
from typing import Literal
from pydantic import BaseModel, Field
from ollama import chat
import json


class Search(BaseModel):
    query: str
    info_type: str
    #selection: Literal["race", "class", "subclass"]

#class Search(TypedDict):
#    """Search query."""
#    
#    query: Annotated[str, ..., "Search query to run."]
#    selection: Annotated[
#        Literal["race", "class", "subclass"],
#        ...,
#        "Type of creature information to query",
#    ]

# Define state for application


search_template = """Generate query around question that will help in finding informations in DnD race and class database. 
Add additional descriptions in querry to aid the process. 
Answer using the following format:

query: what to ask the database
info_type: race or class

Question: {question}"""

search_prompt = ChatPromptTemplate.from_template(search_template)

structured_model = OllamaLLM(url=ollama_address +":"+ ollama_port ,model=model_name, format = "json")

search_chain = search_prompt | structured_model

class State(TypedDict):
    question: str
    query: Search
    context: List[Document]
    answer: str
        
def analyze_query(state: State):
    response = search_chain.invoke({"question": state['question'], "format": Search.model_json_schema()})
    query = response
    return {"query": query}
        
# Define application steps
def retrieve(state: State):
    query = state["query"]
    
    pprint(query)
    
    query = json.loads(query)
    
    retrieved_docs = []
    
    if "Query" in query.keys():
        retrieved_docs = vector_store_client.similarity_search(
            query["Query"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    
    elif "query" in query.keys():
        retrieved_docs = vector_store_client.similarity_search(
            query["query"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    else:
        retrieved_docs = vector_store_client.similarity_search(
            state["question"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    
    return {"context": retrieved_docs}

new_template = """You are a helper for a DnD homebrew campaign. Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use five sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

new_prompt = ChatPromptTemplate.from_template(new_template)


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = new_prompt.invoke({"question": state["question"], "context": docs_content})
    response = model.invoke(messages)
    # return {"answer": response.content}
    return {"answer": response}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([analyze_query, retrieve, generate])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

In [7]:
for step in graph.stream(
    {"question": "What races would be suitable for cyberpunk advanture?"},
    stream_mode="updates",
):
    print(f"{step}\n\n----------------\n")

{'analyze_query': {'query': '{\n"query": "Which DnD races have cybernetic enhancements or traits that could fit well in a cyberpunk adventure setting?",\n"info_type": "race"\n}'}}

----------------



{'retrieve': {'context': [Document(id='b3c1aea1-ecb7-457d-afab-a833079dab89', metadata={'source': 'htmls/Cyborg.html', 'type': 'race'}, page_content='A race whose physical abilities are extended beyond normal limitations by mechanical elements built into the body. Ability Score Increase . Your Intelligence score increases by 2. Age . Cyborgs can live up to 500 years. After that, their human parts start to fail and can no longer be kept on life support from the robot parts. Alignment . Cyborgs don’t tend to lean to any particular alignment unless they are programmed to. Size . Cyborgs are a little taller than humans on average and can weigh over 400 lb. Your size is medium. Speed . Your walking speed is equal to 30 ft. Thermal Vision . You can see in dim light within 60 feet of you as if it were bright light, and in darkness as if it were dim light, in addition to being able to see through objects in this range. You can see different colors, the hotter the object the more on the warm sp

In [8]:
result = graph.invoke(
    {"question": "What races would be suitable for cyberpunk advanture?"})

pprint(f'Context: {result["context"]}\n\n')
pprint(f'Answer: {result["answer"]}')

In [2]:
%%writefile server_app.py
# https://python.langchain.com/docs/langserve#server
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langserve import add_routes

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.prompt_values import ChatPromptValue
from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough
from langchain_core.runnables.passthrough import RunnableAssign
from langchain_community.document_transformers import LongContextReorder
from functools import partial
from operator import itemgetter

import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

from langchain_ollama import OllamaEmbeddings

from langchain_core.documents import Document
from langchain_chroma import Chroma
import chromadb

from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Annotated
from typing import Literal
from pydantic import BaseModel, Field
from ollama import chat, AsyncClient
from ollama import ChatResponse
import json

# Get env variables

model_name = os.environ['OLLAMA_MODEL']
embed_model_name = os.environ['OLLAMA_EMBEDDING_MODEL']
ollama_address = os.environ['OLLAMA_ADDRESS']
ollama_port = os.environ['OLLAMA_PORT']

print(f'models: {model_name}, {embed_model_name}')
print(f'ollama hosted at: {ollama_address}:{ollama_port}')

# create basic model

template = """Question: {question}

Answer: Let's think step by step."""

prompt = ChatPromptTemplate.from_template(template)

print(ollama_address +":"+ ollama_port)

model = OllamaLLM(url=ollama_address +":"+ ollama_port ,model=model_name)

chain = prompt | model

def invoke(question):
    global chain
    return chain.invoke({"question": question})

# get embedding model

embed = OllamaEmbeddings(
    base_url=ollama_address+":"+ollama_port,
    model=embed_model_name
)

# get vectorstore

database_address = os.environ['IP_ADDRESS']
database_port = os.environ['DATABASE_PORT']

chroma_client = chromadb.HttpClient(host=database_address, port=database_port)
collection = chroma_client.get_or_create_collection(name="data")

vector_store_client = Chroma(
    client = chroma_client,
    collection_name="annotated_data",
    embedding_function=embed
)

# RAG

class Search(BaseModel):
    query: str
    info_type: str
    #selection: Literal["race", "class", "subclass"]

#class Search(TypedDict):
#    """Search query."""
#    
#    query: Annotated[str, ..., "Search query to run."]
#    selection: Annotated[
#        Literal["race", "class", "subclass"],
#        ...,
#        "Type of creature information to query",
#    ]

# Define state for application


search_template = """Generate query around question that will help in finding informations in DnD race and class database. 
Add additional descriptions in querry to aid the process. Do not ask for official DnD books. 
Answer using the following format:

query: what to ask the database
info_type: race or class

Question: {question}"""

search_prompt = ChatPromptTemplate.from_template(search_template)

structured_model = OllamaLLM(url=ollama_address +":"+ ollama_port ,model=model_name, format = "json")

search_chain = search_prompt | structured_model

class State(TypedDict):
    question: str
    query: Search
    context: List[Document]
    answer: str
        
def analyze_query(state: State):
    print(state["question"])
    response = search_chain.invoke({"question": state['question'], "format": Search.model_json_schema()})
    query = response
    return {"query": query}
        
# Define application steps
def retrieve(state: State):
    query = state["query"]
    
    print(query)
    
    query = json.loads(query)
    
    retrieved_docs = []
    
    if "Query" in query.keys():
        retrieved_docs = vector_store_client.similarity_search(
            query["Query"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    
    elif "query" in query.keys():
        retrieved_docs = vector_store_client.similarity_search(
            query["query"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    else:
        retrieved_docs = vector_store_client.similarity_search(
            state["question"]
            #filter=lambda doc: doc.metadata.get("section") == query["section"],
        )
    
    return {"context": retrieved_docs}

new_template = """You are a helper for a DnD campaign. Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use five sentences maximum and keep the answer as concise as possible.
Do not talk about information from context that isn't useful for this question.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

new_prompt = ChatPromptTemplate.from_template(new_template)


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = new_prompt.invoke({"question": state["question"], "context": docs_content})
    response = model.invoke(messages)
    # return {"answer": response.content}
    return {"answer": response}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([analyze_query, retrieve, generate])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

# Routes
app = FastAPI(
  title="LangChain Server",
  version="1.0",
  description="A simple api server using Langchain's Runnable interfaces",
)

origins = [
    "http://localhost:5173",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

add_routes(
    app,
    RunnableLambda(lambda x: invoke(x)),
    path="/basic_chat",
)

add_routes(
    app,
    RunnableLambda(lambda x: graph.invoke({"question": x})["answer"]),
    path="/generator",
)

add_routes(
    app,
    RunnableLambda(lambda x: vector_store_client.as_retriever(x)),
    path="/retriever",
)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=5678)

Overwriting server_app.py


In [5]:
!python server_app.py

[32mINFO[0m:     Started server process [[36m127[0m]
[32mINFO[0m:     Waiting for application startup.

     __          ___      .__   __.   _______      _______. _______ .______     ____    ____  _______
    |  |        /   \     |  \ |  |  /  _____|    /       ||   ____||   _  \    \   \  /   / |   ____|
    |  |       /  ^  \    |   \|  | |  |  __     |   (----`|  |__   |  |_)  |    \   \/   /  |  |__
    |  |      /  /_\  \   |  . `  | |  | |_ |     \   \    |   __|  |      /      \      /   |   __|
    |  `----./  _____  \  |  |\   | |  |__| | .----)   |   |  |____ |  |\  \----.  \    /    |  |____
    |_______/__/     \__\ |__| \__|  \______| |_______/    |_______|| _| `._____|   \__/     |_______|
    
[1;32;40mLANGSERVE:[0m Playground for chain "/generator/" is live at:
[1;32;40mLANGSERVE:[0m  │
[1;32;40mLANGSERVE:[0m  └──> /generator/playground/
[1;32;40mLANGSERVE:[0m
[1;32;40mLANGSERVE:[0m Playground for chain "/basic_chat/" is live at:
[1;32;40mLANGSERVE:[

Can you tell me other race suited for desert, other than the camelfolk?
Prompt: Can you tell me other race suited for desert, other than the camelfolk?
Calling function:query_database
Arguments:{'metadata': '', 'query': 'race suitable for desert environment other than camelfolk'}
Searching for: race suitable for desert environment other than camelfolk with set metadata: []
Function output:[Document(metadata={'Header 3': 'Society[edit]', 'url': 'https://www.dandwiki.com/wiki/Camelfolk_(5e_Race)\n', 'section': 'race'}, page_content="Their society is based on both compassion and resilience. Despite their reclusive nature they have been known to help out those who have fallen unconscious in the deserts, usually leaving them outside villages in the desert for others to take care of, or leaving them by an oasis, usually, the furthest away possible from the one where they live. They rarely ever leave the desert unless they are into exotic merchandise. Population in their cities is homogenousl

thank you
Prompt: thank you
Calling function:query_database
Arguments:{'metadata': 'race', 'query': 'An agile and stealthy humanoid with keener senses than humans, elves often excel in the arcane arts.'}
Searching for: An agile and stealthy humanoid with keener senses than humans, elves often excel in the arcane arts. with set metadata: []
Function output:[Document(metadata={'Header 3': 'False Elf Traits[edit]', 'url': 'https://www.dandwiki.com/wiki/False_Elf_(5e_Race)\n', 'section': 'race'}, page_content="Specialized elf hunters Ability Score Increase . Your Dexterity score increases by 2, and your Charisma score increases by 1. Age . False elves live up to about 150 or so years, usually maturing around 20, or when they kill their first elf. Alignment . False elves do not see what they do as evil. It is more like survival. They range from pure evil to neutral. Size . As they mimic elves, they share about the same height and build. Your size is Medium. Speed . Your base walking speed i

Final response:  Here is the information from the provided text, formatted for easier reading:

Race: Soulless Undead (Demigod, Variant 2)

Description:
- Bonus Traits: Grave Portfolio, Death's Embrace, Dark Sigil
- Grave Portfolio: Advantage on saving throws against undead, can cast Vampiric Touch once per long rest using this trait
- Life Drinker: Ability to cast Vampiric Touch once per long rest at 3rd level
- Shroud: Temporary hit points upon action usage, lasts for 1 hour or until depleted, regained after long rest
- Purifying Mark: Undead recognize you as one of them and will not attack you unless ordered to

Ancestry: Soulless emerge from another race. You learn all proficiencies from the other race or choose two proficiencies (including tools, single weapons, skills, and languages)

Languages: Choose 3 languages of your choice. Normally you know the languages from before your death. If your ancestry race does not have 3 languages, you can choose the rest up to 3.
 Here is the i

In [4]:
%%writefile server_app.py
import asyncio

# https://python.langchain.com/docs/langserve#server
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langserve import add_routes

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.prompt_values import ChatPromptValue
from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough
from langchain_core.runnables.passthrough import RunnableAssign
from langchain_community.document_transformers import LongContextReorder
from functools import partial
from operator import itemgetter

import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

from langchain_ollama import OllamaEmbeddings

from langchain_core.documents import Document
from langchain_chroma import Chroma
import chromadb

from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Annotated
from typing import Literal
from pydantic import BaseModel, Field
import ollama
from ollama import ChatResponse
import json

from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import LocalFileStore
from langchain.storage._lc_store import create_kv_docstore


# Get environment variables

model_name = os.environ['OLLAMA_MODEL']
embed_model_name = os.environ['OLLAMA_EMBEDDING_MODEL']
ollama_address = os.environ['OLLAMA_ADDRESS']
ollama_port = os.environ['OLLAMA_PORT']

# Vector database

embed = OllamaEmbeddings(
    base_url=ollama_address+":"+ollama_port,
    model=embed_model_name
)

database_address = os.environ['IP_ADDRESS']
database_port = os.environ['DATABASE_PORT']

chroma_client = chromadb.HttpClient(host=database_address, port=database_port)
collection = chroma_client.get_or_create_collection(name="data")

vector_store_client = Chroma(
    client = chroma_client,
    collection_name="annotated_data",
    embedding_function=embed
)

filestore = LocalFileStore("./docstore")
docstore = create_kv_docstore(filestore)

text_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.HTML, 
    chunk_size=400,
    chunk_overlap=80
)

retriever = ParentDocumentRetriever(
    vectorstore = vector_store_client,
    docstore=docstore,
    child_splitter=text_splitter
)

# define tool functions
def query_database(query: str, metadata : str = "") -> str:
    """
    Description of race or class that query needs to find
    
    Args:
        query (set): description of race or class that query needs to find
        metadata (set): The additional arguments to filter by e.g "race", "class" or "subclass"
        
    Returns:
        str: The fragment of homebrew
    """
    metadata = [term for term in metadata if term in ["race", "class", "subclass"]]
    print(f'Searching for: {query} with set metadata: {metadata}')
    
    retrieved_docs = []
    
    retrieved_docs = retriever.invoke(query)
    
    # Comment that out
    return retrieved_docs
    
    if len(metadata) > 0:
        retrieved_docs = vector_store_client.similarity_search(
            query,
            filter=lambda doc: doc.metadata.get("section") == metadata,
        )
    else:
        retrieved_docs = vector_store_client.similarity_search(
            query
        )
    
    return ["source:" + x.metadata["url"] + " content:" + x.page_content for x in retrieved_docs]

query_database_tool = {
    'type': 'function',
    'function': {
        'name': 'query_database',
        'description': 'Find race or class from DnD based on provided description',
        'parameters': {
            'type': 'object',
            'required': ['query', 'metadata'],
            'properties': {
                'query': {'type': 'string', 'description': 'description of race or class, their prefered characteristics and strengths'},
                'metadata': {'type': 'string', 'description': 'additional argument to filter e.g "race", "class" or "subclass"'},
            },
        },
    },
}

def recall_conversation(query: str) -> str:
    """
    Make a query about the current conversation
    
    Args:
        query (set): What to search for
        
    Returns:
        str: The fragment of conversation
    """
    
    return query



available_functions = {
    'query_database': query_database,
    'recall_conversation': recall_conversation,
}

async def generate_response(query):
    print(query)
    
    client = ollama.AsyncClient(host=ollama_address + ":" + ollama_port)
    
    messages = [{'role': 'user', 'content': query}]
    print('Prompt:', messages[0]['content'])
    
    response: ChatResponse = await client.chat(
        model_name,
        messages=messages,
        tools=[query_database_tool, recall_conversation]
    )
    
    output = dict()
    final_response = ""
    
    if response.message.tool_calls:
        # There may be multiple tool calls in the response
        for tool in response.message.tool_calls:
            # Ensure the function is available, and then call it
            if function_to_call:= available_functions.get(tool.function.name):
                print(f'Calling function:{tool.function.name}')
                print(f'Arguments:{tool.function.arguments}')
                output[tool.function.name] = function_to_call(**tool.function.arguments)
                print(f'Function output:{output[tool.function.name]}')
            else:
                print(f'Function {tool.function.name} not found')
    # Only need to chat with the model using the tool call results
    if response.message.tool_calls:
        # Add the function response to messages for the model to use
        messages.append(response.message)
        
        # There may be multiple tool calls
        for tool in response.message.tool_calls:
            # If the response if too long
            if len(output[tool.function.name]) > 1000:
                output[tool.function.name] = await get_summary(output[tool.function.name], query)
            # Add tool responses
            messages.append({'role': 'tool', 'content': str(output[tool.function.name]), 'tool_name': tool.function.name})

        # Get final response from model with function outputs
        final_response = await client.chat(model_name, messages=messages)
        print(f'Final response: {final_response.message.content}')
    else:
        print(f'No tool calls returned from model')
        final_response = await client.chat(model_name, messages=messages)
        print(f'Final response: {final_response.message.content}')
                                            
    return final_response

def sync_gen(query):
    response = asyncio.run(generate_response(query))
    print(response["message"]["content"])
    return response["message"]["content"]
    
# Routes
app = FastAPI(
  title="LangChain Server",
  version="1.0",
  description="A simple api server using Langchain's Runnable interfaces",
)

origins = [
    "http://localhost:5173",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

add_routes(
    app,
    RunnableLambda(lambda x: invoke(x)),
    path="/basic_chat",
)

add_routes(
    app,
    RunnableLambda(lambda x: sync_gen(x)),
    path="/generator",
)

add_routes(
    app,
    RunnableLambda(lambda x: vector_store_client.as_retriever(x)),
    path="/retriever",
)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=5678)

#await generate_response("Hi")

Overwriting server_app.py
