In [None]:
from langchain_ollama import ChatOllama
from langchain_core.messages import SystemMessage, HumanMessage
from IPython.display import display, Markdown, update_display

In [None]:
# Smoke test: stream a short completion from the local Ollama model and
# re-render the accumulated text as Markdown each time a chunk arrives.
model = ChatOllama(model = 'gpt-oss:latest')
messages = HumanMessage(content = '50 words about Kratos')

stream = model.stream(input = [messages])

# An empty Markdown placeholder is displayed first; the returned handle is
# then used to overwrite that same output area on every iteration.
display_handle = display(Markdown(""), display_id = True)

response = ''
for chunk in stream:
    # A chunk may carry no text; fall back to '' so concatenation is safe.
    response += chunk.content or ''
    display_handle.update(Markdown(response))


In [None]:
import glob
import os
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage, convert_to_messages
from langchain_chroma import Chroma

In [None]:
# Name of the Ollama model used to embed text chunks.
EMBEDDING_MODEL = 'all-minilm'

# Directory in which the Chroma vector store is persisted.
DB_NAME = 'vector_db'

In [None]:
# Gather every Markdown file below knowledge-base/ (searched recursively) and
# concatenate their contents into one string, mainly to report the corpus size.
knowledge_base_files = glob.glob(pathname = 'knowledge-base/**/*.md', recursive = True)

sections = []

for file in knowledge_base_files:
    # Read as UTF-8 explicitly: without `encoding`, open() falls back to the
    # platform's locale encoding, which can fail or mis-decode on some systems.
    with open(file, 'r', encoding = 'utf-8') as data:
        sections.append(data.read())
        # Every file's text is followed by a blank-line separator.
        sections.append('\n\n\n')

# Join once at the end instead of growing the string piece by piece.
knowledge_base = ''.join(sections)

print(f"Total files: {len(knowledge_base_files)}, Chars: {len(knowledge_base)}")

In [None]:
# Load every Markdown file as a Document, one top-level folder of
# knowledge-base/ at a time, and tag each Document with the folder it came from.
knowledge_base_folders = glob.glob(pathname = 'knowledge-base/*')

documents = []

# Sorted so the documents are collected in the same order on every run
# (glob makes no ordering guarantee).
for folder in sorted(knowledge_base_folders):
    # The pattern above also matches plain files sitting directly in
    # knowledge-base/; only directories can be handed to DirectoryLoader.
    if not os.path.isdir(folder):
        continue

    print(folder)
    loader = DirectoryLoader(
        path = folder,
        glob = "**/*.md",
        recursive = True,
        loader_cls = TextLoader,
        loader_kwargs = {"encoding":'utf-8'})

    for doc in loader.lazy_load():
        # The folder name becomes the document's category.
        doc.metadata['doc_type'] = os.path.basename(folder)
        documents.append(doc)

In [None]:
# Break the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 512,
    chunk_overlap = 64,
)
chunks = text_splitter.split_documents(documents)

In [None]:
# Embedding function used for every Chroma store in this notebook.
embeddings = OllamaEmbeddings(model = EMBEDDING_MODEL)

# Open the persisted store and delete its collection, so the store built
# afterwards starts from a clean slate.
Chroma(
    persist_directory = DB_NAME,
    embedding_function = embeddings,
).delete_collection()

In [None]:
# Embed all chunks and write them to the Chroma store persisted under DB_NAME.
vectordb = Chroma.from_documents(chunks, embedding = embeddings, persist_directory = DB_NAME)

In [None]:
# Report how many vectors the store holds and the length of one embedding.
vectors = vectordb._collection.count()

# Fetch a single stored embedding to measure its length.
sample = vectordb.get(limit = 1, include = ['embeddings']).get('embeddings')

# `sample` may be missing or empty when the store holds no vectors. It is
# checked with `is not None` / len() rather than truthiness because it may be
# an array, whose truth value is ambiguous.
if sample is not None and len(sample) > 0:
    # Length of the first (only) embedding, not the shape of the whole batch.
    dimensions = len(sample[0])
else:
    dimensions = 0

print(f"Vectors: {vectors} with dimensions: {dimensions}")

In [None]:
# System prompt for the assistant. `{context}` is a str.format placeholder
# that is filled in with the retrieved context for each question.
SYSTEM_PROMPT_TEMPLATE = (
    "\n"
    "You are a knowledgeable, friendly assistant representing the company Insurellm.\n"
    "You are chatting with a user about Insurellm.\n"
    "If relevant, use the given context to answer any question.\n"
    "If you don't know the answer, say so.\n"
    "Context:\n"
    "{context}\n"
)

In [None]:
# Chat model that writes the answers.
llm = ChatOllama(
    model = 'gpt-oss:latest',
)

# Retriever over the vector store, used to look up context for a question.
retriever = vectordb.as_retriever()

In [None]:
def chat(message, history):
    """Stream an answer to `message`, grounded in retrieved context.

    Args:
        message: The user's latest message text.
        history: Earlier turns of the conversation, in any form accepted by
            `convert_to_messages`.

    Yields:
        The response text accumulated so far, once per streamed chunk, so the
        caller can re-render the growing answer.
    """
    history = convert_to_messages(history)

    # Feed the model the text of the retrieved documents; formatting the list
    # itself would embed the Document objects' repr in the prompt.
    docs = retriever.invoke(message)
    context = "\n\n".join(doc.page_content for doc in docs)

    system = SystemMessage(content = SYSTEM_PROMPT_TEMPLATE.format(context = context))
    user = HumanMessage(content = message)

    # Earlier turns go between the system prompt and the new message so that
    # follow-up questions keep their conversational context.
    stream = llm.stream(input = [system, *history, user])

    response = ''
    for chunk in stream:
        # A chunk may carry no text; fall back to '' so concatenation is safe.
        response += chunk.content or ''
        yield response

In [None]:
from gradio import ChatInterface

In [None]:
# Serve `chat` through a Gradio chat UI.
ChatInterface(
    fn = chat,
    type = 'messages',
).launch()