##### Retrieval Augmented Generation Chatbot: Campus Knowledge Base Prototype

##### Importing libraries

In [14]:
import os
import glob
import numpy as np
import gradio as gr
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import Chroma 

In [15]:
MODEL = 'llama3.2:3b'
db_name = 'chroma_db'

In [16]:
# --- Load Documents ---
files = glob.glob('../knowledge_base*')
text_loader_kwargs = {'encoding': 'utf-8'}
documents = []

for file in files:
    doc_type = os.path.basename(file)
    loader = DirectoryLoader(
        '../knowledge_base',
        glob='*.txt',
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs
    )
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata['doc_type'] = doc_type
        documents.append(doc)

print(f" Successfully loaded {len(documents)} documents")

 Successfully loaded 11 documents


In [17]:
# --- Split into Chunks ---
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks")

Split into 11 chunks


In [18]:
len(chunks)

11

In [19]:
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types found: {', '.join(doc_types)}")

Document types found: knowledge_base


In [20]:
# --- Create Chroma Vector Store ---
embeddings = OllamaEmbeddings(model=MODEL)

In [21]:
# Store persistently in a folder (optional for reloading later)
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name
)
vectorstore.persist()
print(f" ChromaDB created at: {db_name}")

 ChromaDB created at: chroma_db


  vectorstore.persist()


In [22]:
# --- Setup Conversational Retrieval ---
llm = ChatOllama(model=MODEL)
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [23]:
query = 'Can you tell me about AAMUSTED in few sentences?'
result = conversation_chain.invoke({"question":query})
print(result["answer"])

AAMUSTED is a premier institution for STEM and Education in the region, established in 2018 with recognition from the National Accreditation Board (NAB). It offers a unique curriculum designed to meet the challenges of the 21st century, situated on a 200-acre campus in Kumasi. The university is led by Chancellor Dr. Nana Abena Mensah, who provides strategic direction and oversight for its academic and administrative functions.


In [24]:
# --- Chat Function for Gradio ---
def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [25]:
# --- Launch Gradio Chat UI ---
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
