In [1]:
# imports

import os
import glob
from dotenv import load_dotenv
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# imports for langchain

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import requests

In [3]:
# price is a factor for our company, so we're going to use a low cost model

MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [4]:
# Load environment variables in a file called .env

load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [5]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

folders = glob.glob("knowledge-base/*")

# With thanks to CG and Jon R, students on the course, for this fix needed for some users
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents)

In [7]:
len(chunks)

123

In [8]:
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types found: {', '.join(doc_types)}")

Document types found: employees, company, contracts, products


In [9]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLLite

embeddings = OpenAIEmbeddings()

# Create vectorstore

# BEFORE
# vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

# AFTER
vectorstore = FAISS.from_documents(chunks, embedding=embeddings)

total_vectors = vectorstore.index.ntotal
dimensions = vectorstore.index.d

print(f"There are {total_vectors} vectors with {dimensions:,} dimensions in the vector store")

There are 123 vectors with 1,536 dimensions in the vector store


In [54]:
def query_ollama(prompt, model):
    url = "http://localhost:11434/api/generate"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }

    response = requests.post(url, json=payload, headers=headers)

    try:
        data = response.json()
    except Exception as e:
        raise ValueError(f"Invalid JSON returned by Ollama: {response.text}") from e

    if "response" not in data:
        raise ValueError(f"Ollama returned unexpected response: {data}")

    return data["response"]


In [83]:
from langchain_core.runnables import Runnable
from langchain_core.messages import AIMessage
from typing import Union

class LangchainOllamaRunnable(Runnable):
    def __init__(self, model_name="tinyllama"):
        self.model_name = model_name
    def invoke(self, input, *arg, **kwargs):
        if isinstance(input, dict):
            input = input.get("question", "")
        else:
            input = str(input)

        response = query_ollama(input, model=self.model_name)
        return AIMessage(content=response)


In [49]:
class OllamaRunnable(Runnable):
    def __init__(self, model_name="llama3"):
        self.model = model_name

    def invoke(self, input):
        if isinstance(input, dict):
            prompt = input.get("question", "")
        else:
            prompt = str(input)

        return {"answer": query_ollama(prompt, model=self.model)}


In [84]:
USE_OLLAMA = True  # Set to False if you want to use OpenAI

llm = LangchainOllamaRunnable(model_name="tinyllama")


In [85]:
# create a new Chat with OpenAI
# llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True)


# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [88]:
query = "Can you describe Insurellm in a few sentences"
conversation_chain.invoke({
    "question": query,
})

ValueError: One input key expected got ['chat_history', 'question']

In [89]:
# Wrapping that in a function

def chat(message, history):
    result = conversation_chain.invoke({
        "question": message,
        "chat_history": history or []
    })
    return result["answer"]


In [90]:
view = gr.ChatInterface(chat).launch(inbrowser=True)

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
