# Jupyter Notebook Interactive Mode Demo
This is a scripting page that prints actual raw data in Python.

In [76]:
# Importing necessary libraries
# 1. Core Packages: dotenv, requests, httpx, and pymilvus.
# 2. LangChain and Extensions:
#     LangChain wires together the retrieval-augmented generation (RAG) pipeline, and Milvus is the vector database that stores the document embeddings.
#     langchain: Core package.
#     langchain-core, langchain-mistralai, langchain-cohere, langchain-milvus, langchain-community, langchain-text-splitters, and langchain-huggingface for additional modules.

In [77]:
try:
    import os
    from dotenv import load_dotenv
    from langchain.chains.combine_documents import create_stuff_documents_chain
    from langchain.schema import Document
    from langchain_core.prompts import PromptTemplate
    from langchain_mistralai.chat_models import ChatMistralAI
    from langchain_milvus import Milvus
    from langchain_community.document_loaders import WebBaseLoader, RecursiveUrlLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain.chains import create_retrieval_chain
    from langchain_huggingface import HuggingFaceEmbeddings
    from pymilvus import connections, utility
    from requests.exceptions import HTTPError
    from httpx import HTTPStatusError
    import sqlite3
    import warnings
except ImportError:
    # Installing dependencies if not already installed
    !pip install os requests httpx pymilvus sqlite3
    !pip install langchain langchain-core langchain-mistralai langchain-cohere langchain-milvus langchain-community langchain-text-splitters langchain-huggingface
    # Imports for RAG and other functionality
    import os
    # from dotenv import load_dotenv
    from langchain.chains.combine_documents import create_stuff_documents_chain
    from langchain.schema import Document
    from langchain_core.prompts import PromptTemplate
    from langchain_mistralai.chat_models import ChatMistralAI
    from langchain_milvus import Milvus
    from langchain_community.document_loaders import WebBaseLoader, RecursiveUrlLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain.chains import create_retrieval_chain
    from langchain_huggingface import HuggingFaceEmbeddings
    from pymilvus import connections, utility
    from requests.exceptions import HTTPError
    from httpx import HTTPStatusError
    import sqlite3
    import warnings

warnings.filterwarnings('ignore')

# Display to confirm successful import
print("Dependencies imported successfully.")

Dependencies imported successfully.


In [80]:
# API credentials are read from the environment so that no secret is stored in
# the notebook. Export COHERE_API_KEY and MISTRAL_API_KEY before starting the
# kernel; each value is None when the corresponding variable is not set.
# NOTE(review): the keys that used to be hardcoded here have been exposed and
# should be rotated.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
# Location of the local Milvus database file
MILVUS_URI = "milvus/milvus_vector.db"
# Model name handed to HuggingFaceEmbeddings
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

def vector_store_check(uri):
    """
    Connect to the Milvus database at `uri` and report whether the
    "IT_support" collection exists in it.

    Args:
        uri (str): Path to the local milvus db, or a remote URI containing "://"

    Returns:
        bool: True if the "IT_support" collection exists, False otherwise
    """
    # Create the parent directory only when there is one to create:
    # os.makedirs("") raises FileNotFoundError for a bare filename such as
    # "vec.db", and a remote URI ("http://host:19530") must not leave a stray
    # "http:" directory behind in the working directory.
    parent_dir = os.path.dirname(uri)
    if parent_dir and "://" not in uri:
        os.makedirs(parent_dir, exist_ok=True)

    # Connect to the Milvus database under the "default" connection alias
    connections.connect("default", uri=uri)

    # Return True if exists, False otherwise
    return utility.has_collection("IT_support")

def get_embedding_function():
    """
    Build the HuggingFace embedding object for the model named by MODEL_NAME

    Returns:
        HuggingFaceEmbeddings: a new instance on every call
    """
    return HuggingFaceEmbeddings(model_name=MODEL_NAME)

def load_documents_from_web():
    """
    Crawl CORPUS_SOURCE with a RecursiveUrlLoader and return what it loads

    NOTE(review): CORPUS_SOURCE is read as a module-level name; make sure it is
    defined before this function is called.

    Returns:
        list: The documents loaded from the web
    """
    # The same address is used as start URL and as base_url; together with
    # prevent_outside=True this presumably keeps the crawl under that address
    # (see the RecursiveUrlLoader documentation).
    crawler = RecursiveUrlLoader(
        url=CORPUS_SOURCE,
        base_url=CORPUS_SOURCE,
        prevent_outside=True,
    )
    return crawler.load()

def load_existing_db(uri=MILVUS_URI):
    """
    Open the "IT_support" collection of the Milvus database found at `uri`.

    Args:
        uri (str, optional): Path to the local milvus db. Defaults to MILVUS_URI.

    Returns:
        vector_store: The Milvus vector store bound to that collection
    """
    # Nothing is inserted here: the store is only attached to the collection,
    # with a freshly built embedding function.
    store = Milvus(
        connection_args={"uri": uri},
        embedding_function=get_embedding_function(),
        collection_name="IT_support",
    )
    print("Vector Store Loaded")
    return store

def split_documents(documents, chunk_size=1000, chunk_overlap=300):
    """
    Split the documents into chunks

    Args:
        documents (list): The documents to split
        chunk_size (int, optional): Size of each chunk in characters. Defaults to 1000.
        chunk_overlap (int, optional): Number of characters shared by consecutive
            chunks. Defaults to 300.

    Returns:
        list: list of chunks of documents
    """
    # Create a text splitter to split the documents into chunks; the sizes are
    # parameters so callers can tune chunking without editing this function.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        is_separator_regex=False,  # Don't split on regex
    )
    # Split the documents into chunks
    return text_splitter.split_documents(documents)

def create_vector_store(docs, embeddings, uri):
    """
    Build the "IT_support" Milvus collection from the given documents.

    Args:
        docs (list): A list of documents to be stored in the vector store.
        embeddings : A function or model that generates embeddings for the documents.
        uri (str): Path to the local milvus db

    Returns:
        vector_store: The vector store created
    """
    build_options = dict(
        documents=docs,
        embedding=embeddings,
        collection_name="IT_support",
        connection_args={"uri": uri},
        drop_old=True,  # an existing collection is dropped, not appended to
    )
    store = Milvus.from_documents(**build_options)
    print("Vector Store Created")
    return store

# Confirmation that the configuration cell ran to completion
print("Config variables and required functions loaded")

Config vaiables and required functions loaded


In [81]:
def initialize_milvus(uri: str=MILVUS_URI):
    """
    Return the vector store for the RAG model, building it on first use

    When the collection is already present at `uri` it is simply loaded.
    Otherwise the corpus is loaded from the web, split into chunks, and stored
    in a newly created vector store.

    Args:
        uri (str, optional): Path to the local milvus db. Defaults to MILVUS_URI.

    Returns:
        vector_store: The loaded or newly created vector store
    """
    # Fast path: reuse what a previous run already stored
    if vector_store_check(uri):
        return load_existing_db(uri)

    embedder = get_embedding_function()
    print("Embeddings Loaded")
    pages = load_documents_from_web()
    print("Documents Loaded")
    print(len(pages))

    # Split the documents into chunks
    chunks = split_documents(documents=pages)
    print("Documents Splitting completed")

    return create_vector_store(chunks, embedder, uri)

initialize_milvus()

ModuleNotFoundError: No module named 'milvus_lite'

In [82]:
def create_prompt():
    """
    Create a prompt template for the RAG model

    The template has two placeholders: {context} for the retrieved text and
    {input} for the user's question.

    Returns:
        PromptTemplate: The prompt template for the RAG model
    """
    # Define the prompt template
    PROMPT_TEMPLATE = """
    Human: You are an AI assistant, and provides answers to questions by using fact based and statistical information when possible.
    Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags.
    Only use the information provided in the <context> tags.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    <context>
    {context}
    </context>

    <question>
    {input}
    </question>

    The response should be specific and use statistics or numbers when possible.

    Assistant:"""

    # Create a PromptTemplate instance with the defined template and input variables.
    # The declared variables must name the placeholders that actually occur in
    # the template: {context} and {input} (not "question").
    prompt = PromptTemplate(
        template=PROMPT_TEMPLATE, input_variables=["context", "input"]
    )
    print("Prompt Created")

    # Return the created prompt template to be used with the RAG model
    return prompt

print(create_prompt())

Prompt Created
input_variables=['context', 'input'] template="\n    Human: You are an AI assistant, and provides answers to questions by using fact based and statistical information when possible.\n    Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags.\n    Only use the information provided in the <context> tags.\n    If you don't know the answer, just say that you don't know, don't try to make up an answer.\n    <context>\n    {context}\n    </context>\n\n    <question>\n    {input}\n    </question>\n\n    The response should be specific and use statistics or numbers when possible.\n\n    Assistant:"
