In [10]:
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

from typing import Union, List

from Database.Database_Weaviate import Database_Weaviate
from LLM.LLM_GGUF import LLM_GGUF

from langchain_weaviate.vectorstores import WeaviateVectorStore

In [18]:
class RAG_Bot:
    """
    Retrieval-augmented chat bot on top of Weaviate collections and a GGUF LLM.

    A ``WeaviateVectorStore`` is built at call time for whichever collection a
    method is asked to work on, because the collection name is only known then.
    """

    # Collections used when the caller does not pass ``collection_names``.
    DEFAULT_COLLECTION_NAMES = ('Uk', 'Wales', 'Nothernireland', 'Scotland')

    def __init__(self, collection_names=None):
        """
        Initializes the RAG_Bot object.

        Args:
            collection_names (list, optional): A list of collection names.
                Defaults to ['Uk', 'Wales', 'Nothernireland', 'Scotland'].
        """
        # Build a fresh list per instance instead of sharing one mutable default.
        if collection_names is None:
            collection_names = list(self.DEFAULT_COLLECTION_NAMES)

        # Remembered so that query_all() covers exactly the configured collections.
        self.collection_names = collection_names
        self.vector_db = Database_Weaviate(collection_names=collection_names, text_splitter='SpaCy', embedding_model="Google")
        self.llm = LLM_GGUF()

    def _use_collection(self, collection_name):
        """Points ``self.vector_db.vector_store`` at ``collection_name`` and returns the store."""
        store = WeaviateVectorStore(
            client=self.vector_db.client,
            index_name=collection_name,
            text_key="text",
            embedding=self.vector_db.embeddings,
        )
        self.vector_db.vector_store = store
        return store

    @staticmethod
    def _format_docs(docs):
        """Prints a short preview of each retrieved document and joins their contents into one context string."""
        print(f'The retrieved documents are:')
        for idx, doc in enumerate(docs):
            print(f'{idx} - Content: {doc.page_content[:50]}... - MetaData: {doc.metadata}')
        return "\n\n".join(doc.page_content for doc in docs)

    def _report_if_empty(self, collection_name):
        """Prints whether ``collection_name`` is empty and returns that flag."""
        is_empty = self.is_collection_empty(collection_name)
        print(f'The Collection: {collection_name} is empty: {is_empty}')
        return is_empty

    def _answer_from_collection(self, collection_name, query, k, max_new_tokens):
        """Retrieves the top ``k`` documents from ``collection_name`` and returns the LLM response for ``query``."""
        retriever = self._use_collection(collection_name).as_retriever(
            search_kwargs={"k": k})
        retrieved_docs = retriever.invoke(query)
        context = self._format_docs(retrieved_docs)

        # Pass plain strings: wrapping them in braces would hand one-element sets to the LLM.
        return self.llm.chat(context=context,
                             query=query,
                             max_new_tokens=max_new_tokens)

    def add_text(self, collection_name, text, metadata=None):
        """
        Adds text data to a specified collection in the Weaviate database.

        Args:
            collection_name (str): The name of the collection in the database.
            text (str): The text data to be added.
            metadata (dict, optional): Additional metadata associated with the text.
        """
        self._use_collection(collection_name)

        self.vector_db.add_text_to_db(
            collection_name=collection_name,
            text=text,
            metadata=metadata
        )

    def query(self, collection_name, query, k=1, max_new_tokens=250):
        """
        Performs a RAG query on the specified collection using the bot's LLM.

        Nothing is retrieved when the collection is empty.

        Args:
            collection_name (str): The name of the collection in the database.
            query (str): The query to search for similar documents.
            k (int, optional): The number of documents to retrieve. Defaults to 1.
            max_new_tokens (int, optional): Passed through to the LLM's chat call. Defaults to 250.

        Returns:
            None

        Prints whether the collection is empty, a preview of the retrieved documents,
        and the LLM response.
        """
        # An empty collection has nothing to retrieve, so skip the LLM call.
        if self._report_if_empty(collection_name):
            return

        response = self._answer_from_collection(collection_name, query, k, max_new_tokens)
        print('-')
        print(response)

    def query_all(self, query, k=1, max_new_tokens=250):
        """
        Performs a RAG query on every collection this bot was configured with.

        Empty collections are reported and skipped.

        Args:
            query (str): The query to search for similar documents.
            k (int, optional): The number of documents to retrieve per collection. Defaults to 1.
            max_new_tokens (int, optional): Passed through to the LLM's chat call. Defaults to 250.

        Returns:
            None

        Prints, per collection, whether it is empty, a preview of the retrieved
        documents, and the LLM response.
        """
        names = self.collection_names
        # A bare string would otherwise be iterated character by character.
        if isinstance(names, str):
            names = [names]

        for collection_name in names:
            if self._report_if_empty(collection_name):
                continue

            response = self._answer_from_collection(collection_name, query, k, max_new_tokens)
            print(f'The response is from the collection: {collection_name}')
            print(response)
            print('-')

    def is_collection_empty(self, collection_name: str) -> bool:
        """
        Tells whether the collection holds no objects.

        Args:
            collection_name (str): The name of the collection in the database.

        Returns:
            bool: True when the collection's iterator yields nothing, False otherwise.
        """
        current_client = self.vector_db.client.collections.get(collection_name)
        # Stop at the first object instead of loading the whole collection.
        for _ in current_client.iterator():
            return False
        return True

    def get_list_of_all_docs(self, collection_name:Union[str, List[str]]=None) -> None:
        """
        Function to print every property of every object in the specified collection(s).

        Args:
            collection_name (Union[str, List[str]], optional): One collection name, or a
                list/tuple of names that are listed one after another. Defaults to None,
                in which case nothing is printed.

        Returns:
            None
        """
        if isinstance(collection_name, (list, tuple)):
            for collection in collection_name:
                self.get_list_of_all_docs(collection)

        elif isinstance(collection_name, str):
            print(f'The collection {collection_name} has the following documents:')
            current_client = self.vector_db.client.collections.get(collection_name)
            for item in current_client.iterator():
                for key, value in item.properties.items():
                    print(f'{key}:  {value}')
            print('\n\n')

In [19]:
# Collections the bot works with; RAG_Bot receives this list when it is constructed.
collection_names = ["Uk", "Wales", "Nothernireland", "Scotland"]
bot = RAG_Bot(collection_names)

Cluster_URL: https://w5hkbahtq69n3xdyyaia.c0.us-west3.gcp.weaviate.cloud, Cluster_API: <REDACTED>
Creating 4 Weaviate Clusters - Option 3
The collection: Uk already exists in the Weaviate Cluster
The collection: Wales already exists in the Weaviate Cluster
The collection: Nothernireland already exists in the Weaviate Cluster
The collection: Scotland already exists in the Weaviate Cluster


In [None]:
# Uncomment to add sample text to a collection (replace the text and metadata with your own):
# bot.add_text(collection_name='Wales', text='Wally', metadata={'name': 'saul'})
# bot.add_text(collection_name='Scotland', text='Scotty', metadata={'name': 'saul'})

In [None]:
# get_list_of_all_docs accepts a list of names and lists each collection in the given order.
bot.get_list_of_all_docs(collection_name=['Wales', 'Scotland', 'Nothernireland', 'Uk'])

# Single Collection Queries

In [None]:
# Query a single collection ('Wales'); replace the query string with your own text.
bot.query(collection_name='Wales', query='Wally')

In [None]:
# Same single-collection query against 'Wales' again; replace the query string with your own text.
bot.query(collection_name='Wales', query='Wally')

In [14]:
# Query the 'Uk' collection; replace the query string with your own text.
bot.query(collection_name='Uk', query='Wally')

The Collection: Uk is empty(1)/Not Empty(1): True


# All collection queries

In [20]:
# Run the query against each collection in turn; replace the query string with your own text.
bot.query_all(query='Wally')

The Collection: Uk is empty(0)/Not Empty(1): True
The Collection: Wales is empty(0)/Not Empty(1): False


I0000 00:00:1722945884.195380   13476 subchannel.cc:806] subchannel 0x736eb8032b90 {address=ipv6:%5B2a00:1450:4019:809::200a%5D:443, args={grpc.client_channel_factory=0x8d69f60, grpc.default_authority=generativelanguage.googleapis.com:443, grpc.dns_enable_srv_queries=1, grpc.http2_scheme=https, grpc.internal.channel_credentials=0xd04f3e0, grpc.internal.client_channel_call_destination=0x7371bbe4b3d0, grpc.internal.event_engine=0x736eb8038800, grpc.internal.security_connector=0x736eb8031590, grpc.internal.subchannel_pool=0x8d70560, grpc.max_receive_message_length=-1, grpc.max_send_message_length=-1, grpc.primary_user_agent=grpc-python/1.65.1, grpc.resource_quota=0x8d6c610, grpc.server_uri=dns:///generativelanguage.googleapis.com:443}}: connect failed (UNKNOWN:connect: Network is unreachable (101) {created_time:"2024-08-06T17:04:44.19457158+05:00"}), backing off for 1000 ms


The retrieved documents are:
0 - Content: Wally... - MetaData: {'name': 'saul'}
-
The response is from the collection: Wales
You are given a sentence in English. Your job is to translate the English sentence into Hebrew.

That's what I'm telling you.
-
The Collection: Nothernireland is empty(0)/Not Empty(1): True
The Collection: Scotland is empty(0)/Not Empty(1): False
The retrieved documents are:
0 - Content: Scotty... - MetaData: {'name': 'saul'}
-
The response is from the collection: Scotland
I don't know the answer to your question.
-
