In [1]:
import warnings
import importlib
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

from typing import Union, List

import VectorDataBase
importlib.reload(VectorDataBase)
from VectorDataBase import WeaviateDB

# from Saul_LLM_HF import Legal_LLM
from Saul_LLM_GGUF import Legal_LLM

In [35]:
class RAG_Bot:
    """
    Small retrieval-augmented generation (RAG) helper.

    Holds a ``WeaviateDB`` wrapper (``self.vector_db``) for storing and
    retrieving text, and a ``Legal_LLM`` instance (``self.llm``) that is asked
    to answer a query given the retrieved text as context.
    """

    # Collections opened when the caller does not name any. Kept as an
    # immutable tuple so the default cannot be mutated between instances.
    # NOTE(review): 'NothernIreland' looks like a typo for 'NorthernIreland',
    # but it is used as the collection identifier, so it is left unchanged.
    _DEFAULT_COLLECTIONS = ('Uk', 'Wales', 'NothernIreland', 'Scotland')

    def __init__(self, collection_names=None):
        """
        Initializes the RAG_Bot object.

        Args:
            collection_names (list, optional): A list of collection names.
                Defaults to ['Uk', 'Wales', 'NothernIreland', 'Scotland'].
        """
        if collection_names is None:
            # Build a fresh list per instance instead of sharing one default.
            collection_names = list(self._DEFAULT_COLLECTIONS)
        self.vector_db = WeaviateDB(collection_names=collection_names)
        self.llm = Legal_LLM()

    def add_text(self, collection_name, text, metadata=None):
        """
        Adds text data to a specified collection in the Weaviate database.

        Args:
            collection_name (str): The name of the collection in the database.
            text (str): The text data to be added.
            metadata (dict, optional): Additional metadata associated with the
                text. Passed through unchanged; defaults to None.
        """
        self.vector_db.add_text_to_db(
            collection_name=collection_name,
            text=text,
            metadata=metadata
        )

    @staticmethod
    def _format_docs(docs):
        """
        Prints a short preview of each retrieved document and joins their
        contents into a single context string.

        Args:
            docs (iterable): Documents exposing ``page_content`` and ``metadata``.

        Returns:
            str: The ``page_content`` of every document, separated by blank lines.
        """
        docs = list(docs)
        print('The retrieved documents are:')
        for idx, doc in enumerate(docs):
            print(f'{idx} - Content: {doc.page_content[:50]}... - MetaData: {doc.metadata}')
        return "\n\n".join(doc.page_content for doc in docs)

    def query(self, collection_name, query, k=1, max_new_tokens=250):
        """
        Performs a RAG query on the specified collection using the Legal_LLM.

        The top ``k`` documents are retrieved from the collection's vector
        store, previewed on stdout, joined into one context string and handed
        to ``self.llm.chat`` together with the query. The response is printed.

        Args:
            collection_name (str): The name of the collection in the database.
            query (str): The query to search for similar documents.
            k (int, optional): The number of documents to retrieve. Defaults to 1.
            max_new_tokens (int, optional): Generation limit forwarded to the
                LLM. Defaults to 250.

        Returns:
            None

        Raises:
            KeyError: If ``collection_name`` is not a key of
                ``self.vector_db.vector_stores`` (assuming it is a dict).
        """
        current_db = self.vector_db.vector_stores[collection_name]

        # Create a retriever for the current database
        retriever = current_db.as_retriever(search_kwargs={"k": k})

        retrieved_docs = retriever.get_relevant_documents(query)
        context = self._format_docs(retrieved_docs)

        # Pass plain strings. Writing {context} / {query} outside an f-string
        # builds one-element sets, which is not what a prompt expects.
        response = self.llm.chat(context=context,
                                 query=query,
                                 max_new_tokens=max_new_tokens)
        print('-')
        print(response)

    def get_list_of_all_docs(self, collection_name: Union[str, List[str], None] = None) -> None:
        """
        Prints every property of every object stored in one or more collections.

        Args:
            collection_name (str | list[str] | None): A single collection name,
                or a list (or tuple) of names that are listed one after the
                other. Any other value, including the default None, prints
                nothing.

        Returns:
            None
        """
        if isinstance(collection_name, (list, tuple)):
            for collection in collection_name:
                self.get_list_of_all_docs(collection)

        elif isinstance(collection_name, str):
            print(f'The collection {collection_name} has the following documents:')
            current_collection = self.vector_db.clients[collection_name].collections.get(collection_name)
            for item in current_collection.iterator():
                for key, value in item.properties.items():
                    print(f'{key}:  {value}')
            print('\n\n')

In [36]:
# Open only the 'Uk' collection for this session; the full set would be
# ['Uk', 'Wales', 'NothernIreland', 'Scotland'].
app = RAG_Bot(collection_names=['Uk'])

Vector Stores Available: ['Uk']


In [37]:
# Print the properties of every object stored in the 'Uk' collection.
app.get_list_of_all_docs(collection_name=['Uk'])

The collection Uk has the following documents:
text: - Child labor is a grave social issue that deprives children of their childhood, 
    education, and the opportunity for a better future. It often involves hazardous work that 
    poses significant risks to their physical and mental health. Despite global efforts to eradicate it, 
    millions of children worldwide are still engaged in labor due to poverty, lack of access to education, 
    and inadequate enforcement of labor laws. Ending child labor requires a multifaceted approach, including 
    strengthening legal frameworks, improving educational opportunities, and addressing the root causes of poverty and inequality.
title: - None
legislation: - None
legislationType: - None
year: - None
country: - Uk


In [17]:
# app.vector_db.validate_collection()

Validating collection: Uk - Cluster Status:True
Validating collection: Wales - Cluster Status:True
Validating collection: NothernIreland - Cluster Status:True
Validating collection: Scotland - Cluster Status:True


In [18]:
# app.vector_db.delete_collection('Wales')
# app.vector_db.delete_collection('Uk')
# app.vector_db.delete_collection('NothernIreland')
# app.vector_db.delete_collection('Scotland')

In [9]:
# Store one sample passage in the 'Uk' collection, tagged with its country.
app.add_text(
    'Uk',
    '''Child labor is a grave social issue that deprives children of their childhood, 
    education, and the opportunity for a better future. It often involves hazardous work that 
    poses significant risks to their physical and mental health. Despite global efforts to eradicate it, 
    millions of children worldwide are still engaged in labor due to poverty, lack of access to education, 
    and inadequate enforcement of labor laws. Ending child labor requires a multifaceted approach, including 
    strengthening legal frameworks, improving educational opportunities, and addressing the root causes of poverty and inequality.''',
    dict(country='Uk'),
)

In add_text_to_db adding to Collection: Uk - Text: Child labor is a grave social issue that deprives ... - MetaData: {'country': 'Uk'}
File with data with ids: ['08cdc62e-87bf-4c14-8b6d-3b381c2badc7']


In [19]:
# Print the properties of every object stored in the 'Uk' collection.
app.get_list_of_all_docs(collection_name=['Uk'])

The collection Uk has the following documents:
Object(uuid=_WeaviateUUIDInt('08cdc62e-87bf-4c14-8b6d-3b381c2badc7'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'text': 'Child labor is a grave social issue that deprives children of their childhood, \n    education, and the opportunity for a better future. It often involves hazardous work that \n    poses significant risks to their physical and mental health. Despite global efforts to eradicate it, \n    millions of children worldwide are still engaged in labor due to poverty, lack of access to education, \n    and inadequate enforcement of labor laws. Ending child labor requires a multifaceted approach, including \n    strengthening legal frameworks, improving educational opportunities, and addressing the root causes of poverty and inequality.', 'legislationType': None, 'legislation': None, 'year': N

In [None]:
# RAG query 1: causes of child labor (retrieves the default k=1 document).
app.query(
    'Uk',
    "What are the primary factors contributing to the persistence of child labor globally?",
)

In [None]:
# RAG query 2: the role of education (retrieves the default k=1 document).
app.query(
    'Uk',
    "How can improving educational opportunities help in the eradication of child labor?",
)

In [None]:
# RAG query 3: legal measures (retrieves the default k=1 document).
app.query(
    'Uk',
    "What measures can governments and organizations take to strengthen legal frameworks against child labor?",
)