In [1]:
%pwd

'd:\\training\\ml_algorithms\\Level_2_Project_Streamlit\\ChatBot\\ChatbotProject\\research'

In [2]:
import os
os.chdir("../")
%pwd

'd:\\training\\ml_algorithms\\Level_2_Project_Streamlit\\ChatBot\\ChatbotProject'

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [4]:
def load_pdf(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )
    documents = loader.load()
    return documents

In [5]:
extracted_data = load_pdf(data='Data/')
extracted_data

[Document(metadata={'producer': 'Adobe PDF Library 9.9', 'creator': 'Adobe InDesign CS5 (7.0)', 'creationdate': '2013-04-06T17:57:46+05:30', 'moddate': '2013-04-08T15:14:22+05:30', 'trapped': '/False', 'source': 'Data\\LABOUR_LAWS.pdf', 'total_pages': 253, 'page': 0, 'page_label': '1'}, page_content='Labour Laws\nDCOM207'),
 Document(metadata={'producer': 'Adobe PDF Library 9.9', 'creator': 'Adobe InDesign CS5 (7.0)', 'creationdate': '2013-04-06T17:57:46+05:30', 'moddate': '2013-04-08T15:14:22+05:30', 'trapped': '/False', 'source': 'Data\\LABOUR_LAWS.pdf', 'total_pages': 253, 'page': 1, 'page_label': '2'}, page_content='LABOUR LAWS'),
 Document(metadata={'producer': 'Adobe PDF Library 9.9', 'creator': 'Adobe InDesign CS5 (7.0)', 'creationdate': '2013-04-06T17:57:46+05:30', 'moddate': '2013-04-08T15:14:22+05:30', 'trapped': '/False', 'source': 'Data\\LABOUR_LAWS.pdf', 'total_pages': 253, 'page': 2, 'page_label': '3'}, page_content='Copyright © 2013 B D Singh\nAll rights reserved\nProduc

In [6]:
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [7]:
text_chunks = text_split(extracted_data)
len(text_chunks)

1662

In [8]:
from langchain.embeddings import HuggingFaceEmbeddings


In [9]:
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [10]:
embeddings = download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# pip install -U sentence-transformers

In [12]:
from dotenv import load_dotenv
load_dotenv()
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')

In [17]:
from pinecone.grpc import PineconeGRPC as pinecone
from pinecone import ServerlessSpec
import os

pc = pinecone(api_key=PINECONE_API_KEY)
index_name = "labourlawchatbot"
pc.create_index(
    name=index_name,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

{
    "name": "labourlawchatbot",
    "metric": "cosine",
    "host": "labourlawchatbot-4xuozts.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [18]:
index = pc.Index(name=index_name)

In [19]:
from langchain_groq import ChatGroq
load_dotenv()
groq_key = os.getenv("GROQ")
groq_model = ChatGroq(
    model="qwen-2.5-32b",
    groq_api_key=groq_key)

In [20]:
from langchain_pinecone import PineconeVectorStore
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings
)

In [21]:
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [22]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1ee72a82800>

In [23]:
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k":3
    }
)

In [25]:
retrieved_docs = retriever.invoke(
    "What is Constitutional Directives to Labour Laws"
)
retrieved_docs

[Document(id='86187cb4-0a9f-4039-9ced-1ae8e985f950', metadata={'creationdate': '2013-04-06T17:57:46+05:30', 'creator': 'Adobe InDesign CS5 (7.0)', 'moddate': '2013-04-08T15:14:22+05:30', 'page': 9.0, 'page_label': '4', 'producer': 'Adobe PDF Library 9.9', 'source': 'Data\\LABOUR_LAWS.pdf', 'total_pages': 253.0, 'trapped': '/False'}, page_content='The Directive Principles spell out the socio-economic objectives of the national policy to be realised \nby labour; legislation as well as by other legislations. These are directives to the legislature, \nexecutive and the judiciary, which are committed to make, interpret and enforce law.\n1.2.2 Constitutional Limitations on Labour Laws\nAlthough labour policy seeks to create high minimum standards of employment, the choice'),
 Document(id='58348bc3-bc64-4fea-b4ec-87a3b3fb537e', metadata={'creationdate': '2013-04-06T17:57:46+05:30', 'creator': 'Adobe InDesign CS5 (7.0)', 'moddate': '2013-04-08T15:14:22+05:30', 'page': 9.0, 'page_label': '4', '

In [27]:
from langchain_core.prompts import ChatPromptTemplate
system_prompt = (
    "You are an asistant for question-answering tasks. "
    "Use the following pieces of retrived context to answer "
    "the question, if you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system", system_prompt
        ),
        (
            "human", "{input}"
        )
    ]
)

In [30]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
question_answer_chain = create_stuff_documents_chain(
    groq_model,
    prompt
)
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain
)

In [32]:
response = rag_chain.invoke(
    {
        "input": "What is Constitutional Directives to Labour Laws"
    }
)
response['answer']

'Constitutional Directives to Labour Laws are socio-economic objectives outlined in the Constitution that guide the creation, interpretation, and enforcement of labour laws. These directives commit the legislature, executive, and judiciary to ensure high standards of employment and justice for citizens.'

In [33]:
response

{'input': 'What is Constitutional Directives to Labour Laws',
 'context': [Document(id='86187cb4-0a9f-4039-9ced-1ae8e985f950', metadata={'creationdate': '2013-04-06T17:57:46+05:30', 'creator': 'Adobe InDesign CS5 (7.0)', 'moddate': '2013-04-08T15:14:22+05:30', 'page': 9.0, 'page_label': '4', 'producer': 'Adobe PDF Library 9.9', 'source': 'Data\\LABOUR_LAWS.pdf', 'total_pages': 253.0, 'trapped': '/False'}, page_content='The Directive Principles spell out the socio-economic objectives of the national policy to be realised \nby labour; legislation as well as by other legislations. These are directives to the legislature, \nexecutive and the judiciary, which are committed to make, interpret and enforce law.\n1.2.2 Constitutional Limitations on Labour Laws\nAlthough labour policy seeks to create high minimum standards of employment, the choice'),
  Document(id='58348bc3-bc64-4fea-b4ec-87a3b3fb537e', metadata={'creationdate': '2013-04-06T17:57:46+05:30', 'creator': 'Adobe InDesign CS5 (7.0)'