In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import numpy as np
import re
from langchain.schema import Document
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

In [12]:

#Importing all documents from the folder - artifacts and cleaning - /n removed, - replaced. 
path_to_pdfs = (r"D:\LLM\projects\Questionme\artifacts")
loader = PyPDFDirectoryLoader(path_to_pdfs)
pdf_documents = loader.load()

def cleanup(document):
    document = document.replace('\n', ' ')
    document = document.replace('-', '')
    document = ' '.join(document.split())
    document = re.sub(r'\n+', ' ', document)
    return document

cleaned_documents = [cleanup(doc.page_content) for doc in pdf_documents]
cleaned_document_objects = [Document(page_content=cleaned_text, metadata=doc.metadata) 
                             for cleaned_text, doc in zip(cleaned_documents, pdf_documents)]
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=100)
final_documents=text_splitter.split_documents(cleaned_document_objects)
texts = [doc.page_content for doc in final_documents]
text = texts


In [13]:
len(text)

109

In [14]:
llm = CTransformers(
    model=r"D:\LLM\models\llama-2-7b-chat.ggmlv3.q8_0.bin",
    model_type="llama",
    config={'max_new_tokens': 300, 'temperature': 0.03,'context_length': 1000}
)

In [16]:

huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",      #sentence-transformers/all-MiniLM-l6-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True})



In [17]:


embeddings = [huggingface_embeddings.embed_query(text) for text in texts]
embeddings_array = np.array(embeddings)
print(embeddings_array)

[[ 0.06778146  0.01100297  0.02574205 ...  0.00751806  0.06441373
  -0.03051739]
 [-0.0693738  -0.02106547 -0.03994767 ... -0.00298001  0.05587736
   0.04344444]
 [-0.03051266  0.08318012 -0.02475407 ... -0.06984834  0.04987878
   0.01845735]
 ...
 [ 0.05832506  0.07038648  0.02622284 ...  0.07168836 -0.01516775
  -0.02078729]
 [ 0.08588448  0.01911633  0.0015234  ...  0.04203937  0.02636152
  -0.02784703]
 [-0.01472853 -0.00520878 -0.00508864 ...  0.07673403 -0.00523054
  -0.04715177]]


In [18]:
vectorstore = FAISS.from_documents(final_documents, huggingface_embeddings)

In [19]:


# Define the prompt template
template = """
You are an expert in UK parking rules, penalties, and fixed penalty details. Answer the question precisely. If you're not 100 percent sure of the answer, respond with 'not sure' and request the user to check updated and correct information or reach a trustworthy solicitor.

Context: {context}

Question: {question}

Answer:
"""

qa_prompt = PromptTemplate(template=template, input_variables=['context', 'question'])

retriever = vectorstore.as_retriever(search_kwargs={'k': 2})

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={'prompt': qa_prompt}
)

user_input = "Who sets the level of FPN's"

result=chain({'query':user_input})
print(f"Answer:{result['result']}")



Answer: The level of fixed penalty notices (FPNs) is set by local authorities, which includes unitary authorities, county councils, metropolitan district councils, and London Boroughs. Although in practice, some of these may have an agency agreement with city or district councils to act on their behalf on traffic matters.

Please note that the information provided is based on the Road Traffic Act 1991 and other relevant legislation, but it is not a substitute for legal advice. If you are unsure about any aspect of parking rules, penalties, or fixed penalty details, please consult an updated and correct source or seek the advice of a trustworthy solicitor.
