In [104]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
import textwrap

In [105]:
class Runnable:
    """Minimal base class for objects that expose a ``run`` entry point.

    Subclasses are expected to override :meth:`run`; the base implementation
    always raises.
    """

    def run(self, *args, **kwargs):
        """Placeholder entry point; accepts any arguments and always raises.

        Raises:
            NotImplementedError: Unconditionally, until a subclass overrides it.
        """
        message = "Subclass must implement abstract method"
        raise NotImplementedError(message)

In [106]:

def load_pdf_data(file_path):
    """Load a PDF through ``PyMuPDFLoader`` and return what its ``load()`` yields.

    Args:
        file_path: Location of the PDF, forwarded to the loader unchanged.

    Returns:
        The documents produced by ``PyMuPDFLoader(...).load()``.
    """
    return PyMuPDFLoader(file_path=file_path).load()

In [107]:
# Responsible for splitting the documents into several chunks

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Break documents into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split, passed to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for the given documents.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(
        documents=documents
    )

In [108]:
def load_embedding_model(model_path, normalize_embedding=True, device='cpu'):
    """Build a ``HuggingFaceEmbeddings`` instance for the given model.

    Args:
        model_path: Model name or path, passed to the constructor as
            ``model_name``.
        normalize_embedding: Value for the ``normalize_embeddings`` entry of
            ``encode_kwargs``.
        device: Device string placed in ``model_kwargs``. Defaults to
            ``'cpu'``, the value that used to be hard-coded, so existing
            callers are unaffected.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    return HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': normalize_embedding},
    )


def create_embeddings(chunks, embedding_model, storing_path = "C:\\Users\\nehan\\vectorstore"):
    """Index chunks in a FAISS vector store and save it locally.

    Args:
        chunks: Documents handed to ``FAISS.from_documents``.
        embedding_model: Embedding object handed to ``FAISS.from_documents``.
        storing_path: Destination passed to ``save_local``.
            NOTE(review): the default is a machine-specific Windows path;
            callers on other machines should pass their own location.

    Returns:
        The FAISS vector store that was built and saved.
    """
    index = FAISS.from_documents(chunks, embedding_model)
    index.save_local(storing_path)
    return index

In [None]:
# Prompt skeleton: a system instruction, then the `{context}` and `{question}`
# placeholders, then an empty "Response" section. The trailing backslashes
# inside the string join the system instruction into a single line.
template = """
### System:
You are an respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
just say you don't know. Don't try to make up an answer.

### Context:
{context}

### User:
{question}

### Response:
"""

In [110]:
def load_qa_chain(retriever, llm, prompt):
    """Assemble a ``RetrievalQA`` chain of type ``"stuff"``.

    Args:
        retriever: Retriever passed through to ``from_chain_type``.
        llm: Language model passed through to ``from_chain_type``.
        prompt: Prompt supplied to the chain via ``chain_type_kwargs``.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type``, configured with
        ``return_source_documents=True``.
    """
    chain_options = {
        "llm": llm,
        "retriever": retriever,
        "chain_type": "stuff",
        "return_source_documents": True,
        "chain_type_kwargs": {"prompt": prompt},
    }
    return RetrievalQA.from_chain_type(**chain_options)

In [111]:
def get_response(query, chain):
    """Run a query through the chain and print the answer wrapped to 100 columns.

    Args:
        query: Question text, sent to the chain under the ``'query'`` key.
        chain: Object that maps ``{'query': ...}`` to a mapping containing a
            ``'result'`` entry. If it exposes a callable ``invoke`` attribute
            that is used; otherwise the object itself is called.

    Returns:
        None. The wrapped answer is written to stdout.
    """
    payload = {'query': query}

    # Calling a LangChain chain directly is deprecated in favour of `invoke`;
    # plain callables are still supported as a fallback.
    runner = getattr(chain, 'invoke', None)
    response = runner(payload) if callable(runner) else chain(payload)

    print(textwrap.fill(response['result'], width=100))

In [112]:
from langchain.llms import OpenAI
from langchain import PromptTemplate

In [116]:
class OpenAIWrapper(Runnable):
    """Adapter that exposes a wrapped object's ``run`` via the ``Runnable`` base."""

    def __init__(self, openai_instance):
        """Keep a reference to the object whose ``run`` will be delegated to."""
        self.openai_instance = openai_instance

    def run(self, *args, **kwargs):
        """Forward every argument to the wrapped object's ``run`` and return its result."""
        # NOTE(review): assumes the wrapped object provides a `run` method;
        # confirm that the object passed in actually exposes one.
        delegate = self.openai_instance
        return delegate.run(*args, **kwargs)

In [124]:
import os
from getpass import getpass
from langchain.llms import OpenAI

# Never paste the key into the notebook. Reuse a key that is already exported
# in the environment; otherwise prompt for it without echoing it, so it is not
# stored in the saved notebook. Surrounding whitespace from a paste is removed.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ").strip()


In [118]:
# Instantiate the OpenAI LLM with its default settings
llm = OpenAI()

# Build the embedding model (all-MiniLM-L6-v2) with the helper's defaults
embed = load_embedding_model("all-MiniLM-L6-v2")

In [119]:
# Read the PDF and break it into chunks.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook elsewhere.
docs = load_pdf_data(r"C:\Users\nehan\Desktop\Legal-Advisor-LLM\data\COI.pdf")
documents = split_docs(docs)

# Embed the chunks and save the resulting vector store
vectorstore = create_embeddings(chunks=documents, embedding_model=embed)

# Expose the vector store through its retriever interface
retriever = vectorstore.as_retriever()

In [120]:
# Turn the template string defined earlier into a PromptTemplate
prompt = PromptTemplate.from_template(template)

# Wire the retriever, the LLM and the prompt into the QA chain
chain = load_qa_chain(retriever=retriever, llm=llm, prompt=prompt)

In [125]:
# Ask the chain a sample question and print the wrapped answer
get_response(query="court", chain=chain)

Retrying langchain_community.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors..


RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

: 