In [85]:
import os 
import streamlit
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
import dotenv 
dotenv.load_dotenv()


True

# login hugging face and setup llm

In [54]:

from huggingface_hub import login
llm=ChatGroq(
    groq_api_key=os.getenv('GROQ_API_KEY'),
    model_name='llama-3.1-70b-versatile',
    temperature=0.2,
    
    
)
login(token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),add_to_git_credential=True)


Token is valid (permission: fineGrained).
Your token has been saved in your configured git credential helpers (manager).
Your token has been saved to C:\Users\26amr\.cache\huggingface\token
Login successful


# split the data into chunks 

In [55]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
# import faiss
# embeddings=OllamaEmbeddings()
# loader=WebBaseLoader('https://en.wikipedia.org/wiki/Elon_Musk')
# docs=loader.load()
# text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
# documents=text_splitter.split_documents(docs)
# print(documents)

documents=SimpleDirectoryReader('../data/').load_data()



In [56]:
import faiss
d=1536
faiss_index=faiss.IndexFlatL2(d)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import Settings
embed_model=HuggingFaceEmbedding(
    model_name='BAAI/bge-small-en-v1.5'
)
# document_text=[doc.page_content for doc in documents]
# embeddings=[embed_model.embed_query(text) for text in document_text]



In [57]:
from llama_index.core import Settings

Settings.chunk_size=1024
Settings.embed_model=embed_model
Settings.llm=llm

In [None]:
index=VectorStoreIndex.from_documents(documents=documents,embed_model=Settings.embed_model)
# save the data 
index.storage_context.persist('../data/')


In [None]:
# load index from storage
storage_context=StorageContext.from_defaults(persist_dir='../data/')
index=load_index_from_storage(storage_context)

# create a retriever or query engine 
retriever=index.as_retriever()
query_engine=index.as_query_engine()


In [93]:
response=query_engine.query('what are the rules i should follow?')
response.response

'When determining the constitutionality of a law, the following rules must be considered:\n\n1. A clear violation of the Constitution, a clear usurpation of power prohibited, is required to justify pronouncing an act of the legislative department unconstitutional and void.\n2. Courts should never declare a statute void unless the nullity and invalidity of the act are placed beyond a reasonable doubt.\n3. A reasonable doubt must be solved in favor of the legislative action, and the act sustained.\n4. Legislative power, except where the Constitution has imposed limits upon it, is practically absolute.\n5. Limitations upon legislative power are to be strictly construed and are not to be given effect as against the general power of the Legislature unless such limitations clearly inhibit the act in question.\n6. The court should hesitate long and be convinced beyond a reasonable doubt before pronouncing an act of Congress invalid. The argument should amount almost to a demonstration. If dou

# bot context 

In [None]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.prompts import ChatPromptTemplate
from llama_index.core.memory import BaseMemory
memory=BaseMemory()
# Text QA Prompt
chat_text_qa_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            """ 
            You are an experienced immigration lawyer providing detailed legal advice. 
            Carefully review the following documents to extract relevant information and address the client's question. 
            Analyze any legal regulations, risks, or implications that may apply, and provide guidance on the next steps.

                Instructions:
                1. Identify key legal issues or relevant regulations from the provided documents.
                2. Explain how the information applies to the clientâ€™s situation.
                3. Highlight any significant risks, legal obligations, or considerations the client should be aware of.
                4. Offer clear advice on the next steps, considering short-term and long-term outcomes.
                5. Note any additional documentation, evidence, or forms the client should prepare

                Please provide a structured and thorough response, clearly addressing each point in a way that helps the client understand their legal standing and options.
            """
        ),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information and not prior knowledge, "
            "answer the query.\n"
            "Query: {query_str}\n"
            "Answer: "
        ),
    ),
]
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

# Refine Prompt
chat_refine_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            "You are an expert Q&A system that strictly operates in two modes "
            "when refining existing answers:\n"
            "1. **Rewrite** an original answer using the new context.\n"
            "2. **Repeat** the original answer if the new context isn't useful.\n"
            "Never reference the original answer or context directly in your answer.\n"
            "When in doubt, just repeat the original answer."
        ),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "New Context: {context_msg}\n"
            "Query: {query_str}\n"
            "Original Answer: {existing_answer}\n"
            "New Answer: "
        ),
    ),
]
refine_template = ChatPromptTemplate(chat_refine_msgs)


In [104]:
print(
    index.as_query_engine(
        text_qa_template=text_qa_template, refine_template=refine_template
    ).query("what are some things i sohuld avoid?")
)

Based on the context information, here are some things you should avoid:

1. **Registering invalid bonds**: As a Comptroller, you are advised that registering bonds that appear to be invalid would be a violation of the law and a disregard of your duty.
2. **Fraudulent activities**: The documents suggest that certain actions or omissions could open up opportunities for fraud, particularly in the administration of land laws.
3. **Misunderstanding tax obligations**: Insurance companies should be aware of their tax obligations, including annual State taxes and ad valorem taxes on real and personal property.
4. **Ignoring patent land requirements**: To entitle a person to patent land purchased under the act of 1887, they must meet specific requirements, including "occupancy" of the land and "residence as a home" upon it for a period of three consecutive years.
5. **Disregarding county judge's authority**: The county judge has the authority to draw warrants in favor of officers for costs due

In [110]:
# include chat history and memory
from llama_index.core.memory import ChatMemoryBuffer

memory= ChatMemoryBuffer.from_defaults(token_limit=1500)
chat_engine=index.as_chat_engine(
    chat_mode='context',
    memory=memory,
    system_prompt=(text_qa_template,refine_template),
)


In [113]:
response=chat_engine.chat('what was the latest law passed?')
response.response

"Based on the provided context information, the latest law passed that I could find is the act of May 2, 1893, mentioned in the document. However, I couldn't find any specific details about what this law entails or what changes it brings. If you could provide more context or clarify what you are looking for, I'll do my best to help."

In [114]:
response=chat_engine.chat('when was it passed?')
response.response

'According to the context information, the act of May 2, 1893, was passed on May 2, 1893.'

In [115]:
response=chat_engine.chat('can you tell me more')
response.response

'Unfortunately, the provided context information does not give more details about the act of May 2, 1893. However, it does mention Chapter 84 of the Acts of 1893, which provides that counties in Texas have the authority to issue bonds for purchasing or constructing bridges for public purposes within the county or across a stream that constitutes a boundary line of a county.'