In [None]:
# This notebook builds a legal assistant using retrieval-augmented generation (RAG) with Gemini.

In [None]:
!pip install langchain langchain_openai weaviate-client pypdf langchain-community tiktoken chromadb langchain-google-genai ipywidgets

In [None]:
import os
import google.generativeai as genai

# Take the API keys from the environment and prompt (without echo) only for the
# ones that are missing. This keeps real secrets out of the saved notebook and
# does not overwrite keys that are already configured.
from getpass import getpass

for _key_name, _prompt in (
    ("GOOGLE_API_KEY", "Gemini API key: "),
    ("OPENAI_API_KEY", "OpenAI API key: "),
):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(_prompt)

In [None]:
from langchain.document_loaders import TextLoader
from google.colab import drive
from pathlib import Path

# Mount Google Drive so files stored there are reachable from the Colab runtime.
drive.mount('/content/drive')

# Text file that is loaded and indexed by the following cells.
# NOTE(review): judging by the file name this holds sale/purchase rules — confirm.
txt_path = "/content/drive/MyDrive/sell_buy_rules.txt"

# Fail fast with an explicit message if the mount or the path is wrong, instead
# of surfacing a less specific loader error.
if not Path(txt_path).is_file():
    raise FileNotFoundError(
        f"Corpus file not found: {txt_path} (is Google Drive mounted and the path correct?)"
    )

# Load the text file as LangChain documents.
loader = TextLoader(txt_path, encoding="utf-8")
documents = loader.load()

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters, kept in one place so they are easy to tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    # Candidate split points: paragraph break, line break, full stop, the
    # Persian/Arabic comma, and finally a plain space.
    separators=["\n\n", "\n", ".", "،", " "]
)
chunks = text_splitter.split_documents(documents)

print(f"✅ Total chunks: {len(chunks)}")
# Guard the sample print: an empty corpus yields no chunks and chunks[0] would
# raise IndexError.
if chunks:
    print(f"📄 Sample chunk:\n\n{chunks[0].page_content[:CHUNK_SIZE]}")
else:
    print("⚠️ No chunks were produced — the source document appears to be empty.")


In [None]:
import os


def api_key_status(name: str) -> str:
    """Report whether environment variable ``name`` holds a non-empty value.

    Only the presence of the variable is reported; its value is never included,
    so the secret cannot end up in saved notebook output.

    Args:
        name: Name of the environment variable to check.

    Returns:
        ``"<name> is set"`` or ``"<name> is NOT set"``.
    """
    state = "is set" if os.environ.get(name) else "is NOT set"
    return f"{name} {state}"


# Confirm the OpenAI key is configured without echoing the key itself.
print(api_key_status("OPENAI_API_KEY"))

In [None]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model used to vectorise the chunks (library defaults; the OpenAI key
# is expected to be available in the environment).
embeddings = OpenAIEmbeddings()

# Embed all chunks and index them in a Chroma collection. No persist directory
# is passed, so this cell re-embeds the chunks every time it runs.
vector_store = Chroma.from_documents(
    embedding=embeddings,
    documents=chunks,
)


In [None]:
import os
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI



from langchain.chains import RetrievalQA

# Read the OpenAI key; in this cell it is not passed to any model.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Gemini chat model that writes the final answer.
llm = ChatGoogleGenerativeAI(
    temperature=0.7,
    model="models/gemini-1.5-flash",
)

# Retriever over the vector store, using its default search settings.
retriever = vector_store.as_retriever()

# "stuff" chain: the retrieved chunks are placed into a single prompt.
# Source documents are returned alongside the answer so they can be cited.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    return_source_documents=True,
)

In [None]:
def make_snippet(text: str, limit: int = 700) -> str:
    """Return a short preview of ``text`` for display.

    Args:
        text: Full text to preview.
        limit: Maximum number of leading characters to keep.

    Returns:
        The first ``limit`` characters with surrounding whitespace stripped.
        ``"..."`` is appended only when ``text`` was actually longer than
        ``limit``, so untruncated text is not marked as cut off.
    """
    snippet = text[:limit].strip()
    return snippet + "..." if len(text) > limit else snippet


# Sanity-check retrieval by querying the vector store directly (no LLM involved).
print("\n--- Testing Similarity Search in Vector Store ---")
# Alternative query ("In which cases is a divorce irrevocable?"):
#test_query = "طلاق بائن در چه مواردی است؟"
test_query = "در صورت وجود عیب پنهانی در مبیع، چه حقوقی برای مشتری در نظر گرفته شده است؟"
print(f"Searching for documents similar to: '{test_query}'")


# Perform a similarity search. 'k=2' retrieves the top 2 most similar chunks.
# Deliberately best-effort: a failure is reported but does not stop the notebook.
try:
    similar_docs = vector_store.similarity_search(test_query, k=2)
    print(f"\nFound {len(similar_docs)} similar documents:")

    # Display a preview of each retrieved document together with its source.
    for i, doc in enumerate(similar_docs):
        print(f"\n--- Document {i+1} ---")
        content_snippet = make_snippet(doc.page_content)
        source = doc.metadata.get("source", "Unknown Source")  # Get source from metadata
        print(f"Content Snippet: {content_snippet}")
        print(f"Source: {source}")

except Exception as e:
    print(f"An error occurred during similarity search: {e}")

In [None]:

from IPython.display import display
import ipywidgets as widgets

In [None]:
from IPython.display import display, Markdown
import ipywidgets as widgets

def format_answer_markdown(answer, sources):
    """Render an answer and its source list as a Markdown string.

    Args:
        answer: Answer text; surrounding whitespace is stripped.
        sources: Iterable of objects exposing a ``metadata`` dict. The
            ``'source'`` entry is used as the label, with a fixed fallback
            label when it is absent.

    Returns:
        Markdown with an answer section, a horizontal rule and a bullet list
        of sources. Each distinct source label is listed once, in first-seen
        order, so several chunks from the same file do not repeat its path.
    """
    labels = [doc.metadata.get('source', 'بخش از سند') for doc in sources]
    unique_labels = dict.fromkeys(labels)  # de-duplicate, keep order
    source_lines = '\n'.join(f"- {label}" for label in unique_labels)
    return f"""### ✅ پاسخ:
{answer.strip()}

---

### 📄 منابع استفاده‌شده:
""" + source_lines


def ask_question():
    """Display a small question/answer form backed by ``qa_chain``.

    Renders a text area, a submit button and an output area. On each click the
    output area is cleared and then shows one of:
      * a prompt to type something, when the question is empty;
      * an end-of-conversation message, for 'خروج' / 'exit' / 'quit';
      * the chain's answer with its sources, or an error message if the
        chain call raises.

    The widgets are displayed once; they stay live, so they are not
    re-displayed after each answer.
    """
    question_box = widgets.Textarea(
        placeholder='پرسش خود را اینجا وارد کنید...',
        description='❓ سوال:',
        layout=widgets.Layout(width='100%', height='100px')
    )

    submit_button = widgets.Button(description="ارسال", button_style='success')
    output_area = widgets.Output()

    def on_click_submit(b):
        """Handle a click on the submit button (``b`` is the clicked button)."""
        output_area.clear_output()
        question = question_box.value.strip()

        # Nothing to ask: avoid a pointless (and billable) LLM call.
        if not question:
            with output_area:
                display(Markdown("⚠️ **لطفاً پرسش خود را وارد کنید.**"))
            return

        if question.lower() in ['خروج', 'exit', 'quit']:
            with output_area:
                display(Markdown("✅ **پایان گفتگو.**"))
            return

        # Run the RAG pipeline. Errors are rendered in the output area because
        # exceptions raised inside a widget callback may not be shown in the
        # cell output, depending on the frontend.
        try:
            result = qa_chain.invoke({"query": question})
            markdown_response = format_answer_markdown(
                result['result'], result['source_documents']
            )
        except Exception as exc:
            markdown_response = f"### ❌ خطا:\n{exc}"

        with output_area:
            display(Markdown(markdown_response))

    submit_button.on_click(on_click_submit)
    display(question_box, submit_button, output_area)

# Build the question form and display it in this cell's output.
ask_question()


In [None]:

import os
from IPython.display import Markdown, display
# Baseline without retrieval: send the same question used in the similarity
# search test straight to Gemini, so the answer can be compared with the RAG one.

# Do not overwrite a configured key with a placeholder right before calling the
# API; prompt (without echo) only when the key is missing or still the placeholder.
if os.environ.get("GOOGLE_API_KEY", "") in ("", "Gemini api key"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")

from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", temperature=0.7)
response = llm.invoke("در صورت وجود عیب پنهانی در مبیع، چه حقوقی برای مشتری در نظر گرفته شده است؟")
display(Markdown(response.content))