In [10]:
import os
import pinecone
from datetime import datetime, timedelta
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential
from openai import OpenAI
from openai import AzureOpenAI
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest

In [28]:
# Azure Storage Credentials
AZURE_STORAGE_ACCOUNT_NAME = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
AZURE_STORAGE_ACCOUNT_KEY = os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
AZURE_CONTAINER_NAME = os.getenv("AZURE_CONTAINER_NAME_3")
AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

# Azure Document Intelligence Credentials
AZURE_DOC_INT_KEY = os.getenv("AZURE_DOC_INTELLIGENCE_KEY")
AZURE_DOC_INT_ENDPOINT = os.getenv("AZURE_DOC_INTELLIGENCE_ENDPOINT")

# Pinecone API Key
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")
PINECONE_INDEX_NAME = "past-cases"


In [29]:
blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
container_client = blob_service_client.get_container_client(AZURE_CONTAINER_NAME)

doc_int_client = DocumentIntelligenceClient(AZURE_DOC_INT_ENDPOINT, AzureKeyCredential(AZURE_DOC_INT_KEY))

if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is not set.")
pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = PINECONE_INDEX_NAME
index2 = pc.Index(index_name)


In [30]:
def generate_sas_token(blob_name, expiration_minutes=60):
    """Generates a SAS token for secure access to a blob."""
    sas_token = generate_blob_sas(
        account_name=AZURE_STORAGE_ACCOUNT_NAME,
        container_name=AZURE_CONTAINER_NAME,
        blob_name=blob_name,
        account_key=AZURE_STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(minutes=expiration_minutes)
    )
    return sas_token


In [31]:
def extract_text_from_pdf(blob_name):
    sas_token = generate_sas_token(blob_name)
    blob_url = f"https://{AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net/{AZURE_CONTAINER_NAME}/{blob_name}?{sas_token}"
    
    # Call Azure Document Intelligence with the URL source
    poller = doc_int_client.begin_analyze_document("prebuilt-read", AnalyzeDocumentRequest(url_source=blob_url))
    result = poller.result()

    # Extract text from lines (same as your previous logic)
    extracted_text = " ".join([line.content for page in result.pages for line in page.lines])
    return extracted_text


In [32]:
def extract_title_and_summary(text):
    """Extracts the title and summary from the extracted text."""
    lines = text.split("\n")  # Split text by lines
    title = lines[0].strip() if lines else "Untitled"  # First line as title
    summary = " ".join(lines[1:])[:300]  # First 300 characters as summary
    return title, summary

In [33]:
def chunk_text(text, chunk_size=500):
    """Splits text into chunks of fixed size."""
    words = text.split()
    chunks = [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
    return chunks


In [34]:
openai_client = AzureOpenAI(
    api_key = os.getenv("EMBEDDING_API_KEY"),
    api_version = os.getenv("EMBEDDING_API_VERSION"),
    azure_endpoint=os.getenv("EMBEDDING_API_ENDPOINT")
)

def generate_embeddings(text):
    response = openai_client.embeddings.create(model="text-embedding-ada-002", input=text)
    return response.data[0].embedding


In [35]:
def process_and_store_documents():
    blob_list = container_client.list_blobs()

    for blob in blob_list:
        blob_name = blob.name
        print(f"Processing: {blob_name}")

        # Extract text from PDF
        extracted_text = extract_text_from_pdf(blob_name)

        if not extracted_text:
            print(f"⚠️ Skipping {blob_name} due to extraction failure")
            continue

        # Split summary into chunks
        chunks = chunk_text(extracted_text)

        if not chunks:
            print(f"⚠️ Skipping {blob_name} due to empty content after chunking")
            continue

        # Store each chunk in Pinecone
        vectors = []
        for i, chunk in enumerate(chunks):
            embedding = generate_embeddings(chunk)

            vectors.append({
                "id": f"{blob_name}_chunk_{i}",
                "values": embedding,
                "metadata": {"title": blob_name, "summary_chunk": chunk}
            })

        index2.upsert(vectors)

        print(f"✅ Stored {len(chunks)} chunks from {blob_name} in Pinecone.")



In [36]:
process_and_store_documents()

Processing: mohini-karnatak.pdf
✅ Stored 4 chunks from mohini-karnatak.pdf in Pinecone.


In [1]:
import os
from langchain_openai import AzureChatOpenAI

# Load environment variables (ensure they are set in your system)
OPENAI_API_KEY = os.getenv("OPENAI_GPT_API_KEY")
AZURE_ENDPOINT = os.getenv("OPENAI_GPT_ENDPOINT")

# Initialize OpenAI Model
llm = AzureChatOpenAI(
    azure_deployment="gpt-4o-mini",
    azure_endpoint=AZURE_ENDPOINT,
    api_key=OPENAI_API_KEY,
    api_version="2024-10-21",
    temperature=0.2
)

print("✅ LLM Initialized")


✅ LLM Initialized


In [2]:
def classify_query(data):
    """Classifies user input into case search or verdict prediction."""
    user_input = data.get("user_input", "").strip() if isinstance(data, dict) else str(data).strip()

    if not user_input:
        return "unknown"

    prompt = f"""
    Classify the following user query into one of the categories:
    - "case_search" if the user is looking for similar legal cases.
    - "verdict_prediction" if the user wants a verdict prediction.

    Query: "{user_input}"
    Output (case_search or verdict_prediction):
    """
    
    response = llm.invoke(prompt)
    classification = response.content.strip().lower()

    print("🟢 Classification Result:", classification)
    return classification if classification in ["case_search", "verdict_prediction"] else "unknown"

# Test it
test_input = {"user_input": "What is the verdict for breach of contract?"}
classify_query(test_input)


🟢 Classification Result: verdict_prediction


'verdict_prediction'

In [6]:
from casesearch import search_cases
from verdict import process_case

def case_search_agent(data):
    """Processes case search queries."""
    query = data.get("user_input", "") if isinstance(data, dict) else str(data)
    if not query:
        return {"error": "No query provided."}

    result = search_cases(query)  
    print("✅ Case Search Result:", result)
    return {"result": result}  

def verdict_agent(data):
    """Processes verdict prediction queries."""
    print(f"⚖️ verdict_agent received data: {data}")
    
    case_input = data.get("user_input", "") if isinstance(data, dict) else str(data)
    if not case_input:
        return {"error": "No case input provided."}
    
    print(f"🔍 Received case input: {case_input}")
    result = process_case(case_input)  # Call imported function
    
    print(f"✅ verdict_agent result: {result}")  # Debug print
    return result  # 🟢 Ensure this returns a dictionary!


# Test Verdict Agent
test_case = {"user_input": "What is the legal outcome for fraud?"}
verdict_agent(test_case)


⚖️ verdict_agent received data: {'user_input': 'What is the legal outcome for fraud?'}
🔍 Received case input: What is the legal outcome for fraud?
🔍 Received case input: What is the legal outcome for fraud?
📜 Extracting case details for: What is the legal outcome for fraud?
🧐 GPT Raw Response: ```json
{
    "case_description": "This case addresses the legal outcomes and consequences associated with committing fraud.",
    "involved_parties": "The parties involved typically include the plaintiff (the party alleging fraud) and the defendant (the party accused of committing fraud).",
    "jurisdiction": "The jurisdiction would depend on where the alleged fraud occurred, which could be a specific state or federal court.",
    "alleged_violations": "The alleged violations pertain to fraudulent activities, which may include misrepresentation, deceit, or other forms of dishonest conduct aimed at securing an unfair or unlawful gain."
}
```
✅ Parsed Case Details: {'case_description': 'This case

{'case_description': 'This case addresses the legal outcomes and consequences associated with committing fraud.',
 'involved_parties': 'The parties involved typically include the plaintiff (the party alleging fraud) and the defendant (the party accused of committing fraud).',
 'jurisdiction': 'The jurisdiction would depend on where the alleged fraud occurred, which could be a specific state or federal court.',
 'alleged_violations': 'The alleged violations pertain to fraudulent activities, which may include misrepresentation, deceit, or other forms of dishonest conduct aimed at securing an unfair or unlawful gain.',
 'verdict': 'Based on the details provided in the case description, relevant laws, and similar past cases, the most likely verdict in the case addressing the legal outcomes and consequences associated with committing fraud would likely hinge on the interpretation of the laws related to cheating under the Indian Penal Code (IPC) and false advertising under the Consumer Prote

In [7]:
from langgraph.graph import Graph

# Initialize Workflow
workflow = Graph()
workflow.add_node("classifier", classify_query)
workflow.add_node("case_search_agent", case_search_agent)
workflow.add_node("verdict_agent", verdict_agent)

def route_decision(classification):
    if classification == "case_search":
        print("✅ Routing to case_search_agent")
        return "case_search_agent"
    elif classification == "verdict_prediction":
        print("✅ Routing to verdict_agent")
        return "verdict_agent"
    else:
        print("⚠️ Invalid classification:", classification)
        return None  # Prevent double calls

workflow.add_conditional_edges("classifier", route_decision)

# ✅ Ensure these nodes are the final output points
workflow.set_finish_point("case_search_agent")
workflow.set_finish_point("verdict_agent")

workflow.set_entry_point("classifier")

# Compile workflow
app_workflow = workflow.compile()

print("🚀 Workflow Initialized Successfully!")


🚀 Workflow Initialized Successfully!


In [8]:
test_input = {"user_input": "What is the verdict for breach of contract?"}
result = app_workflow.invoke(test_input)

print("\n🔥 Final Workflow Result:", result)


🟢 Classification Result: verdict_prediction
✅ Routing to verdict_agent
⚖️ verdict_agent received data: verdict_prediction
🔍 Received case input: verdict_prediction
🔍 Received case input: verdict_prediction
📜 Extracting case details for: verdict_prediction
🧐 GPT Raw Response: ```json
{
    "case_description": "This case involves a prediction of the verdict in a legal matter, assessing the likelihood of various outcomes based on presented evidence and legal arguments.",
    "involved_parties": "The parties involved include the plaintiff, who is seeking damages, and the defendant, who is contesting the claims made against them.",
    "jurisdiction": "The case is under the jurisdiction of the state court system, specifically within the civil division.",
    "alleged_violations": "The alleged violations include breach of contract and negligence, as claimed by the plaintiff against the defendant."
}
```
✅ Parsed Case Details: {'case_description': 'This case involves a prediction of the verdi