# Smart Contract Assistant v2 (Project Compliant)

This notebook implements a complete **Smart Contract Analysis System** as per the Project Specification.

**Key Features:**
1.  **Ingestion Pipeline:** Proper chunking & embedding of PDF/DOCX files.
2.  **RAG with Citations:** Answers questions citing specific source documents/pages.
3.  **LangServe Integration:** Auto-generates a `server.py` file to run the backend as a Microservice.
4.  **Gradio UI:** Frontend for uploading files and chatting.
5.  **Evaluation:** Basic metrics to test answer quality.

---

## 1. Setup & Configuration

In [1]:
# Install necessary packages
%pip install langchain langchain-openai langchain-community faiss-cpu pymupdf unstructured gradio langserve fastapi uvicorn sse_starlette

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os

# Placeholder only -- never commit a real key. Prefer exporting OPENAI_API_KEY
# in the environment before starting the notebook.
_PLACEHOLDER_API_KEY = "sk-proj..."

# Keep a key that is already exported instead of clobbering it with the
# placeholder; fall back to the placeholder only when nothing usable is set.
if not os.environ.get("OPENAI_API_KEY", "").strip():
    os.environ["OPENAI_API_KEY"] = _PLACEHOLDER_API_KEY

EMBEDDING_MODEL = "text-embedding-3-small"
CHAT_MODEL = "gpt-4o-mini"

# Check if API Key is set. The placeholder keeps the cell runnable, but every
# OpenAI call made with it will be rejected, so say so up front.
if os.environ["OPENAI_API_KEY"] == _PLACEHOLDER_API_KEY:
    print("WARNING: Please set OPENAI_API_KEY environment variable.")

## 2. Ingestion Pipeline (Extract, Chunk, Embed)

In [None]:
from typing import List
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, UnstructuredFileLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

def ingest_documents(file_paths: List[str]) -> "FAISS | None":
    """Load, split, and embed documents into a FAISS vector store.

    Args:
        file_paths: Paths of the files to ingest. ``.pdf`` files are read with
            ``PyMuPDFLoader``, ``.txt`` files with ``TextLoader``, and anything
            else with ``UnstructuredFileLoader``. The extension check is
            case-insensitive.

    Returns:
        The populated FAISS store, or ``None`` when ``file_paths`` is empty or
        nothing could be loaded from any of the files.
    """
    if not file_paths:
        return None

    all_docs = []
    loaded_files = 0
    for path in file_paths:
        lowered = path.lower()
        try:
            if lowered.endswith(".pdf"):
                loader = PyMuPDFLoader(path)
            elif lowered.endswith(".txt"):
                loader = TextLoader(path)
            else:
                loader = UnstructuredFileLoader(path)
            all_docs.extend(loader.load())
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort the
            # whole batch, so report it and move on to the next path.
            print(f"Error loading {path}: {e}")
        else:
            loaded_files += 1

    # Stop before embedding when nothing was loaded; there is nothing to index.
    if not all_docs:
        print("No documents could be loaded; nothing to ingest.")
        return None

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", ".", " "],
    )
    splits = text_splitter.split_documents(all_docs)
    if not splits:
        print("Loaded documents contained no text; nothing to ingest.")
        return None

    embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
    vectorstore = FAISS.from_documents(splits, embeddings)

    # Report the files that actually loaded, not the number requested.
    print(f"Ingested {len(splits)} chunks from {loaded_files} files.")
    return vectorstore

  from .autonotebook import tqdm as notebook_tqdm


## 3. RAG Pipeline with Citations

We use `create_retrieval_chain` combined with `create_stuff_documents_chain` to get the answer along with source documents.

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

def get_rag_chain(vectorstore):
    """Build the retrieval chain used to answer questions over ``vectorstore``.

    The chain is assembled with ``create_retrieval_chain``: a retriever
    configured with ``k=4`` feeds a stuff-documents chain that prompts the chat
    model (temperature 0) with the retrieved context and the user's input.
    """
    chat_model = ChatOpenAI(model=CHAT_MODEL, temperature=0)
    doc_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Instructions first, then a blank line, then the slot that receives the
    # retrieved documents.
    instructions = (
        "You are an expert legal assistant. Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you don't know. "
        "Keep the answer concise."
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", instructions + "\n\n" + "{context}"),
            ("human", "{input}"),
        ]
    )

    return create_retrieval_chain(
        doc_retriever,
        create_stuff_documents_chain(chat_model, chat_prompt),
    )

## 4. LangServe Microservice Implementation

This section **automatically generates** a `server.py` file. This file contains the FastAPI app and LangServe routes, fulfilling the project architecture requirement.

In [None]:
%%writefile server.py
import os
from fastapi import FastAPI
from langserve import add_routes
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

EMBEDDING_MODEL = "text-embedding-3-small"
CHAT_MODEL = "gpt-4o-mini"

embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Load the index saved under "faiss_index"; fall back to a one-document
# placeholder index so the server can still start without it.
try:
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # pickled data -- only point this at an index this project wrote itself.
    vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    print("Loaded existing FAISS index.")
except Exception as exc:
    # `except Exception` rather than a bare `except:` so that Ctrl+C and
    # SystemExit still stop the process, and the real cause is reported.
    print(f"Could not load FAISS index: {exc}")
    print("No existing index found. Creating a temporary empty one for the server startup.")
    from langchain_core.documents import Document
    vectorstore = FAISS.from_documents([Document(page_content="Empty index")], embeddings)

retriever = vectorstore.as_retriever()

llm = ChatOpenAI(model=CHAT_MODEL)
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the user's questions based on the context: {context}"),
    ("human", "{input}")
])
# Reuse the retriever built above instead of constructing a second one.
chain = create_retrieval_chain(retriever, create_stuff_documents_chain(llm, prompt))

app = FastAPI(
    title="Smart Contract Analysis API",
    version="1.0",
    description="A simple API server using LangChain's Runnable interfaces",
)

# Expose the chain's LangServe routes under /contract-assistant.
add_routes(
    app,
    chain,
    path="/contract-assistant",
)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="localhost", port=8000)


Overwriting server.py


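**To run the server:** Open a terminal in this directory and run `python server.py`. Make sure `OPENAI_API_KEY` is set in that terminal's environment (`server.py` does not set it), and ingest documents through the Gradio UI first so that a saved `faiss_index` exists; otherwise the server starts with a temporary placeholder index. You can then access the Playground at `http://localhost:8000/contract-assistant/playground`.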

## 5. Evaluation Pipeline (Basic)

A simple keyword-based check of answer quality: an answer passes if it contains at least one of the expected keywords (case-insensitive).

In [6]:
def evaluate_answer(answer: str, ground_truth_keywords: List[str]) -> bool:
    """Return True if the answer mentions at least one expected keyword.

    Matching is a case-insensitive substring test.

    Args:
        answer: The generated answer text.
        ground_truth_keywords: Keywords of which at least one should appear in
            ``answer``. Blank entries are ignored, because an empty string is a
            substring of every answer and would make the check always pass.

    Returns:
        True when any non-blank keyword occurs in ``answer``; False otherwise,
        including when the keyword list is empty.
    """
    # Lower-case the answer once rather than once per keyword.
    haystack = answer.lower()
    return any(
        keyword.lower() in haystack
        for keyword in ground_truth_keywords
        if keyword.strip()
    )

## 6. Frontend: Gradio UI

This UI connects to our local RAG pipeline defined in this notebook.

In [None]:
import gradio as gr

# Vector store shared by the Gradio callbacks: assigned by ui_ingest and read
# by ui_query. Starts as None, which ui_query reports as "upload first".
global_vs = None

def ui_ingest(files):
    """Gradio callback: ingest the uploaded files and persist the index.

    Args:
        files: The value of the file-upload component; may be empty or None
            when nothing was uploaded.

    Returns:
        A status message for the "Status" textbox.

    Side effects:
        On success, replaces the module-level ``global_vs`` and saves the
        index to ``faiss_index``. A previously ingested index is left
        untouched when the new ingestion yields nothing.
    """
    global global_vs
    if not files:
        return "Please upload files."

    # Uploads may arrive as objects exposing the path via `.name` or as plain
    # path strings (presumably depending on the Gradio version/config), so
    # accept both.
    file_paths = [getattr(f, "name", f) for f in files]
    vectorstore = ingest_documents(file_paths)

    # ingest_documents can return None; do not clobber a working index or call
    # .save_local on None in that case.
    if vectorstore is None:
        return "No content could be ingested from the uploaded files."

    global_vs = vectorstore
    global_vs.save_local("faiss_index")

    return f"Successfully ingested {len(file_paths)} files. Index saved to disk."

def ui_query(message, history):
    """Gradio chat callback: answer ``message`` and append source citations.

    Args:
        message: The user's question.
        history: Chat history supplied by ``gr.ChatInterface``; unused here.

    Returns:
        The answer followed by a "Sources" list with one line per distinct
        (file name, page) pair, in the order the documents were retrieved, or
        a prompt to upload documents when no index has been built yet.
    """
    if global_vs is None:
        return "Please upload documents first."

    chain = get_rag_chain(global_vs)
    response = chain.invoke({"input": message})

    answer = response["answer"]

    sources = []
    for doc in response.get("context", []):
        source = doc.metadata.get("source", "Unknown")
        # NOTE(review): the "page" value is shown as stored in the metadata;
        # if the loader numbers pages from 0 this will read "Page 0" -- confirm.
        page = doc.metadata.get("page", "N/A")
        sources.append(f"- {os.path.basename(source)} (Page {page})")

    # dict.fromkeys de-duplicates while keeping first-seen order; set() would
    # list the citations in an arbitrary order that changes between runs.
    unique_sources = list(dict.fromkeys(sources))
    return f"{answer}\n\n**Sources:**\n" + "\n".join(unique_sources)

# Three-tab UI: upload and index documents, chat over them, and instructions
# for starting the separate LangServe backend.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # The emoji had been mis-decoded into mojibake; restored to the intended
    # scroll character.
    gr.Markdown("# 📜 Smart Contract Assistant v2")
    gr.Markdown("Project-Compliant implementation with RAG, Citations, and LangServe support.")
    
    with gr.Tab("1. Ingestion"):
        file_input = gr.File(label="Upload Contracts", file_count="multiple")
        ingest_btn = gr.Button("Ingest & Save Index", variant="primary")
        ingest_out = gr.Textbox(label="Status")
        # ui_ingest receives the uploaded files and returns the status text.
        ingest_btn.click(ui_ingest, file_input, ingest_out)
        
    with gr.Tab("2. Chat"):
        gr.ChatInterface(
            fn=ui_query,
            title="Chat with Contracts",
            examples=["What is the termination clause?", "Are there any penalties?"],
        )
    
    with gr.Tab("3. Local API"):
        gr.Markdown(
            """
            ### backend Microservice
            To start the backend API:
            1. Open a terminal.
            2. Run: `python server.py`
            3. Access API docs at: `http://localhost:8000/docs`
            """
        )

# NOTE(review): share=True requests a public share link for this app, which
# exposes uploaded contracts beyond this machine -- confirm that is intended.
demo.launch(share=True, server_port=8091)