In [1]:
pip install transformers accelerate sentencepiece gradio --quiet


In [None]:
# ==========================
#   CLAUSE WISE — SINGLE CELL APP
#   AI-Powered Legal Document Analyzer (Python + Gradio + Granite 3.3 2B)
# ==========================

!pip install transformers accelerate sentencepiece gradio --quiet

import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# ================================
#  Load Model (IBM Granite 3.3 2B Instruct)
# ================================
# Read the Hugging Face token from the environment rather than hard-coding a
# secret (or a non-working placeholder) in the notebook. When the variable is
# unset or empty, None is passed and the library falls back to its default
# credential lookup / anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN") or None

model_name = "ibm-granite/granite-3.3-2b-instruct"

# `token=` replaces the deprecated `use_auth_token=` keyword.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=HF_TOKEN,
)

# Shared text-generation pipeline used by every feature function.
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=800,
    # Temperature only takes effect when sampling is enabled; without
    # do_sample=True generation is greedy and the setting below is ignored.
    do_sample=True,
    temperature=0.2,
)

# ================================
# Helper function for LLM prompting
# ================================
def ask_llm(prompt, document):
    """Run one analysis task over a document with the shared `llm` pipeline.

    Args:
        prompt: Instruction describing the analysis to perform.
        document: Text of the legal document to analyze.

    Returns:
        The model's generated answer with surrounding whitespace stripped, or
        a short notice when no document text was supplied.
    """
    # Skip an expensive generation call when there is nothing to analyze.
    if not document or not document.strip():
        return "Please provide a document to analyze."

    full_prompt = (
        "You are ClauseWise, an AI legal document analysis assistant.\n\n"
        f"DOCUMENT:\n{document}\n\n"
        f"TASK:\n{prompt}\n\n"
        "Return the result in clear, structured format."
    )
    # Let the pipeline drop the prompt itself instead of slicing the output by
    # len(full_prompt), which assumes the echoed prompt is character-identical
    # to the input string.
    outputs = llm(full_prompt, return_full_text=False)
    return outputs[0]["generated_text"].strip()


# ================================
#  Feature Functions
# ================================
def plain_language(doc):
    """Ask the LLM to restate the document in plain, layperson-friendly wording."""
    task = "Translate the document into plain, simple, layperson-friendly language."
    return ask_llm(task, doc)

def clause_identification(doc):
    """Ask the LLM to identify and categorize the document's key clauses and elements."""
    task = "Identify and categorize key clauses, obligations, parties, dates, risks, and core elements."
    return ask_llm(task, doc)

def summary(doc):
    """Ask the LLM for a concise summary of the document."""
    task = "Generate a concise and clear summary of the document."
    return ask_llm(task, doc)

def risk_detection(doc):
    """Ask the LLM to list risks, anomalies, missing clauses and compliance issues."""
    task = "Analyze and list any risks, anomalies, missing clauses, unusual terms, or compliance issues."
    return ask_llm(task, doc)

def search_query(doc, query):
    """Answer a free-form question about the document using the LLM.

    Args:
        doc: Text of the legal document.
        query: The user's question about the document.

    Returns:
        The answer produced by `ask_llm`, or a short notice when no question
        was entered.
    """
    # Without a question there is nothing to answer; avoid a pointless
    # generation call.
    if not query or not query.strip():
        return "Please enter a question about the document."
    return ask_llm(f"Answer the following query about the document: {query}", doc)

def comparative(doc1, doc2):
    """Compare two documents with the LLM and highlight their differences.

    Args:
        doc1: Text of the first document; passed to `ask_llm` as the document.
        doc2: Text of the second document; embedded in the task description.

    Returns:
        The comparison produced by `ask_llm`, or a short notice when either
        document is missing.
    """
    # A comparison needs both sides; bail out before invoking the model.
    if not doc1 or not doc1.strip() or not doc2 or not doc2.strip():
        return "Please provide both documents to compare."
    return ask_llm(f"Compare these two documents, highlight differences, and perform automated redlining.\n\nOther document:\n{doc2}", doc1)

def definitions(doc):
    """Ask the LLM to define and explain key legal terms found in the document."""
    task = "Provide definitions and explanations of key legal terms and clauses found in the document."
    return ask_llm(task, doc)

def predictive(doc):
    """Ask the LLM for predictive legal insights about the document."""
    task = "Provide predictive legal insights based on common legal outcomes and risks."
    return ask_llm(task, doc)

def classify_type(doc):
    """Ask the LLM to classify the document type and explain its reasoning."""
    task = "Classify the document type (contract, NDA, court order, pleading, etc.) and explain reasoning."
    return ask_llm(task, doc)


# ================================
#  Gradio Interface
# ================================
def _single_document_tab(tab_title, handler, button_label, result_label):
    """Add a tab with one document input, one result box and a trigger button."""
    with gr.Tab(tab_title):
        document_box = gr.Textbox(lines=15, label="Legal Document")
        result_box = gr.Textbox(label=result_label)
        trigger = gr.Button(button_label)
        trigger.click(handler, inputs=document_box, outputs=result_box)


with gr.Blocks(title="ClauseWise - AI Legal Document Analyzer") as app:

    gr.Markdown("# 📘 **ClauseWise – AI Legal Document Analyzer**")
    # The UI only offers text boxes (no file-upload component), so the
    # instructions mention pasting only.
    gr.Markdown("Paste legal text and select a feature to analyze it.")

    _single_document_tab("Plain Language Translation", plain_language, "Translate", "Result")
    _single_document_tab("Clause & Concept Identification", clause_identification, "Identify Clauses", "Result")
    _single_document_tab("Summary Generation", summary, "Summarize", "Summary")
    _single_document_tab("Risk & Anomaly Detection", risk_detection, "Detect Risks", "Risks")

    # Two-input tab: document plus a free-form question.
    with gr.Tab("Search & Query"):
        query_doc = gr.Textbox(lines=15, label="Legal Document")
        query = gr.Textbox(label="Ask a Question")
        query_out = gr.Textbox(label="Answer")
        query_btn = gr.Button("Ask")
        query_btn.click(search_query, inputs=[query_doc, query], outputs=query_out)

    # Two-input tab: the pair of documents to compare.
    with gr.Tab("Comparative Analysis"):
        compare_a = gr.Textbox(lines=10, label="Document A")
        compare_b = gr.Textbox(lines=10, label="Document B")
        compare_out = gr.Textbox(label="Differences / Redlining")
        compare_btn = gr.Button("Compare")
        compare_btn.click(comparative, inputs=[compare_a, compare_b], outputs=compare_out)

    _single_document_tab("Definitions & Explanations", definitions, "Explain Terms", "Definitions")
    _single_document_tab("Predictive Analytics", predictive, "Analyze Future Implications", "Predictions")
    _single_document_tab("Document Type Classification", classify_type, "Classify", "Document Type")

app.launch(debug=True)




tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://65fcb290f4dc510e42.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
