In [None]:
This script is designed to automate legal risk analysis of contract clauses in RFP (Request for Proposal) documents using a combination of NLP models and a compliance agent.
It performs the full pipeline: from extracting text from a PDF to filtering relevant legal clauses, assessing risks using an AI agent, and generating a structured PDF report.


The main goal is to identify legally important and potentially risky clauses in an RFP document and, using a Groq-based AI agent, either:

Flag them with a justification and suggest safer rewrites, or

Confirm that the clause is compliant.

This can save legal teams time and increase accuracy in early-stage contract review.



In [None]:
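# ✅ Install dependencies (uncomment if running in Colab or local environment)
# !pip install PyMuPDF                                       # provides the `fitz` module
# !pip install python-dotenv                                 # provides `load_dotenv`
# !pip install transformers torch accelerate sentencepiece   # sentencepiece is needed by the slow DeBERTa-v2 tokenizer
# !pip install phidata groq                                  # `phidata` provides the `phi` package; `groq` is its Groq client
# !pip install fpdf                                          # provides `from fpdf import FPDF`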

import fitz  # PyMuPDF
import re
import os
from phi.agent import Agent
from phi.model.groq import Groq
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from fpdf import FPDF  # to export final output

In [None]:
# ✅ Load Groq API key
# Read the key from the environment instead of hardcoding it, so a real key
# that is already exported is never overwritten by a placeholder and no secret
# ends up saved inside the notebook. Only if it is missing, ask for it
# interactively (input is not echoed).
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass  # local import: only needed for this fallback

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

# ✅ Load CUAD model
# Question-answering model checkpoint used to score clauses against the
# RFP categories.
model_name = "akdeniz27/deberta-v2-xlarge-cuad"
# use_fast=False requests the slow (Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
# Shared extractive-QA pipeline; called with a question/context pair and
# returns a dict that includes a confidence `score`.
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

In [None]:
# ✅ RFP categories
# Clause categories that each chunk is checked against, one per line so that
# additions and removals show up as single-line diffs.
rfp_categories = [
    "Termination for Convenience",
    "Non-Compete",
    "Governing Law",
    "Exclusivity",
    "Post-Termination Services",
    "Audit Rights",
    "Cap on Liability",
    "Uncapped Liability",
    "Insurance",
    "Price Restriction",
    "Revenue/Profit Sharing",
    "License Grant",
    "Irrevocable or Perpetual License",
    "Warranty Duration",
    "Change of Control",
    "IP Ownership Assignment",
    "Joint IP Ownership",
]

In [None]:
# ✅ Smart splitter function
def smart_split(text, overlap=0):
    """Split document text into chunks at numbered-clause boundaries.

    A boundary is the start of any line (other than the first) that begins
    with digits, a dot and whitespace, e.g. "3. Termination". Text that
    precedes the first such line becomes its own chunk.

    Args:
        text: Raw document text. CRLF and bare CR line endings are
            normalized to LF, and leading/trailing whitespace is dropped.
        overlap: Number of preceding chunks to prepend to every chunk so
            each one carries some earlier context. Values <= 0 disable
            overlapping.

    Returns:
        A list of stripped chunk strings. Empty or whitespace-only input
        yields an empty list regardless of ``overlap``.
    """
    normalized = re.sub(r'\r\n|\r', '\n', text.strip())
    # Put a blank line in front of every numbered heading; the split below
    # then cuts right before the heading itself.
    normalized = re.sub(r'(?<=\n)(?=\d+\.\s)', '\n', normalized)
    # Drop blank pieces up front so both return paths agree: splitting an
    # empty string yields [""], which must not be emitted as a chunk.
    pieces = [
        piece
        for piece in re.split(r'(?<=\n)(?=\d+\.\s)', normalized)
        if piece.strip()
    ]
    if overlap <= 0:
        return [piece.strip() for piece in pieces]
    # Each chunk is joined with up to `overlap` chunks that come before it.
    return [
        " ".join(pieces[max(0, index - overlap):index + 1]).strip()
        for index in range(len(pieces))
    ]

In [None]:
# ✅ Check legal importance using CUAD model
def is_legally_important(clause, threshold=0.5):
    """Decide whether a clause relates to any of the RFP categories.

    For each entry in ``rfp_categories`` the clause is passed to
    ``qa_pipeline`` as the context of a category question. The clause counts
    as important as soon as one answer's ``score`` exceeds ``threshold``;
    remaining categories are then skipped.

    Args:
        clause: Text of a single clause/chunk.
        threshold: Minimum pipeline score (exclusive) required for a match.
            Defaults to 0.5.

    Returns:
        True if any category scores above ``threshold``, otherwise False.
        Empty, whitespace-only or ``None`` clauses return False without
        invoking the model.
    """
    # Skip blank clauses instead of handing the QA pipeline an empty context,
    # e.g. when a scanned PDF yields no extractable text.
    if not clause or not clause.strip():
        return False
    # any() stops at the first category that clears the threshold.
    return any(
        qa_pipeline({
            'question': f"Does this clause relate to '{category}'?",
            'context': clause
        })['score'] > threshold
        for category in rfp_categories
    )

In [None]:
# ✅ Load and extract text from PDF
pdf_path = "e_rpf2.pdf"
# Open the document in a context manager so the underlying file is closed
# once the text has been extracted, even if a page fails to parse.
with fitz.open(pdf_path) as doc:
    # One text block per page, joined with newlines into a single string.
    full_text = "\n".join(page.get_text() for page in doc)
# overlap=1: every chunk also carries the chunk that precedes it.
chunks = smart_split(full_text, overlap=1)

print(f"✅ {len(chunks)} total chunks created.")

In [None]:
# ✅ Filter only legally relevant clauses
# Keep the chunks for which the CUAD-based check returns True, preserving
# their original order.
legally_important_chunks = list(filter(is_legally_important, chunks))

print(f"✅ {len(legally_important_chunks)} legally important clauses found.\n")

In [None]:
# ✅ Create the Risk Analyzing Agent
# The previous id "llama-3-70b-8192" does not match Groq's model naming, so
# requests would be rejected. The id is now configurable through the
# GROQ_MODEL_ID environment variable.
# NOTE(review): confirm the default below against Groq's current model list.
GROQ_MODEL_ID = os.environ.get("GROQ_MODEL_ID", "llama-3.3-70b-versatile")

compliant_agent = Agent(
    name="Risk Analysing Agent",
    model=Groq(id=GROQ_MODEL_ID),
    # The adjacent string literals below concatenate into ONE instruction
    # string describing the expected response format.
    instructions=[
        "You are a legal compliance AI agent. Given a contract clause, your responsibilities are:\n"
        "1. Assess for legal risks.\n"
        "2. If risky, respond with:\n"
        "   Justification: <why it is risky>\n"
        "   Rewrite: <a safer version>\n"
        "3. If compliant, just say:\n"
        "   Clause is compliant."
    ],
    markdown=True
)



In [None]:

# ✅ Analyze and collect responses
# Each entry is {"clause": <clause text>, "response": <agent answer as str>}.
results = []
total_clauses = len(legally_important_chunks)

for i, clause in enumerate(legally_important_chunks):
    print(f"\n📄 Clause {i+1}/{total_clauses}:")
    # Agent.run() returns a run-response object; the generated text is in
    # its `content` attribute. Store plain text so the PDF export below
    # receives a string rather than the response object.
    run_response = compliant_agent.run(clause)
    response = str(getattr(run_response, "content", run_response) or "")
    results.append({
        "clause": clause,
        "response": response
    })
    print(response)

In [None]:
# ✅ Export results to PDF
def _pdf_safe(text):
    """Return ``text`` as a str limited to Latin-1 characters.

    FPDF's built-in fonts (such as Arial) only cover Latin-1, so characters
    outside that range (smart quotes, bullets, emoji, ...) are replaced with
    "?" instead of aborting the export with an encoding error.
    """
    return str(text).encode("latin-1", errors="replace").decode("latin-1")


pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()
pdf.set_font("Arial", size=12)

# Width 0 stretches the cell to the right margin, so the title is centered
# within the printable area instead of overrunning it.
pdf.cell(0, 10, txt="RFP Legal Risk Analysis Report", ln=True, align='C')
pdf.ln(10)

for idx, item in enumerate(results):
    # Bold heading for the clause number.
    pdf.set_font("Arial", 'B', 12)
    pdf.multi_cell(0, 10, f"Clause {idx+1}:", align='L')
    # Clause text in regular weight.
    pdf.set_font("Arial", '', 11)
    pdf.multi_cell(0, 10, _pdf_safe(item["clause"]), align='L')
    pdf.ln(2)
    # Agent response in blue, then restore black for the next clause.
    pdf.set_text_color(0, 102, 204)
    pdf.multi_cell(0, 10, _pdf_safe(item["response"]), align='L')
    pdf.set_text_color(0, 0, 0)
    pdf.ln(10)

output_path = "rfp_risk_analysis_output.pdf"
pdf.output(output_path)
print(f"\n✅ PDF Report saved to {output_path}")