In [1]:
# Load environment variables and OpenAI client
from openai import OpenAI
from dotenv import load_dotenv
import os
import re
import pdfplumber
from docx import Document

# Read KEY=value pairs from a .env file in the current directory into the environment.
load_dotenv()

# Build the API client from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


In [2]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``pdfplumber.open``.

    Returns:
        The page texts concatenated in page order, each followed by a newline
        character. A page for which ``extract_text()`` yields nothing
        (``None`` or an empty string) contributes only that newline.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against extract_text() returning None, which some
        # pdfplumber releases appear to do for pages without extractable text
        # (confirm against the installed version); None + "\n" would raise
        # TypeError and abort the whole extraction.
        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

def extract_text_from_docx(docx_path):
    """Return the non-blank paragraphs of a DOCX file joined by newlines.

    Args:
        docx_path: Path of the .docx file, passed straight to ``Document``.

    Returns:
        The text of every entry in ``doc.paragraphs`` whose text is not empty
        or whitespace-only, in document order, separated by single newlines.
        Paragraph text itself is not stripped.
        NOTE(review): only ``doc.paragraphs`` is read; text held in tables is
        presumably not included - confirm against python-docx if contracts
        contain tabular terms.
    """
    doc = Document(docx_path)
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())


In [3]:
# Prompt template used to summarize one chunk of contract text.
# `{chunk_text}` is the only placeholder and is filled in with str.format(),
# so any literal brace added to this template must be doubled ({{ or }}).
SUMMARIZATION_PROMPT = """
You are a legal summarization assistant. Given the contract clause(s) below, create a concise, abstractive legal summary.
Focus on: parties, effective/expiration dates, termination rights, payment/compensation obligations, liability caps, indemnities, IP ownership/licensing, exclusivity, and any unusual risks.
Keep the answer concise (about 3-6 sentences) and use plain language but preserve legal facts and numeric values.

Clause(s):
{chunk_text}

Provide:
1) A short 1-2 sentence overview.
2) Bullet list of top 4 obligations / risks with short tags (e.g., TERMINATION: either party may..., LIABILITY CAP: $X...).
3) If present, list any key dates or numeric amounts found.
"""

def summarize_chunk_openai(chunk_text: str, model: str = "gpt-4.1-mini", temperature: float = 0.0, max_tokens: int = 400) -> str:
    """Summarize one chunk of contract text with a single chat-completion call.

    Args:
        chunk_text: Contract text substituted into ``SUMMARIZATION_PROMPT``.
        model: Name of the chat model to call.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Limit on generated tokens forwarded to the API.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed, or ``""`` when the response carries no text content.
    """
    prompt = SUMMARIZATION_PROMPT.format(chunk_text=chunk_text)
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful legal assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The message content is optional in the response (it may be None);
    # calling .strip() on None would raise AttributeError.
    content = resp.choices[0].message.content
    return (content or "").strip()

def hierarchical_summary_openai(text: str, chunk_size_chars: int = 2500, overlap_chars: int = 200, model="gpt-4.1-mini"):
    """Summarize a long contract in two stages: per chunk, then overall.

    The text is split into overlapping character windows, each window is
    summarized with ``summarize_chunk_openai``, and the intermediate summaries
    are merged by one final chat-completion call.

    Args:
        text: Full contract text.
        chunk_size_chars: Maximum number of characters per chunk.
        overlap_chars: Characters shared between consecutive chunks; must be
            non-negative and smaller than ``chunk_size_chars``.
        model: Chat model used for both stages.

    Returns:
        ``(final_summary, chunk_summaries)``: the merged summary string and the
        list of per-chunk summaries in document order. ``("", [])`` is returned
        without calling the API when ``text`` is empty or only whitespace.

    Raises:
        ValueError: If ``overlap_chars`` is negative (text would be skipped) or
            not smaller than ``chunk_size_chars`` (the window would never
            advance).
    """
    if overlap_chars < 0 or chunk_size_chars <= overlap_chars:
        raise ValueError(
            "chunk_size_chars must be greater than overlap_chars, and overlap_chars must be >= 0 "
            f"(got chunk_size_chars={chunk_size_chars}, overlap_chars={overlap_chars})"
        )

    # Collapse runs of newlines to a single blank line and trim the ends.
    text = re.sub(r'\n{2,}', '\n\n', text).strip()
    if not text:
        # Nothing to summarize; skip the API instead of prompting it with nothing.
        return "", []

    # Stage 1: summarize each overlapping chunk independently.
    chunks = _split_into_chunks(text, chunk_size_chars, overlap_chars)
    chunk_summaries = [summarize_chunk_openai(ch, model=model) for ch in chunks]

    # Stage 2: merge the intermediate summaries into one overall summary.
    combined = "\n\n".join(chunk_summaries)
    final_prompt = (
        "You are a legal summarization assistant. The following are intermediate summaries "
        "of parts of a contract. Produce a single concise abstractive summary of the whole contract, "
        "emphasizing obligations, risks, and important dates and numeric values. "
        "Also produce a short (4-item) prioritized checklist of clauses that require human review.\n\n"
        f"{combined}"
    )

    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful legal assistant."},
            {"role": "user", "content": final_prompt},
        ],
        temperature=0.0,
        max_tokens=600,
    )
    # The message content may be None; treat that as an empty summary.
    final_summary = (resp.choices[0].message.content or "").strip()
    return final_summary, chunk_summaries


def _split_into_chunks(text: str, chunk_size_chars: int, overlap_chars: int):
    """Split ``text`` into windows of ``chunk_size_chars`` overlapping by ``overlap_chars``."""
    step = chunk_size_chars - overlap_chars
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size_chars])
        # Stop once a window reaches the end of the text: advancing further
        # would only yield a fragment already contained in this window.
        if start + chunk_size_chars >= len(text):
            break
        start += step
    return chunks


In [19]:
# Contract to summarize (PDF or DOCX).
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where that file exists.
file_path = r"D:\AI\Projects\Contract_NLP\PfHospitalityGroupInc_20150923_10-12G_EX-10.1_9266710_EX-10.1_Franchise Agreement1.pdf"

# Choose the extractor from the (case-insensitive) extension; anything that
# does not end in ".pdf" is handed to the DOCX reader.
if file_path.lower().endswith(".pdf"):
    text = extract_text_from_pdf(file_path)
else:
    text = extract_text_from_docx(file_path)

# Summarize chunk by chunk, then merge the chunk summaries into one overall summary.
final_summary, chunk_summaries = hierarchical_summary_openai(
    text,
    model="gpt-4.1-mini",
    chunk_size_chars=2500,
    overlap_chars=200,
)


In [18]:
# Print the final abstractive summary
print("=== Final Abstractive Summary ===")
print(final_summary)

# Save to file. The path is named once so the confirmation message below always
# reports the file that was actually written (the two used to differ in case).
output_path = "Contract_Abstractive_Summary.txt"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(final_summary)

print(f"✅ Abstractive summary saved to '{output_path}'")


=== Final Abstractive Summary ===
**Comprehensive Summary of Pizza Fusion Franchise Agreement**

This Franchise Agreement between Pizza Fusion Holding, Inc. (“Franchisor”) and the Franchisee grants the Franchisee the non-exclusive right to operate a single Pizza Fusion restaurant at a franchisor-approved location, using the franchisor’s proprietary marks, secret recipes, and operating system under strict quality, operational, and branding standards. The initial term is 10 years, with two optional 10-year renewals requiring written notice 9–12 months before expiration and payment of successor fees.

**Key Obligations:**

- **Fees and Payments:**  
  - Initial franchise fee: $30,000 (non-refundable) due at signing.  
  - Weekly royalties: 6% of gross revenues (Monday–Sunday), paid weekly.  
  - Marketing contributions: 3% to a Marketing Fund plus 2% (base) to a Regional Fund, with possible additional 2% Regional Fund increase by majority vote.  
  - Successor franchise fee on renewal or 