<a href="https://colab.research.google.com/github/NithickRoshan/Team-COAL/blob/main/Rule_Engine_Layer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 7.6 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
import json
import re
import PyPDF2


# Step 1: Extract text from PDF

def extract_text_from_pdf(pdf_path):
    """Return the stripped text of every page in a PDF that has real content.

    Args:
        pdf_path: Path of the PDF file to open (read in binary mode).

    Returns:
        A list of strings, one per retained page, in page order. A page is
        retained only when its extracted text, after stripping surrounding
        whitespace, is longer than 50 characters.
    """
    kept_pages = []
    with open(pdf_path, "rb") as handle:
        for page in PyPDF2.PdfReader(handle).pages:
            raw = page.extract_text()
            if not raw:
                # Nothing extractable on this page.
                continue
            cleaned = raw.strip()
            if len(cleaned) > 50:
                kept_pages.append(cleaned)
    return kept_pages

# Step 2: Define JSON Rules

# Validation thresholds, written as a JSON document and parsed when this
# module/cell is executed. validate_proposal reads these keys:
#   max_overhead_percent - upper bound (inclusive) on overhead as a
#                          percentage of the budget
#   max_duration_months  - upper bound (inclusive) on duration, in months
#   min_budget_lakhs     - lower bound (inclusive) on the budget, in lakhs
#   max_budget_lakhs     - upper bound (inclusive) on the budget, in lakhs
rules_json = """
{
  "max_overhead_percent": 10,
  "max_duration_months": 24,
  "min_budget_lakhs": 5,
  "max_budget_lakhs": 100
}
"""
# Parsed thresholds as a plain dict keyed by the names above.
rules = json.loads(rules_json)


# Step 3: Extract Proposal Data

# A non-negative decimal number: "10", "1.5", "10." or ".5". Unlike a loose
# character class such as [\d\.]+, this can never capture strings like "..."
# or "1.2.3", which float() would reject with ValueError.
_NUMBER = r"(\d+\.?\d*|\.\d+)"


def extract_proposal_data(text):
    """Extract budget, overhead and duration figures from proposal text.

    Very basic regex-based extraction (case-insensitive). Example phrases
    expected:
      - "Budget: 10L"
      - "Overhead: 1.5L"
      - "Duration: 18 months"

    Args:
        text: Proposal text to search.

    Returns:
        A dict with keys ``budget_lakhs`` (float), ``overhead_lakhs`` (float)
        and ``duration_months``. Each value is ``None`` when no well-formed
        figure is found. ``duration_months`` is an ``int`` for whole-month
        durations and a ``float`` for fractional ones such as "18.5 months".
    """
    def _find(pattern):
        # First match of `pattern` in the text, as a float; None if absent.
        match = re.search(pattern, text, re.IGNORECASE)
        return float(match.group(1)) if match else None

    budget = _find(rf"Budget[:\s]*{_NUMBER}\s*L")
    overhead = _find(rf"Overhead[:\s]*{_NUMBER}\s*L")
    duration = _find(rf"Duration[:\s]*{_NUMBER}\s*months?")

    # Whole-month durations stay ints; fractional ones are kept as floats
    # rather than crashing in int() or being silently truncated.
    if duration is not None and duration.is_integer():
        duration = int(duration)

    return {
        "budget_lakhs": budget,
        "overhead_lakhs": overhead,
        "duration_months": duration,
    }


# Step 4: Rule Checker

def validate_proposal(proposal, rules):
    """Check a proposal against the configured thresholds.

    A rule is evaluated only when the values it needs are present (not
    ``None``). Zero is a real value: a zero overhead passes the overhead
    check and a zero budget is tested against the budget bounds.

    Args:
        proposal: Dict with keys ``budget_lakhs``, ``overhead_lakhs`` and
            ``duration_months``; each value is a number or ``None``.
        rules: Dict with keys ``max_overhead_percent``,
            ``max_duration_months``, ``min_budget_lakhs`` and
            ``max_budget_lakhs``.

    Returns:
        A tuple ``(results, compliance_score)``. ``results`` is a list of
        human-readable pass/fail lines, one per evaluated rule, and
        ``compliance_score`` is the percentage of evaluated rules that
        passed (``0`` when no rule could be evaluated).
    """
    results = []
    passed = 0
    total_rules = 0

    budget = proposal["budget_lakhs"]
    overhead = proposal["overhead_lakhs"]
    duration = proposal["duration_months"]

    # Rule 1: Overhead %
    # Skipped when the budget is 0 because the percentage is undefined; the
    # min-budget rule below still reports the zero budget.
    if budget is not None and overhead is not None and budget != 0:
        total_rules += 1
        overhead_percent = (overhead / budget) * 100
        if overhead_percent <= rules["max_overhead_percent"]:
            results.append(f"✅ Overhead check PASSED ({overhead_percent:.1f}% ≤ {rules['max_overhead_percent']}%)")
            passed += 1
        else:
            results.append(f"❌ Overhead check FAILED ({overhead_percent:.1f}% > {rules['max_overhead_percent']}%)")

    # Rule 2: Duration
    if duration is not None:
        total_rules += 1
        if duration <= rules["max_duration_months"]:
            results.append(f"✅ Duration check PASSED ({duration} ≤ {rules['max_duration_months']} months)")
            passed += 1
        else:
            results.append(f"❌ Duration check FAILED ({duration} > {rules['max_duration_months']} months)")

    if budget is not None:
        # Rule 3: Min Budget
        total_rules += 1
        if budget >= rules["min_budget_lakhs"]:
            results.append(f"✅ Min Budget check PASSED ({budget} ≥ {rules['min_budget_lakhs']} lakhs)")
            passed += 1
        else:
            results.append(f"❌ Min Budget check FAILED ({budget} < {rules['min_budget_lakhs']} lakhs)")

        # Rule 4: Max Budget
        total_rules += 1
        if budget <= rules["max_budget_lakhs"]:
            results.append(f"✅ Max Budget check PASSED ({budget} ≤ {rules['max_budget_lakhs']} lakhs)")
            passed += 1
        else:
            results.append(f"❌ Max Budget check FAILED ({budget} > {rules['max_budget_lakhs']} lakhs)")

    compliance_score = (passed / total_rules) * 100 if total_rules > 0 else 0
    return results, compliance_score


# Step 5: Run on PDF

if __name__ == "__main__":
    pdf_path = "Business Proposals.pdf"
    sections = extract_text_from_pdf(pdf_path)

    # extract_text_from_pdf only keeps pages with more than 50 characters of
    # text, so the list can be empty (e.g. a scanned/image-only PDF). Fall
    # back to empty text instead of raising IndexError on sections[0].
    if sections:
        base_text = sections[0]
    else:
        base_text = ""
        print(f"⚠️ No usable text extracted from {pdf_path!r}; validating injected demo values only.")

    # Demo: simulate adding budget info inside a proposal section
    # (since the PDF doesn't have real numbers, we inject some).
    test_text = base_text + "\nBudget: 10L\nOverhead: 1.5L\nDuration: 18 months"
    proposal = extract_proposal_data(test_text)

    results, score = validate_proposal(proposal, rules)

    print("---- Extracted Proposal Data ----")
    print(proposal)

    print("\n---- Rule Validation Report ----")
    for r in results:
        print(r)

    print(f"\n✅ Compliance Score: {score:.1f}%")




---- Extracted Proposal Data ----
{'budget_lakhs': 10.0, 'overhead_lakhs': 1.5, 'duration_months': 18}

---- Rule Validation Report ----
❌ Overhead check FAILED (15.0% > 10%)
✅ Duration check PASSED (18 ≤ 24 months)
✅ Min Budget check PASSED (10.0 ≥ 5 lakhs)
✅ Max Budget check PASSED (10.0 ≤ 100 lakhs)

✅ Compliance Score: 75.0%
