In [4]:
import sys

# Use pip from the same Python environment Jupyter is using
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install pandas requests tqdm python-docx streamlit --no-cache-dir


Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ----------- ---------------------------- 0.5/1.8 MB 3.3 MB/s eta 0:00:01
   ----------------------- ---------------- 1.0/1.8 MB 2.2 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 2.7 MB/s eta 0:00:00
Installing collected packages: pip
Successfully installed pip-25.2




Defaulting to user installation because normal site-packages is not writeable


In [5]:
pip install pandas requests tqdm python-docx streamlit --no-cache-dir


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
import os
import json
import re
import requests
from pathlib import Path
from tqdm import tqdm
import pandas as pd
from docx import Document
from docx.shared import RGBColor
from datetime import datetime

# Folders for uploads, references, and output
BASE = Path(".")
UPLOAD_DIR = BASE / "uploaded_docs"
REF_DIR = BASE / "adgm_refs"
OUT_DIR = BASE / "output"

for folder in (UPLOAD_DIR, REF_DIR, OUT_DIR):
    folder.mkdir(exist_ok=True)


In [7]:
ADGM_RESOURCES = {
    "registration_checklist_branch": "https://www.adgm.com/documents/registration-authority/registration-and-incorporation/checklist/branch-non-financial-services-20231228.pdf",
    "private_company_checklist": "https://www.adgm.com/documents/registration-authority/registration-and-incorporation/checklist/private-company-limited-by-shares-non-financial-services-20231228.pdf",
    "employment_contract_2024": "https://assets.adgm.com/download/assets/ADGM%2BStandard%2BEmployment%2BContract%2BTemplate%2B-%2BER%2B2024%2B%28Feb%2B2025%29.docx/95dc85ba5ec611efa5a1ee51fce3d4eb"
}

def download_file(url, dest_folder):
    local_name = dest_folder / url.split("/")[-1].split("?")[0]
    if local_name.exists():
        return local_name
    resp = requests.get(url, stream=True, timeout=30)
    resp.raise_for_status()
    with open(local_name, "wb") as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)
    return local_name

for name, url in ADGM_RESOURCES.items():
    print(f"Downloading {name}...")
    path = download_file(url, REF_DIR)
    print("Saved to:", path)


Downloading registration_checklist_branch...
Saved to: adgm_refs\branch-non-financial-services-20231228.pdf
Downloading private_company_checklist...
Saved to: adgm_refs\private-company-limited-by-shares-non-financial-services-20231228.pdf
Downloading employment_contract_2024...
Saved to: adgm_refs\95dc85ba5ec611efa5a1ee51fce3d4eb


In [8]:
def read_docx_text(path: Path) -> str:
    doc = Document(path)
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())


In [11]:
from docx import Document

# Create sample docs if empty
if not any(UPLOAD_DIR.iterdir()):
    # Sample AoA document
    doc1 = Document()
    doc1.add_paragraph("Articles of Association")
    doc1.add_paragraph("Jurisdiction: UAE Federal Courts")
    doc1.save(UPLOAD_DIR / "sample_aoa.docx")

    # Sample Register of Members document
    doc2 = Document()
    doc2.add_paragraph("Register of Members: To be added")
    doc2.add_paragraph("Signature: __________________")
    doc2.save(UPLOAD_DIR / "sample_register.docx")

print("Uploaded files:", [p.name for p in list_uploaded_docs()])


Uploaded files: ['sample_aoa.docx', 'sample_register.docx']


In [12]:
DOC_KEYWORDS = {
    "articles of association": ["articles of association", "aoa"],
    "memorandum of association": ["memorandum of association", "moa", "memorandum"],
    "register of members": ["register of members", "register of shareholders"],
    "incorporation resolution": ["resolution", "incorporation"],
    "employment contract": ["employment contract", "employee", "employer"],
    "ubo declaration": ["beneficial owner", "ubo"]
}

def classify_document(text: str):
    text_l = text.lower()
    scores = {}
    for dtype, kws in DOC_KEYWORDS.items():
        for kw in kws:
            if kw in text_l:
                scores[dtype] = scores.get(dtype, 0) + 1
    return max(scores, key=scores.get) if scores else "unknown"


In [13]:
REQUIRED_FOR_INCORPORATION = [
    "articles of association",
    "memorandum of association",
    "incorporation resolution",
    "register of members",
    "ubo declaration"
]

def check_checklist(classified_docs):
    missing = [d for d in REQUIRED_FOR_INCORPORATION if d not in classified_docs]
    return {
        "required_documents": len(REQUIRED_FOR_INCORPORATION),
        "uploaded_documents": len(classified_docs),
        "missing_documents": missing
    }


In [14]:
def detect_red_flags(text: str, doc_name=""):
    issues = []
    tl = text.lower()

    if "adgm" not in tl and ("federal court" in tl or "uae federal" in tl):
        issues.append({
            "document": doc_name,
            "issue": "Wrong jurisdiction",
            "severity": "High",
            "suggestion": "Update jurisdiction to ADGM Courts"
        })

    if "________________" in text or "signature" not in tl:
        issues.append({
            "document": doc_name,
            "issue": "Missing signature block",
            "severity": "Medium",
            "suggestion": "Add proper signature section"
        })

    return issues


In [15]:
def annotate_docx(input_path: Path, issues: list, output_path: Path):
    doc = Document(input_path)
    for p in doc.paragraphs:
        for issue in issues:
            if issue["issue"].lower() in p.text.lower():
                for run in p.runs:
                    run.font.color.rgb = RGBColor(200, 30, 30)

    doc.add_page_break()
    doc.add_heading("REVIEW COMMENTS", level=2)
    for i, iss in enumerate(issues, 1):
        doc.add_paragraph(f"{i}. {iss['issue']} — {iss['suggestion']}")
    doc.save(output_path)


In [16]:
def process_all():
    results = {"documents": [], "issues": []}
    classified = []
    for p in list_uploaded_docs():
        txt = read_docx_text(p)
        dtype = classify_document(txt)
        classified.append(dtype)
        issues = detect_red_flags(txt, doc_name=p.name)
        results["documents"].append({"file": p.name, "type": dtype, "issues": len(issues)})
        results["issues"].extend(issues)
        annotate_docx(p, issues, OUT_DIR / f"reviewed_{p.name}")

    results.update(check_checklist(classified))
    with open(OUT_DIR / "review_report.json", "w") as f:
        json.dump(results, f, indent=2)
    return results

report = process_all()
report


{'documents': [{'file': 'sample_aoa.docx',
   'type': 'articles of association',
   'issues': 2},
  {'file': 'sample_register.docx',
   'type': 'register of members',
   'issues': 1}],
 'issues': [{'document': 'sample_aoa.docx',
   'issue': 'Wrong jurisdiction',
   'severity': 'High',
   'suggestion': 'Update jurisdiction to ADGM Courts'},
  {'document': 'sample_aoa.docx',
   'issue': 'Missing signature block',
   'severity': 'Medium',
   'suggestion': 'Add proper signature section'},
  {'document': 'sample_register.docx',
   'issue': 'Missing signature block',
   'severity': 'Medium',
   'suggestion': 'Add proper signature section'}],
 'required_documents': 5,
 'uploaded_documents': 2,
 'missing_documents': ['memorandum of association',
  'incorporation resolution',
  'ubo declaration']}

In [19]:
# CELL A: show current folder and list files in output/
from pathlib import Path

cwd = Path.cwd()
print("Your notebook is running here (current folder):")
print(" ", cwd, "\n")

out = cwd / "output"
if out.exists() and any(out.iterdir()):
    print("Files in output/:")
    for f in sorted(out.iterdir()):
        print(" -", f.name)
else:
    print("No files found in output/.")
    print("If you haven't run the review yet, run the cell that calls process_all().")


Your notebook is running here (current folder):
  C:\Users\Admin 

Files in output/:
 - review_report.json
 - reviewed_sample_aoa.docx
 - reviewed_sample_register.docx


In [20]:
# CELL B (Windows)
import os
out = "output"
if os.path.exists(out):
    os.startfile(out)   # opens the folder in File Explorer
else:
    print("No output/ folder found. Run process_all() first.")


In [21]:
# CELL C: show review_report.json contents in a friendly way
import json
from pathlib import Path

rep = Path("output") / "review_report.json"
if rep.exists():
    data = json.loads(rep.read_text(encoding="utf-8"))
    docs = data.get("documents", [])
    print(f"Documents reviewed: {len(docs)}\n")
    
    print("Missing required documents (if any):")
    for m in data.get("missing_documents", []):
        print(" -", m)
    print()
    
    print("Issues found (one per line):")
    for issue in data.get("issues", []):
        doc = issue.get("document", "unknown")
        what = issue.get("issue", "")
        sev = issue.get("severity", "")
        sug = issue.get("suggestion", "")
        print(f" - {doc}: {what} [{sev}] -> {sug}")
else:
    print("No review_report.json found in output/. Run process_all() first.")


Documents reviewed: 2

Missing required documents (if any):
 - memorandum of association
 - incorporation resolution
 - ubo declaration

Issues found (one per line):
 - sample_aoa.docx: Wrong jurisdiction [High] -> Update jurisdiction to ADGM Courts
 - sample_aoa.docx: Missing signature block [Medium] -> Add proper signature section
 - sample_register.docx: Missing signature block [Medium] -> Add proper signature section


In [22]:
# CELL D: preview text from the reviewed_*.docx files
from docx import Document
from pathlib import Path

p = Path("output")
if p.exists():
    docs = sorted(p.glob("reviewed_*.docx"))
    if not docs:
        print("No reviewed_*.docx files found in output/.")
    else:
        for d in docs:
            print("------", d.name)
            doc = Document(d)
            # print first 6 paragraphs as a quick preview
            for i, para in enumerate(doc.paragraphs[:6], start=1):
                text = para.text.strip()
                if text:
                    print(f"p{i}: {text}")
            print()
else:
    print("output/ folder not found.")


------ reviewed_sample_aoa.docx
p1: Articles of Association
p2: Jurisdiction: UAE Federal Courts
p4: REVIEW COMMENTS
p5: 1. Wrong jurisdiction — Update jurisdiction to ADGM Courts
p6: 2. Missing signature block — Add proper signature section

------ reviewed_sample_register.docx
p1: Register of Members: To be added
p2: Signature: __________________
p4: REVIEW COMMENTS
p5: 1. Missing signature block — Add proper signature section



In [23]:
# CELL E: list files you originally uploaded
from pathlib import Path
u = Path("uploaded_docs")
if u.exists() and any(u.iterdir()):
    print("Files in uploaded_docs/:")
    for f in sorted(u.glob("*.docx")):
        print(" -", f.name)
else:
    print("No files in uploaded_docs/. Put your .docx files into uploaded_docs/ and run process_all().")


Files in uploaded_docs/:
 - sample_aoa.docx
 - sample_register.docx


In [24]:
# CELL F: re-run the processing if your functions are loaded in the notebook
# (This expects process_all() to exist in the same notebook as a function)
try:
    report = process_all()
    print("Re-run complete. New report saved to output/review_report.json")
except NameError:
    print("process_all() is not defined in this notebook. Make sure you ran the cells that define it.")


Re-run complete. New report saved to output/review_report.json


In [25]:
import os
os.startfile("output")
