<a href="https://colab.research.google.com/github/JAshinflame/AI-Agents/blob/main/ncr_rfi_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NCR & RFI Generator Notebook (Auto-insert placeholders)

This notebook will:
1. Convert `.doc` templates to `.docx` using LibreOffice if needed.
2. Insert Jinja-style placeholders automatically into your templates (NCR & RFI).
3. Render the templates using `docxtpl` with provided inputs:
   - WIR remark: `Cold joint observed at slab edge and needs rectification.`
   - RFI query: `Missing dimension in Section B-B of drawing A-305.`
4. Save the filled DOCX files into `./generated/` and try to convert to PDF if LibreOffice is available.

⚠️ Notes:
- This notebook tries to convert `.doc` → `.docx` using `soffice` (LibreOffice). If LibreOffice is not installed, please convert templates to `.docx` manually before running.
- Install required Python packages in the first cell when prompted.


In [6]:
# Install required python packages
!pip install python-docx docxtpl --quiet

In [9]:
import subprocess
import sys
from pathlib import Path
from docx import Document
from docxtpl import DocxTemplate
import shutil
import re
import json

# === Configuration: input template paths ===
ncr_template = Path('/content/Test 7. Non-Conformance Report (NCR).docx')
rfi_template = Path('/content/Test 1 . Request for Information RFI.docx')
output_folder = Path('/content/generated')
output_folder.mkdir(parents=True, exist_ok=True)

def convert_doc_to_docx(doc_path: Path) -> Path:
    '''Try to convert .doc to .docx using LibreOffice (soffice). Returns .docx Path.'''
    if doc_path.suffix.lower() == '.docx':
        return doc_path
    out_path = doc_path.with_name(doc_path.stem + '_converted' + '.docx') # Use a new name to avoid overwriting original if it was already .docx
    # Try soffice conversion
    try:
        cmd = ['soffice', '--headless', '--convert-to', 'docx', '--outdir', str(doc_path.parent), str(doc_path)]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Soffice creates the new file in the --outdir, with .docx suffix.
        # The command doesn't return the exact output path, so we have to guess based on input name.
        # The converted file name will be the original stem + .docx in the output directory.
        converted_path_guess = doc_path.parent / (doc_path.stem + '.docx')
        if converted_path_guess.exists():
            print(f'Converted {doc_path.name} -> {converted_path_guess.name}')
            return converted_path_guess
        else:
             raise FileNotFoundError(f'LibreOffice conversion succeeded, but converted file not found at {converted_path_guess}.')

    except Exception as e:
        print('LibreOffice conversion failed or soffice not found:', e)
    raise RuntimeError(f'Unable to convert {doc_path} to .docx automatically. Please convert manually and re-run.')


def insert_placeholders(docx_path: Path, placeholder_map: dict) -> Path:
    '''Insert placeholders into a copy of the docx and return new path.'''
    doc = Document(str(docx_path))
    modified = False
    out_path = docx_path.with_name(docx_path.stem + '_placeholders.docx')
    # Create a new document to hold the modified content
    new_doc = Document()

    # Store original paragraphs for checking against placeholders
    original_paragraphs_text = [p.text.strip().lower() for p in doc.paragraphs]

    # For each paragraph, check if it looks like a section heading and inject placeholder in next paragraph
    for i, p in enumerate(doc.paragraphs):
        text = p.text.strip()
        new_doc.add_paragraph(text) # Add the original paragraph text
        if not text:
            continue
        key = text.lower().rstrip(':').strip()
        if key in placeholder_map:
            placeholder = placeholder_map[key]
            new_doc.add_paragraph(placeholder) # Add the placeholder as a new paragraph after the heading
            modified = True

    # Also attempt to replace common inline labels (e.g., 'Project: XYZ') with 'Project: {{project_name}}' in the new document
    inline_pattern_map = {
        r'(Project\s*[:\-]\s*)(.+)': r'\1{{project_name}}',
        r'(Client\s*[:\-]\s*)(.+)': r'\1{{client}}',
        r'(Location\s*[:\-]\s*)(.+)': r'\1{{location}}',
        r'(Contractor\s*[:\-]\s*)(.+)': r'\1{{contractor}}',
        r'(Query\s*[:\-]\s*)(.+)': r'\1{{query}}', # Added inline replacement for Query
        r'(Description\s*[:\-]\s*)(.+)': r'\1{{background}}' # Added inline replacement for Description in RFI context
    }
    # Iterate through the paragraphs in the newly created document to apply inline replacements
    for p in new_doc.paragraphs:
        for pat, repl in inline_pattern_map.items():
            if re.search(pat, p.text, flags=re.IGNORECASE):
                new = re.sub(pat, repl, p.text, flags=re.IGNORECASE)
                p.text = new
                modified = True

    # Check if any placeholders from the map were NOT found as headings or inline labels
    # If not found, append them at the end as a fallback
    existing_placeholders_in_new_doc = set(re.findall(r'\{\{.*?\}\}', '\n'.join([p.text for p in new_doc.paragraphs])))
    for key, placeholder in placeholder_map.items():
        if placeholder not in existing_placeholders_in_new_doc:
            # Check if the original document had a paragraph that matched the key (case-insensitive, stripped)
            found_in_original = False
            # Check headings
            if key in original_paragraphs_text:
                found_in_original = True
            # Check inline labels using the patterns
            if not found_in_original:
                 for pat in inline_pattern_map:
                    # We check if the original text contains the text part that would be matched by the pattern
                    # before the placeholder is inserted. This is a heuristic check.
                    text_to_match = pat.split(r'\s*[:\-]\s*')[0].replace(r'(', '').replace(r')', '').lower()
                    for original_text in original_paragraphs_text:
                         if text_to_match in original_text:
                              found_in_original = True
                              break
                    if found_in_original:
                         break


            if not found_in_original:
                print(f"Warning: Placeholder '{placeholder}' (for key '{key}') was not found as a heading or inline label. Appending at the end.")
                new_doc.add_paragraph(f"Missing Placeholder: {placeholder}") # Add placeholder at the end as a fallback
                modified = True


    new_doc.save(str(out_path))
    if modified:
        print('Inserted placeholders into', out_path.name)
    else:
        print('No explicit headings matched; saved copy with inline replacements (if any).', out_path.name)
    return out_path

# Prepare placeholder maps (common labels -> placeholder)
ncr_placeholders = {
    'project': '{{project_name}}',
    'project name': '{{project_name}}',
    'location': '{{location}}',
    'client': '{{client}}',
    'contractor': '{{contractor}}',
    'description of non-conformance': '{{description}}',
    'description': '{{description}}',
    'reported by': '{{reported_by}}',
    'issued by': '{{issued_by}}',
    'ncr no': '{{ncr_number}}',
    'ncr number': '{{ncr_number}}',
    'action required': '{{action_required}}', # Added based on context_ncr
    'severity': '{{severity}}', # Added based on context_ncr
    'responsible party': '{{responsible_party}}', # Added based on context_ncr
    'due date': '{{due_date}}' # Added based on context_ncr
}

rfi_placeholders = {
    'project': '{{project_name}}',
    'drawing reference': '{{drawing_ref}}',
    'drawing': '{{drawing_ref}}',
    'query': '{{query}}',
    'description': '{{background}}', # Mapped 'description' heading to 'background' placeholder
    'background': '{{background}}', # Explicitly added 'background' as a key for fallback
    'requested action': '{{requested_action}}',
    'issued by': '{{issued_by}}',
    'rfi no': '{{rfi_number}}',
    'rfi number': '{{rfi_number}}', # Added based on context_rfi
    'to': '{{to}}', # Added based on context_rfi
    'priority': '{{priority}}', # Added based on context_rfi
    'due date': '{{due_date}}' # Added based on context_rfi
}


print('Templates:')
print('  NCR:', ncr_template.exists(), ncr_template)
print('  RFI:', rfi_template.exists(), rfi_template)

Templates:
  NCR: True /content/Test 7. Non-Conformance Report (NCR).docx
  RFI: True /content/Test 1 . Request for Information RFI.docx


In [13]:
# Convert templates if needed
try:
    ncr_docx = convert_doc_to_docx(ncr_template)
except Exception as e:
    # Since manual conversion is done, we can skip this error and assume ncr_template is already docx
    print(f"Skipping automatic NCR conversion: {e}")
    ncr_docx = ncr_template


try:
    rfi_docx = convert_doc_to_docx(rfi_template)
except Exception as e:
    # Since manual conversion is done, we can skip this error and assume rfi_template is already docx
    print(f"Skipping automatic RFI conversion: {e}")
    rfi_docx = rfi_template


ncr_docx = Path(ncr_docx)
rfi_docx = Path(rfi_docx)

# --- Add diagnostic step here ---
import magic # You might need to install this library: !pip install python-magic

def get_file_type(filepath):
    try:
        mime = magic.Magic(mime=True)
        file_type = mime.from_file(str(filepath))
        return file_type
    except Exception as e:
        return f"Error getting file type: {e}"

print(f"\nChecking file type for {ncr_docx.name}: {get_file_type(ncr_docx)}")
print(f"Checking file type for {rfi_docx.name}: {get_file_type(rfi_docx)}")
# --- End of diagnostic step ---


# Insert placeholders
ncr_with_ph = insert_placeholders(ncr_docx, ncr_placeholders)
rfi_with_ph = insert_placeholders(rfi_docx, rfi_placeholders)

print('\nPrepared template copies with placeholders:\n', ncr_with_ph, '\n', rfi_with_ph)


Checking file type for Test 7. Non-Conformance Report (NCR).docx: application/vnd.openxmlformats-officedocument.wordprocessingml.document
Checking file type for Test 1 . Request for Information RFI.docx: application/vnd.openxmlformats-officedocument.wordprocessingml.document
Inserted placeholders into Test 7. Non-Conformance Report (NCR)_placeholders.docx
No explicit headings matched; saved copy with inline replacements (if any). Test 1 . Request for Information RFI_placeholders.docx

Prepared template copies with placeholders:
 /content/Test 7. Non-Conformance Report (NCR)_placeholders.docx 
 /content/Test 1 . Request for Information RFI_placeholders.docx


In [12]:
!pip install python-magic --quiet

In [14]:
# === Render templates with example data ===
from datetime import date
import uuid

today = date.today().isoformat()
ncr_number = f'NCR-{date.today().strftime("%Y%m%d")}-{str(uuid.uuid4())[:6].upper()}'
rfi_number = f'RFI-{date.today().strftime("%Y%m%d")}-{str(uuid.uuid4())[:6].upper()}'

context_ncr = {
    'project_name': 'Al Zahra Commercial Tower',
    'location': 'Lusail',
    'client': 'Future Real Estate W.L.L.',
    'contractor': 'ASB Trading & Contracting W.L.L.',
    'ncr_number': ncr_number,
    'ncr_date': today,
    'reported_by': 'Site Inspector: Eng. Ahmed',
    'issued_by': 'QA Manager: Ms. Fatima',
    'description': 'Cold joint observed at slab edge and needs rectification.',
    'action_required': 'Investigate cold joint; prepare surface and perform repair as per structural repair procedure. Document rectification and re-inspect.',
    'severity': 'Major',
    'responsible_party': 'ASB Trading & Contracting W.L.L.',
    'due_date': today
}

context_rfi = {
    'project_name': 'Al Zahra Commercial Tower',
    'rfi_number': rfi_number,
    'rfi_date': today,
    'issued_by': 'Site Inspector: Eng. Ahmed',
    'to': 'ASB Trading & Contracting W.L.L.',
    'drawing_ref': 'A-305',
    'query': 'Missing dimension in Section B-B of drawing A-305.',
    'background': 'While reviewing drawing set, the dimension required to proceed with the work was not present in Section B-B.',
    'requested_action': 'Please provide the missing dimension for Section B-B of drawing A-305 or advise the correct dimension to be used.',
    'priority': 'High',
    'due_date': today,
}
def render_docx(template_path, context, output_name):
    tpl = DocxTemplate(str(template_path))
    tpl.render(context)
    out_path = output_folder / output_name
    tpl.save(str(out_path))
    print('Saved:', out_path)

# Render
ncr_out = f'NCR_{ncr_number}.docx'
rfi_out = f'RFI_{rfi_number}.docx'
render_docx(ncr_with_ph, context_ncr, ncr_out)
render_docx(rfi_with_ph, context_rfi, rfi_out)

print('\nGenerated files in:', output_folder)


Saved: /content/generated/NCR_NCR-20251022-B2E59B.docx
Saved: /content/generated/RFI_RFI-20251022-63B185.docx

Generated files in: /content/generated


In [15]:
# Optional: try to convert outputs to PDF using LibreOffice
def convert_to_pdf(docx_path: Path):
    pdf_path = docx_path.with_suffix('.pdf')
    try:
        cmd = ['soffice', '--headless', '--convert-to', 'pdf', '--outdir', str(docx_path.parent), str(docx_path)]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if pdf_path.exists():
            print('Converted to PDF:', pdf_path)
            return pdf_path
    except Exception as e:
        print('PDF conversion failed (soffice not found or error):', e)
    return None

for f in output_folder.iterdir():
    if f.suffix.lower() == '.docx':
        convert_to_pdf(f)

print('Done. Check the generated folder for outputs.')


PDF conversion failed (soffice not found or error): [Errno 2] No such file or directory: 'soffice'
PDF conversion failed (soffice not found or error): [Errno 2] No such file or directory: 'soffice'
Done. Check the generated folder for outputs.
