## Operations v2 Python

In [13]:
import datetime
import os
import re
import tempfile

import docx
import ipywidgets as widgets
import PyPDF2
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt
from IPython.display import display, clear_output


# Function to extract text from a DOCX file
def extract_text_from_docx(file_path):
    """Read the paragraph text and table cells of a Word document.

    Args:
        file_path: Location of the .docx file, passed straight to
            ``docx.Document``.

    Returns:
        A ``(full_text, table_data)`` tuple. ``full_text`` is every paragraph's
        text joined with newlines; ``table_data`` is a flat list of rows
        (across all tables, in document order), each row being a list of
        stripped cell strings.
    """
    document = docx.Document(file_path)

    paragraph_lines = []
    for paragraph in document.paragraphs:
        paragraph_lines.append(paragraph.text)

    # Rows from all tables are flattened into one list; table boundaries are
    # not preserved.
    rows = [
        [cell.text.strip() for cell in row.cells]
        for table in document.tables
        for row in table.rows
    ]

    return "\n".join(paragraph_lines), rows

# Function to extract text from a PDF file
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        One string with the pages' extracted text separated by ``"\\n"``.
        A page whose extraction yields nothing contributes an empty string.
    """
    with open(file_path, 'rb') as pdf_stream:
        page_texts = []
        for page in PyPDF2.PdfReader(pdf_stream).pages:
            extracted = page.extract_text()
            # extract_text() may return None/empty; normalise to "".
            page_texts.append(extracted if extracted else "")
        return "\n".join(page_texts)

# Function to extract details from text
def extract_details_from_text(text):
    """Pull the labelled report fields out of free-form text.

    Each field is located by its label (case-insensitive), followed by any mix
    of colons and whitespace, followed by the value. The value is limited to
    the line on which it starts, so it can no longer swallow the label of the
    following line.

    Args:
        text: The document text to search.

    Returns:
        A dict with the keys ``project_name``, ``prepared_by``,
        ``overall_progress``, ``current_phase``, ``spi`` and ``bpi``. A field
        whose label is missing, or whose captured value is blank, maps to
        ``"Not Found"``.
    """
    # Whitespace that stays on the current line: "\s" minus CR/LF. Using plain
    # "\s" inside the value would let a match run on into the next line.
    inline_ws = r"[^\S\r\n]"
    patterns = {
        "project_name": rf"Project Name[:\s]+((?:[\w\-]|{inline_ws})+)",
        "prepared_by": rf"Prepared By[:\s]+((?:\w|{inline_ws})+)",
        # A dot is accepted only between two digits so that "45.5%" is kept
        # whole without absorbing a sentence-ending period.
        "overall_progress": (
            rf"Overall Progress[:\s]+((?:[\w%]|(?<=\d)\.(?=\d)|{inline_ws})+)"
        ),
        "current_phase": rf"Current Phase[:\s]+((?:\w|{inline_ws})+)",
        "spi": (
            rf"Schedule Performance Index \(SPI\)[:\s]+((?:[\w.\-]|{inline_ws})+)"
        ),
        "bpi": (
            rf"Budget Performance Index \(BPI\)[:\s]+((?:[\w.\-]|{inline_ws})+)"
        ),
    }

    extracted_details = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        value = match.group(1).strip() if match else ""
        # A whitespace-only capture carries no information; report it the same
        # way as a missing label.
        extracted_details[key] = value or "Not Found"
    return extracted_details

# Function to extract details from tables
def extract_details_from_table(table_data):
    """Pull the labelled report fields out of table rows.

    Every pair of horizontally adjacent cells is treated as a potential
    ``label, value`` pair, so both two-column tables and rows laid out as
    ``label, value, label, value`` are handled. When a label occurs more than
    once, the last non-empty value wins.

    Args:
        table_data: Iterable of rows, each row a list of cell strings.

    Returns:
        A dict with the keys ``project_name``, ``prepared_by``,
        ``overall_progress``, ``current_phase``, ``spi`` and ``bpi``; fields
        with no matching label (or only empty value cells) map to
        ``"Not Found"``.
    """
    # Patterns are applied to the lower-cased label cell. The abbreviations are
    # anchored on word boundaries so that labels which merely contain the
    # letters (e.g. "hospital") are not mistaken for SPI/BPI.
    label_patterns = {
        "project_name": re.compile(r"project name"),
        "prepared_by": re.compile(r"prepared by"),
        "overall_progress": re.compile(r"overall progress"),
        "current_phase": re.compile(r"current phase"),
        "spi": re.compile(r"schedule performance index|\bspi\b"),
        "bpi": re.compile(r"budget performance index|\bbpi\b"),
    }
    extracted_details = dict.fromkeys(label_patterns, "Not Found")

    for row in table_data:
        # zip() yields nothing for rows with fewer than two cells.
        for label_cell, value_cell in zip(row, row[1:]):
            value = value_cell.strip()
            if not value:
                # An empty cell must not overwrite "Not Found" or a value
                # found earlier.
                continue
            label = label_cell.strip().lower()
            for field, pattern in label_patterns.items():
                if pattern.search(label):
                    extracted_details[field] = value

    return extracted_details

# Function to create a combined Word document
def create_combined_docx(file_details):
    """Write one summary page per processed file into a single Word document.

    The document is saved in the current working directory as
    ``<ISO year>-<ISO week>-Operations.docx`` and the saved path is printed.

    Args:
        file_details: Iterable of dicts, each providing the keys
            ``file_name``, ``project_name``, ``prepared_by``,
            ``overall_progress``, ``current_phase``, ``spi`` and ``bpi``.
    """
    # Take year and week from the same ISO-calendar reading: around New Year
    # the ISO week can belong to a different year than the calendar date
    # (e.g. 30 Dec 2024 is ISO week 1 of 2025), and mixing the two would label
    # the report with the wrong year.
    iso_year, iso_week, _ = datetime.datetime.now().isocalendar()
    filename = f"{iso_year}-{iso_week:02d}-Operations.docx"
    output_path = f"./{filename}"

    # (label shown in the document, key in the details dict), in output order.
    summary_lines = (
        ("Project Name", "project_name"),
        ("Prepared By", "prepared_by"),
        ("Overall Progress", "overall_progress"),
        ("Current Phase", "current_phase"),
        ("Schedule Performance Index (SPI)", "spi"),
        ("Budget Performance Index (BPI)", "bpi"),
    )

    doc = docx.Document()
    for i, details in enumerate(file_details):
        if i:
            # Start every file after the first on a fresh page.
            doc.add_page_break()
        # Styles are addressed by their name ("Heading 1"); "Heading1" is the
        # style id, and looking a style up by id is deprecated in python-docx.
        heading = doc.add_paragraph(f"File: {details['file_name']}", style='Heading 1')
        heading.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        for label, key in summary_lines:
            doc.add_paragraph(f"{label}: {details[key]}", style='Normal')
    doc.save(output_path)
    print(f"\n✅ Combined Word document saved as: {output_path}")

# File upload widget
# Upload control restricted to the two supported formats; several files may be
# selected at once.
upload_widget = widgets.FileUpload(
    multiple=True,
    accept=".docx,.pdf",
)
# Button that triggers processing of whatever is currently uploaded.
process_button = widgets.Button(
    button_style="success",
    description="Process Files",
)
# Output widget placed below the controls in the UI.
output_area = widgets.Output()

def process_files(change):
    """Extract report details from every uploaded file and build the summary.

    Used as the click handler of ``process_button``. All messages are written
    to ``output_area``. Each uploaded .docx/.pdf is staged in a temporary
    directory so the path-based extractors read the uploaded bytes rather than
    whatever file of the same name may (or may not) exist in the working
    directory. Files that cannot be read are reported and skipped.

    Args:
        change: Argument supplied by the click callback; not used.
    """
    with output_area:
        clear_output()

        uploads = upload_widget.value
        if not uploads:
            print("No files uploaded. Please upload files to process.")
            return

        # ipywidgets 7 exposes the uploads as {filename: {"content": ...}};
        # ipywidgets 8 exposes a tuple of records with "name" and "content".
        if isinstance(uploads, dict):
            named_contents = [
                (name, info["content"]) for name, info in uploads.items()
            ]
        else:
            named_contents = [(item["name"], item["content"]) for item in uploads]

        file_details = []
        with tempfile.TemporaryDirectory() as staging_dir:
            for file_name, content in named_contents:
                print(f"\nProcessing file: {file_name}")

                # Compare the extension case-insensitively ("REPORT.DOCX").
                extension = os.path.splitext(file_name)[1].lower()
                if extension not in (".docx", ".pdf"):
                    print(f"Unsupported file type: {file_name}")
                    continue

                staged_path = os.path.join(staging_dir, os.path.basename(file_name))
                with open(staged_path, "wb") as staged_file:
                    staged_file.write(content)

                try:
                    if extension == ".docx":
                        text, table_data = extract_text_from_docx(staged_path)
                        details = extract_details_from_table(table_data)
                        text_details = extract_details_from_text(text)
                        # Values found in the running text take precedence
                        # over values found in tables.
                        details.update(
                            {k: v for k, v in text_details.items() if v != "Not Found"}
                        )
                    else:
                        text = extract_text_from_pdf(staged_path)
                        details = extract_details_from_text(text)
                except Exception as exc:
                    # UI boundary: one unreadable file should not abort the
                    # rest of the batch, so report it and move on.
                    print(f"Could not read {file_name}: {exc}")
                    continue

                file_details.append({
                    "file_name": file_name,
                    **details
                })

        if not file_details:
            print("No supported files could be processed; no document was created.")
            return

        # Create a combined Word document
        create_combined_docx(file_details)

# Attach the processing function to the button
process_button.on_click(process_files)

# Render the controls stacked vertically: upload, button, then the output area.
display(
    widgets.VBox(
        [
            upload_widget,
            process_button,
            output_area,
        ]
    )
)


VBox(children=(FileUpload(value=(), accept='.docx,.pdf', description='Upload', multiple=True), Button(button_s…