<a href="https://colab.research.google.com/github/Reshoan/Excel-Summary/blob/main/Excel_Summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
%%writefile app.py
from pyngrok import ngrok
import streamlit as st
import pandas as pd
from io import BytesIO
import re
import time
from openpyxl import load_workbook
from collections import deque, defaultdict

# Try to import graphviz python package
try:
    import graphviz
    _GRAPHVIZ_AVAILABLE = True
except Exception:
    _GRAPHVIZ_AVAILABLE = False

st.title("Excel Form Processor")

# ------------------------------
# Upload file and store in session
# ------------------------------
# Streamlit re-runs the whole script on every widget interaction, so anything
# that must survive a rerun lives in st.session_state.
for _state_key in ("uploaded_file", "uploaded_file_key", "processed_excel", "flowchart_data"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

uploaded_file = st.file_uploader("Upload an Excel file", type=["xlsx"])
if uploaded_file:
    # Identify the upload by name and size. When the user swaps in a different
    # file, drop the results derived from the previous one; otherwise the
    # "process once" guard below would keep serving the old workbook forever.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.uploaded_file_key != upload_key:
        st.session_state.processed_excel = None
        st.session_state.flowchart_data = None
        st.session_state.uploaded_file_key = upload_key
    st.session_state.uploaded_file = uploaded_file

# Only process Excel once per uploaded file
if st.session_state.uploaded_file and st.session_state.processed_excel is None:
    # Local import keeps this block self-contained. It is used to escape
    # workbook-derived names before they are embedded in raw HTML.
    from html import escape as _html_escape

    uploaded_file = st.session_state.uploaded_file
    xls = pd.ExcelFile(uploaded_file)

    if "formFields" not in xls.sheet_names:
        st.error("The Excel file must contain a sheet named 'formFields'.")
    else:
        def _cell_text(value):
            """Return the cell as a stripped string, or "" for an empty (NaN/None) cell."""
            return "" if pd.isna(value) else str(value).strip()

        def _render_sheet_list(names):
            """Show the numbered list of created sheets inside the scrollable box."""
            # Names originate from the uploaded workbook, so escape them before
            # rendering with unsafe_allow_html=True.
            sheet_list_placeholder.markdown(
                "<div style='max-height:200px; overflow-y:auto; border:1px solid #444; padding:10px; border-radius:5px;'>"
                + "<br>".join(
                    f"{idx}. {_html_escape(str(name))}"
                    for idx, name in enumerate(names, start=1)
                )
                + "</div>",
                unsafe_allow_html=True
            )

        # ----- Process Excel and build sheets -----
        df = pd.read_excel(uploaded_file, sheet_name="formFields")
        field_name_col = "name"
        field_type_col = "input_type"
        field_order_col = "order"
        form_col = "formName"
        fixed_category_cols = [
            "is_editable", "is_hidden", "is_required", "is_nullable",
            "is_multiselect", "is_richtext", "editor_height",
            "is_encrypted", "is_conditional"
        ]
        # Only flag columns that actually exist in this workbook are considered.
        selected_category_cols = [c for c in fixed_category_cols if c in df.columns]

        # Optional sheet: (formName, fieldName) pairs marking repeater fields.
        field_mapping_df = pd.DataFrame(columns=["formName", "fieldName"])
        if "fieldMapping" in xls.sheet_names:
            candidate_mapping = pd.read_excel(uploaded_file, sheet_name="fieldMapping")
            if {"formName", "fieldName"}.issubset(candidate_mapping.columns):
                field_mapping_df = candidate_mapping
            else:
                st.warning("Sheet 'fieldMapping' lacks 'formName'/'fieldName' columns; repeater flags are skipped.")

        has_transitions = "transitions" in xls.sheet_names

        all_forms = sorted(df[form_col].dropna().unique())
        # One sheet per form, plus the two transition sheets when they will be built.
        total_sheet_count = len(all_forms) + (2 if has_transitions else 0)
        st.write(f"Creating {total_sheet_count} new sheets...")

        sheet_list_placeholder = st.empty()
        output = BytesIO()
        progress_bar = st.progress(0)
        uploaded_file.seek(0)
        wb = load_workbook(uploaded_file)

        # Drop stale generated sheets from a previous run so they are rebuilt.
        for sheet_to_remove in ["Transition_mapping", "Transition_mapping_visio"]:
            if sheet_to_remove in wb.sheetnames:
                wb.remove(wb[sheet_to_remove])

        created_sheet_names = []

        # --- Form sheets ---
        for full_form_name in all_forms:
            # With more than three "_"-separated parts, the sheet is named after
            # the middle parts (first two and last part dropped).
            parts = str(full_form_name).split("_")
            if len(parts) > 3:
                short_name = "_".join(parts[2:-1])
            else:
                short_name = str(full_form_name)

            # Replace characters Excel rejects in sheet titles; cap at 31 chars.
            safe_short_name = re.sub(r'[\\/*?:\[\]]', '_', short_name)[:31]
            form_df = df[df[form_col] == full_form_name].sort_values(by=field_order_col)
            final_df = form_df[[field_name_col, field_type_col, field_order_col]].copy()

            filtered_mapping = field_mapping_df[field_mapping_df['formName'] == full_form_name]
            mapped_fields = set(filtered_mapping['fieldName'].dropna())
            repeater_list = [
                "Yes" if field in mapped_fields else ""
                for field in final_df[field_name_col]
            ]
            final_df.insert(len(final_df.columns), "Repeater", repeater_list)

            if selected_category_cols:
                # Comma-separated, alphabetically sorted names of the flags set to 1.
                categories_list = []
                for _, row in form_df.iterrows():
                    selected = [col for col in selected_category_cols if row.get(col) == 1]
                    categories_list.append(",".join(sorted(selected)) if selected else "")
                final_df["Categories"] = categories_list

            # NOTE(review): an existing sheet with the same (possibly truncated)
            # title is replaced, including one written for an earlier form.
            if safe_short_name in wb.sheetnames:
                wb.remove(wb[safe_short_name])
            ws = wb.create_sheet(title=safe_short_name)
            ws.cell(row=1, column=1, value="Form")
            ws.cell(row=1, column=2, value=full_form_name)
            for c_idx, header in enumerate(final_df.columns, start=1):
                ws.cell(row=2, column=c_idx, value=header)
            for r_idx, row in enumerate(final_df.values, start=3):
                for c_idx, value in enumerate(row, start=1):
                    ws.cell(row=r_idx, column=c_idx, value=value)

            created_sheet_names.append(safe_short_name)
            _render_sheet_list(created_sheet_names)
            progress_bar.progress(int((len(created_sheet_names) / total_sheet_count) * 100))
            time.sleep(0.05)

        # --- Transition Mapping ---
        node_order = []
        edges = defaultdict(list)
        original_name = uploaded_file.name

        if has_transitions:
            transitions_df = pd.read_excel(uploaded_file, sheet_name="transitions")
            needed_cols = ["name", "workflowFromStateName", "workflowToStateName", "workflowFormName", "fromToStateName"]
            transitions_df = transitions_df[[c for c in needed_cols if c in transitions_df.columns]].copy()
            # SLA merge if workflowSlas exists
            if "workflowSlas" in xls.sheet_names:
                slas_df = pd.read_excel(uploaded_file, sheet_name="workflowSlas")
                state_cols = {"workflowFromStateName", "workflowToStateName"}
                sla_cols = state_cols | {"sla_time", "sla_time_type"}
                # Both frames need the state columns, otherwise the row lookup
                # below would raise KeyError.
                if sla_cols.issubset(slas_df.columns) and state_cols.issubset(transitions_df.columns):
                    for idx, row in transitions_df.iterrows():
                        match = slas_df[
                            (slas_df["workflowFromStateName"] == row["workflowFromStateName"]) &
                            (slas_df["workflowToStateName"] == row["workflowToStateName"])
                        ]
                        if not match.empty:
                            # The first matching SLA row wins.
                            transitions_df.at[idx, "sla_time"] = match.iloc[0]["sla_time"]
                            transitions_df.at[idx, "sla_time_type"] = match.iloc[0]["sla_time_type"]

            ws = wb.create_sheet("Transition_mapping")
            for c_idx, header in enumerate(transitions_df.columns, start=1):
                ws.cell(row=1, column=c_idx, value=header)
            for r_idx, row in enumerate(transitions_df.values, start=2):
                for c_idx, value in enumerate(row, start=1):
                    ws.cell(row=r_idx, column=c_idx, value=value)
            created_sheet_names.append("Transition_mapping")

            # --- Build Visio-style diagram data ---
            nodes = set()
            for _, row in transitions_df.iterrows():
                # Empty cells become "" rather than the literal string "nan".
                frm = _cell_text(row.get("workflowFromStateName", ""))
                to = _cell_text(row.get("workflowToStateName", ""))
                name = _cell_text(row.get("name", ""))
                nodes.update(state for state in (frm, to) if state)
                # An edge needs both endpoints.
                if frm and to:
                    edges[frm].append((to, name))

            # BFS node ordering
            start_node = "Product Requisition" if "Product Requisition" in nodes else sorted(nodes)[0] if nodes else None
            seen = set()
            if start_node:
                q = deque([start_node])
                seen.add(start_node)
                while q:
                    n = q.popleft()
                    node_order.append(n)
                    for to, _ in edges.get(n, []):
                        if to not in seen:
                            seen.add(to)
                            q.append(to)
                # States unreachable from the start node are appended alphabetically.
                for n in sorted(nodes):
                    if n not in seen:
                        node_order.append(n)
            else:
                node_order = sorted(nodes)

            # Assign IDs
            id_map = {node: f"P{(idx+1)*100}" for idx, node in enumerate(node_order)}

            diagram_rows = []
            for node in node_order:
                targets = edges.get(node, [])
                if targets:
                    # Group transition names by target so parallel transitions
                    # share one connector label.
                    tgt_map = defaultdict(list)
                    for to, tname in targets:
                        tgt_map[to].append(tname if tname else "")
                    ordered_targets = sorted(tgt_map.keys(), key=lambda s: s.lower())
                    next_ids = [id_map.get(t, "") for t in ordered_targets]
                    connector_labels = [", ".join([n for n in tgt_map[t] if n]) for t in ordered_targets]
                    next_ids_str = ",".join(next_ids)
                    connector_str = ",".join(connector_labels)
                else:
                    next_ids_str = connector_str = ""

                low = node.lower()
                shape = "Start" if node == "Product Requisition" else "End" if ("reject" in low or "closed" in low) else "Process"

                diagram_rows.append({
                    "Process Step ID": id_map.get(node, ""),
                    "Process Step Description": node,
                    "Next Step ID": next_ids_str,
                    "Connector Label": connector_str,
                    "Shape Type": shape,
                    "Alt Text": ""
                })

            # Save Visio sheet
            visio_sheet_name = "Transition_mapping_visio"
            if visio_sheet_name in wb.sheetnames:
                wb.remove(wb[visio_sheet_name])
            wsd = wb.create_sheet(visio_sheet_name)
            headers = ["Process Step ID", "Process Step Description", "Next Step ID", "Connector Label", "Shape Type", "Alt Text"]
            for c_idx, header in enumerate(headers, start=1):
                wsd.cell(row=1, column=c_idx, value=header)
            for r_idx, row in enumerate(diagram_rows, start=2):
                for c_idx, header in enumerate(headers, start=1):
                    wsd.cell(row=r_idx, column=c_idx, value=row.get(header, ""))

            # Recorded inside this branch: the sheet (and the variable naming
            # it) only exists when the workbook has a 'transitions' sheet.
            created_sheet_names.append(visio_sheet_name)

        _render_sheet_list(created_sheet_names)
        progress_bar.progress(100)

        # Save Excel bytes in session
        wb.save(output)
        st.session_state.processed_excel = output.getvalue()
        st.session_state.flowchart_data = {
            "node_order": node_order,
            "edges": edges,
            "original_name": original_name
        }

# ------------------------------
# Show download buttons
# ------------------------------
# Offer the processed workbook once it exists in the session. The download
# name is the uploaded file's name prefixed with "Processed_".
if st.session_state.processed_excel:
    processed_bytes = st.session_state.processed_excel
    source_name = st.session_state.flowchart_data['original_name']
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    st.download_button(
        label="Download Modified Excel",
        data=processed_bytes,
        file_name=f"Processed_{source_name}",
        mime=xlsx_mime
    )
# ------------------------------
# Flowchart options
# ------------------------------
# Renders the stored transition graph with graphviz and offers it for
# download. Runs on every rerun while flowchart data is in the session, so
# changing the format or orientation re-renders the chart.
if st.session_state.flowchart_data:
    # Local import keeps this block self-contained.
    import tempfile

    st.subheader("Download Flowchart")

    node_order = st.session_state.flowchart_data["node_order"]
    edges = st.session_state.flowchart_data["edges"]
    original_name = st.session_state.flowchart_data["original_name"]

    if not _GRAPHVIZ_AVAILABLE:
        # Without this guard, the graphviz.Digraph call below raises NameError.
        st.warning("The 'graphviz' Python package is not installed, so the flowchart cannot be generated.")
    elif not node_order:
        st.info("No workflow transitions were found in the workbook, so there is no flowchart to download.")
    else:
        file_format = st.selectbox("Choose file format", ["PDF", "SVG", "PNG"])
        orientation = st.selectbox("Choose flowchart orientation", ["Vertical", "Horizontal"])

        flowchart_placeholder = st.empty()

        # Always show a button placeholder first (disabled)
        with flowchart_placeholder.container():
            st.button("Generating Flowchart\u2026", disabled=True)

        flowchart_bytes = None
        # Now generate the flowchart after selections
        with st.spinner("Generating flowchart, please wait..."):
            dot_format = file_format.lower()
            dot = graphviz.Digraph(format=dot_format)
            dot.attr(rankdir="TB" if orientation == "Vertical" else "LR")

            # Nodes: the start state and terminal-looking states get distinct shapes.
            for node in node_order:
                low = node.lower()
                if node == "Product Requisition":
                    dot.node(node, label=node, shape="oval", style="filled", fillcolor="lightgrey")
                elif ("reject" in low) or ("closed" in low):
                    dot.node(node, label=node, shape="doublecircle", style="filled", fillcolor="lightcoral")
                else:
                    dot.node(node, label=node, shape="box")

            # Edges: parallel transitions between the same pair of states are
            # merged into one edge whose label joins their names with " | ".
            edge_label_map = defaultdict(list)
            for frm, lst in edges.items():
                for to, tname in lst:
                    edge_label_map[(frm, to)].append(tname)

            for (f, t), labels in edge_label_map.items():
                cleaned = " | ".join([l for l in labels if l])
                if cleaned:
                    dot.edge(f, t, label=cleaned)
                else:
                    dot.edge(f, t)

            # Render into a private temporary directory so concurrent sessions
            # cannot overwrite each other's output file.
            try:
                with tempfile.TemporaryDirectory() as tmp_dir:
                    rendered_path = dot.render(filename="flowchart", directory=tmp_dir, cleanup=True)
                    with open(rendered_path, "rb") as fh:
                        flowchart_bytes = fh.read()
            except graphviz.ExecutableNotFound:
                # The Python package is present but the Graphviz 'dot' binary is not.
                flowchart_bytes = None

        if flowchart_bytes is None:
            flowchart_placeholder.error("Graphviz executables were not found on this system; cannot render the flowchart.")
        else:
            mime_by_format = {
                "pdf": "application/pdf",
                "svg": "image/svg+xml",
                "png": "image/png",
            }
            # Once finished, replace disabled button with active download button
            flowchart_placeholder.download_button(
                label="Download Flowchart",
                data=flowchart_bytes,
                file_name=f"{'Flowchart_Var_' if orientation=='Vertical' else 'Flowchart_Hor_'}{original_name.rsplit('.',1)[0]}.{dot_format}",
                mime=mime_by_format.get(dot_format, "application/octet-stream")
            )


Overwriting app.py


In [None]:
# Install packages (if not installed already)
!pip install streamlit pyngrok openpyxl --quiet

from pyngrok import ngrok
import time

# Kill previous tunnels
ngrok.kill()

# Set your ngrok token
ngrok.set_auth_token("35gXlCpbnwCAIJ9I2xiA1GXDu2u_3UoQf6QeGYhFC9MxYapgo")

# Start Streamlit in the background
get_ipython().system_raw("streamlit run app.py &")

# Wait a few seconds for Streamlit to start
time.sleep(5)

# Open ngrok tunnel
public_url = ngrok.connect(8501)
print("Streamlit app running at:", public_url)
