<a href="https://colab.research.google.com/github/Reshoan/Excel-Summary/blob/main/Excel_Summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
%%writefile app.py
import html
import re
import time
from collections import deque, defaultdict
from io import BytesIO

import pandas as pd
import streamlit as st
from openpyxl import load_workbook
from pyngrok import ngrok

# Graphviz is an optional dependency: flowchart export is only offered when
# the `graphviz` Python package can be imported.
try:
    import graphviz
except ImportError:
    # Only a missing/unimportable package disables the feature; any other
    # error is unexpected and is allowed to surface instead of being hidden.
    _GRAPHVIZ_AVAILABLE = False
else:
    _GRAPHVIZ_AVAILABLE = True

# ------ STREAMLIT APP ------ #
st.title("Excel Form Processor")

# Step 1: Upload Excel file
uploaded_file = st.file_uploader("Upload an Excel file", type=["xlsx"])

if uploaded_file:
    # Parse the upload once; every sheet below is read through this handle
    # instead of re-parsing the uploaded stream for each sheet.
    xls = pd.ExcelFile(uploaded_file)

    # Must contain formFields
    if "formFields" not in xls.sheet_names:
        st.error("The Excel file must contain a sheet named 'formFields'.")
    else:

        df = xls.parse("formFields")

        # Fixed required columns
        field_name_col = "name"
        field_type_col = "input_type"
        field_order_col = "order"
        form_col = "formName"

        # Fail with a readable message (rather than a KeyError traceback)
        # when the sheet does not have the columns the processing relies on.
        required_cols = [form_col, field_name_col, field_type_col, field_order_col]
        missing_cols = [c for c in required_cols if c not in df.columns]
        if missing_cols:
            st.error(
                "The 'formFields' sheet is missing required column(s): "
                + ", ".join(missing_cols)
            )
            st.stop()

        # Fixed category columns; only those present in the sheet are used
        fixed_category_cols = [
            "is_editable", "is_hidden", "is_required", "is_nullable",
            "is_multiselect", "is_richtext", "editor_height",
            "is_encrypted", "is_conditional"
        ]
        selected_category_cols = [c for c in fixed_category_cols if c in df.columns]

        # Load fieldMapping sheet (optional). An empty frame with the expected
        # columns is used when the sheet is absent or lacks those columns, so
        # the later per-form lookups never hit a missing column.
        field_mapping_df = pd.DataFrame(columns=["formName", "fieldName"])
        if "fieldMapping" in xls.sheet_names:
            mapping_sheet = xls.parse("fieldMapping")
            if {"formName", "fieldName"}.issubset(mapping_sheet.columns):
                field_mapping_df = mapping_sheet
            else:
                st.warning(
                    "The 'fieldMapping' sheet has no 'formName'/'fieldName' "
                    "columns; the Repeater column will be left empty."
                )

        # Process all unique forms. key=str keeps the sort from raising when
        # the column mixes text and numeric form names.
        all_forms = sorted(df[form_col].dropna().unique(), key=str)
        num_forms = len(all_forms)

        st.write(f"Processing {num_forms} form sheets automatically...")

        # Live-updating numbered sheet list
        sheet_list_placeholder = st.empty()

        output = BytesIO()
        progress_bar = st.progress(0)

        # openpyxl needs the stream rewound before it reads the workbook that
        # the generated sheets will be added to.
        uploaded_file.seek(0)
        wb = load_workbook(uploaded_file)

        # Remove existing mapping/diagram sheets if exist
        for sheet_to_remove in ["Transition_mapping", "Transition_mapping_visio"]:
            if sheet_to_remove in wb.sheetnames:
                wb.remove(wb[sheet_to_remove])

        created_sheet_names = []  # list to show sheets live

        # --------------------------- #
        # Generate form-based sheets
        # --------------------------- #
        # For every form: derive a worksheet title, build a table of its
        # fields (name, type, order, repeater flag, categories) and write it
        # to a fresh worksheet, updating the on-page list and progress bar.

        # Input sheets this app reads from; a generated sheet must never
        # replace one of them. Compared case-folded because Excel treats
        # sheet names case-insensitively.
        reserved_titles = {"formfields", "fieldmapping", "transitions"}
        # Titles already generated during this run (case-folded).
        titles_this_run = set()

        for i, full_form_name in enumerate(all_forms):

            # Form names are expected to be text, but a numeric cell would
            # otherwise crash on .split().
            form_label = str(full_form_name)

            # Long names keep only the middle parts (first two and the last
            # underscore-separated parts are dropped).
            parts = form_label.split("_")
            short_name = "_".join(parts[2:-1]) if len(parts) > 3 else form_label

            # Replace characters Excel forbids in sheet titles and enforce the
            # 31-character limit; fall back to a placeholder if nothing is left.
            safe_short_name = re.sub(r'[\\/*?:\[\]]', '_', short_name)[:31] or "Form"

            # Two forms can collapse to the same short title (or clash with an
            # input sheet). Append a numeric suffix instead of silently
            # overwriting the earlier sheet.
            base_name = safe_short_name
            suffix_no = 1
            while (
                safe_short_name.casefold() in reserved_titles
                or safe_short_name.casefold() in titles_this_run
            ):
                suffix_no += 1
                suffix = f"_{suffix_no}"
                safe_short_name = base_name[:31 - len(suffix)] + suffix
            titles_this_run.add(safe_short_name.casefold())

            form_df = df[df[form_col] == full_form_name].sort_values(by=field_order_col)

            final_df = form_df[[field_name_col, field_type_col, field_order_col]].copy()

            # Repeater column: "Yes" for fields listed in fieldMapping for this form
            filtered_mapping = field_mapping_df[field_mapping_df['formName'] == full_form_name]
            mapped_fields = set(filtered_mapping['fieldName'].dropna())
            final_df["Repeater"] = [
                "Yes" if field in mapped_fields else ""
                for field in final_df[field_name_col]
            ]

            # Categories column: sorted, comma-joined names of the category
            # flags that equal 1 on each row
            if selected_category_cols:
                final_df["Categories"] = [
                    ",".join(sorted(col for col in selected_category_cols if row.get(col) == 1))
                    for _, row in form_df.iterrows()
                ]

            # Remove a same-named sheet left over in the uploaded workbook
            # (e.g. from a previous run) so it is regenerated from scratch.
            if safe_short_name in wb.sheetnames:
                wb.remove(wb[safe_short_name])

            # Create new sheet
            ws = wb.create_sheet(title=safe_short_name)

            # Write header A1 & B1
            ws.cell(row=1, column=1, value="Form")
            ws.cell(row=1, column=2, value=full_form_name)

            # Column headers row 2
            for c_idx, header in enumerate(final_df.columns, start=1):
                ws.cell(row=2, column=c_idx, value=header)

            # Data rows from row 3
            for r_idx, row in enumerate(final_df.values, start=3):
                for c_idx, value in enumerate(row, start=1):
                    ws.cell(row=r_idx, column=c_idx, value=value)

            # Track created sheet under the title the workbook actually holds
            created_sheet_names.append(ws.title)

            # Live updating numbered sheet list. Titles come from the uploaded
            # file, so they are HTML-escaped before being placed in raw markup.
            numbered_list = [
                f"{idx}. {html.escape(name)}"
                for idx, name in enumerate(created_sheet_names, start=1)
            ]
            sheet_list_placeholder.markdown(
                "<div style='max-height:200px; overflow-y:auto; border:1px solid #444; padding:10px; border-radius:5px;'>"
                + "<br>".join(numbered_list)
                + "</div>",
                unsafe_allow_html=True
            )

            # Progress update (short pause so the list visibly updates)
            progress_bar.progress(int((i+1) / num_forms * 100))
            time.sleep(0.05)

        # ----------------------------------------------------------- #
        # Serialise the updated workbook into the in-memory buffer and offer
        # the resulting bytes to the user as a download.
        wb.save(output)
        output.seek(0)
        excel_bytes = output.getvalue()

        # The download keeps the uploaded file's name, prefixed "Processed_".
        if uploaded_file is not None:
            original_name = uploaded_file.name
        else:
            original_name = "file.xlsx"
        excel_download_name = "Processed_{}".format(original_name)

        st.download_button(
            "Download Modified Excel",
            data=excel_bytes,
            file_name=excel_download_name,
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

        # ----------------------------------------------------------- #
        # After Excel download: ask for flowchart preferences.
        # Builds a state diagram from the 'transitions' sheet (one edge per
        # row, from workflowFromStateName to workflowToStateName, labelled
        # with the optional 'name' column) and offers it as a download.
        if "transitions" in xls.sheet_names and _GRAPHVIZ_AVAILABLE:

            st.markdown("---")
            st.subheader("Flowchart Export Options")

            flow_format = st.selectbox("Select Flowchart Format", options=["pdf", "png", "svg"])
            flow_orientation = st.radio("Select Flowchart Orientation", options=["horizontal", "vertical"])

            transitions_df = xls.parse("transitions")
            from_col = "workflowFromStateName"
            to_col = "workflowToStateName"
            has_label_col = "name" in transitions_df.columns

            edges = defaultdict(list)   # from-state -> [(to-state, transition name), ...]
            nodes = set()

            if from_col in transitions_df.columns and to_col in transitions_df.columns:
                for _, row in transitions_df.iterrows():
                    raw_from = row[from_col]
                    raw_to = row[to_col]
                    # Blank cells arrive as NaN; str() would turn them into a
                    # bogus "nan" state, so incomplete rows are skipped.
                    if pd.isna(raw_from) or pd.isna(raw_to):
                        continue
                    frm = str(raw_from).strip()
                    to = str(raw_to).strip()
                    if not frm or not to:
                        continue
                    raw_name = row["name"] if has_label_col else None
                    name = "" if pd.isna(raw_name) else str(raw_name).strip()
                    nodes.add(frm)
                    nodes.add(to)
                    edges[frm].append((to, name))

            if not nodes:
                st.info(
                    "The 'transitions' sheet has no rows with both "
                    "'workflowFromStateName' and 'workflowToStateName'. "
                    "Cannot generate flowchart."
                )
            else:
                # Determine start node: the well-known initial state if
                # present, otherwise a state with no incoming transition,
                # otherwise the alphabetically first state. Candidates are
                # sorted so the choice is the same on every run.
                if "Product Requisition" in nodes:
                    start_node = "Product Requisition"
                else:
                    incoming = {dest for targets in edges.values() for dest, _ in targets}
                    roots = sorted(n for n in nodes if n not in incoming)
                    start_node = roots[0] if roots else min(nodes)

                # Breadth-first walk from the start node fixes the node order;
                # states unreachable from it are appended alphabetically.
                node_order = []
                seen = {start_node}
                q = deque([start_node])
                while q:
                    n = q.popleft()
                    node_order.append(n)
                    for to, _ in edges.get(n, []):
                        if to not in seen:
                            seen.add(to)
                            q.append(to)
                node_order.extend(n for n in sorted(nodes) if n not in seen)

                # Graphviz node ids: P100, P200, ... in walk order
                id_map = {node: f"P{(idx+1)*100}" for idx, node in enumerate(node_order)}

                # Correct media type per export format
                mime_by_format = {
                    "pdf": "application/pdf",
                    "png": "image/png",
                    "svg": "image/svg+xml",
                }

                # Create graphviz diagram
                try:
                    dot = graphviz.Digraph(format=flow_format)
                    dot.attr(rankdir="LR" if flow_orientation == "horizontal" else "TB")

                    for node in node_order:
                        low = node.lower()
                        if node == "Product Requisition":
                            dot.node(id_map[node], label=node, shape="oval", style="filled", fillcolor="lightgrey")
                        elif ("reject" in low) or ("closed" in low):
                            dot.node(id_map[node], label=node, shape="doublecircle", style="filled", fillcolor="lightcoral")
                        else:
                            dot.node(id_map[node], label=node, shape="box")

                    # Merge parallel transitions between the same two states
                    # into a single edge with a combined label.
                    edge_label_map = defaultdict(list)
                    for frm, lst in edges.items():
                        for to, tname in lst:
                            edge_label_map[(id_map.get(frm, frm), id_map.get(to, to))].append(tname)

                    for (f, t), labels in edge_label_map.items():
                        cleaned = " | ".join([lbl for lbl in labels if lbl])
                        if cleaned:
                            dot.edge(f, t, label=cleaned)
                        else:
                            dot.edge(f, t)

                    # Render straight to bytes: no shared fixed temp path, so
                    # concurrent sessions cannot clobber each other's output.
                    flowchart_bytes = dot.pipe()

                except Exception as e:
                    # UI boundary: any render problem (typically a missing
                    # Graphviz executable) is reported instead of crashing.
                    st.warning(f"Graphviz render failed: {e}. Please ensure Graphviz system package is installed.")
                else:
                    flow_download_name = f"Flowchart_{original_name.rsplit('.',1)[0]}.{flow_format}"

                    st.download_button(
                        label=f"Download Flowchart ({flow_format.upper()})",
                        data=flowchart_bytes,
                        file_name=flow_download_name,
                        mime=mime_by_format[flow_format]
                    )
        elif not _GRAPHVIZ_AVAILABLE:
            st.info("Graphviz is not installed. Install it to enable flowchart generation.")
        else:
            st.info("No 'transitions' sheet found. Cannot generate flowchart.")


Overwriting app.py


In [14]:
# Install packages (if not installed already)
!pip install streamlit pyngrok openpyxl --quiet

from pyngrok import ngrok
import time

# Kill previous tunnels
ngrok.kill()

# Set your ngrok token
ngrok.set_auth_token("35gXlCpbnwCAIJ9I2xiA1GXDu2u_3UoQf6QeGYhFC9MxYapgo")

# Start Streamlit in the background
get_ipython().system_raw("streamlit run app.py &")

# Wait a few seconds for Streamlit to start
time.sleep(5)

# Open ngrok tunnel
public_url = ngrok.connect(8501)
print("Streamlit app running at:", public_url)


Streamlit app running at: NgrokTunnel: "https://armless-twirly-alanna.ngrok-free.dev" -> "http://localhost:8501"
