In [40]:
%%writefile app.py
import streamlit as st
import pandas as pd
from io import BytesIO
import re
import time
from openpyxl import load_workbook

st.title("Excel Form Processor")

# --- Seed per-session defaults ---
# Streamlit re-executes this script on every interaction; values stored in
# st.session_state survive those reruns, so each default is written only when
# its key is still missing.
_SESSION_DEFAULTS = {
    "selected_sheet": "Please select",
    "column_mapping": {},
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Step 1: ask the user for the workbook to process (.xlsx only)
uploaded_file = st.file_uploader("Upload an Excel file", type=["xlsx"])

# Label of the "nothing chosen yet" entry shown first in every selectbox.
_PLACEHOLDER = "Please select"

# Characters that are not allowed in a worksheet title: \ / * ? : [ ]
_INVALID_SHEET_CHARS = re.compile(r'[\\/*?:\[\]]')

# Worksheet titles are truncated to this many characters.
_MAX_SHEET_NAME_LEN = 31


def _remembered_index(options, remembered):
    """Return the selectbox index for a previously chosen value.

    ``options`` is the list shown *after* the placeholder entry, hence the
    ``+ 1``. Falls back to 0 (the placeholder) when nothing was chosen yet or
    when the remembered value is not offered any more, e.g. because a
    different workbook was uploaded in the same session.
    """
    if remembered == _PLACEHOLDER:
        return 0
    try:
        return options.index(remembered) + 1
    except ValueError:
        return 0


def _sanitize_sheet_name(form, taken):
    """Turn a form name into a worksheet title not yet used in this run.

    Invalid characters are replaced by ``_`` and the result is cut to the
    maximum title length. If that title was already produced for another form
    during this run (``taken``), a numeric suffix is appended so the earlier
    form's sheet is not overwritten.
    """
    base = _INVALID_SHEET_CHARS.sub('_', str(form))[:_MAX_SHEET_NAME_LEN]
    candidate = base
    counter = 2
    while candidate in taken:
        suffix = f"_{counter}"
        candidate = base[:_MAX_SHEET_NAME_LEN - len(suffix)] + suffix
        counter += 1
    return candidate


def _build_form_table(df, form, form_col, field_name_col, field_type_col,
                      field_order_col, field_mapping_df, category_cols):
    """Build the per-form table: name, type, order, Repeater[, Categories].

    Rows of ``df`` belonging to ``form`` are sorted by the field-order column.
    "Repeater" is "Yes" for fields listed for this form in
    ``field_mapping_df`` (columns ``formName`` / ``fieldName``) and "" for
    the rest. When ``category_cols`` is non-empty, "Categories" holds the
    comma-joined, sorted names of those columns whose value equals 1 in the
    row.
    """
    form_df = df[df[form_col] == form].sort_values(by=field_order_col)
    table = form_df[[field_name_col, field_type_col, field_order_col]].copy()

    mapping_rows = field_mapping_df[field_mapping_df['formName'] == form]
    repeater_fields = set(mapping_rows['fieldName'].dropna())
    table.insert(
        len(table.columns),
        "Repeater",
        ["Yes" if name in repeater_fields else "" for name in table[field_name_col]],
    )

    if category_cols:
        flags = form_df[list(category_cols)].eq(1)
        table['Categories'] = [
            ",".join(sorted(col for col, is_set in zip(category_cols, row) if is_set))
            for row in flags.itertuples(index=False, name=None)
        ]
    return table


def _write_form_sheet(ws, form, table):
    """Write the form title (row 1), headers (row 2) and data (row 3+)."""
    ws.cell(row=1, column=1, value=f"Form: {form}")

    for c_idx, header in enumerate(table.columns, start=1):
        ws.cell(row=2, column=c_idx, value=header)

    # itertuples keeps each column's own type instead of upcasting the whole
    # frame to one dtype the way ``.values`` does.
    for r_idx, record in enumerate(table.itertuples(index=False, name=None), start=3):
        for c_idx, value in enumerate(record, start=1):
            # Missing values (NaN/NaT) are left as empty cells.
            ws.cell(row=r_idx, column=c_idx, value=None if pd.isna(value) else value)


if uploaded_file:
    # Parse the upload once; every sheet below is read from this handle.
    xls = pd.ExcelFile(uploaded_file)
    sheet_names = sorted(xls.sheet_names)

    st.write("### Select the sheet containing form fields:")

    selected_sheet = st.selectbox(
        "Select sheet",
        options=[_PLACEHOLDER] + sheet_names,
        index=_remembered_index(sheet_names, st.session_state.selected_sheet),
    )

    if selected_sheet != _PLACEHOLDER:
        st.session_state.selected_sheet = selected_sheet
        st.success(f"Selected sheet: {selected_sheet}")
        df = xls.parse(selected_sheet)
        st.write("### Select columns for required fields:")

        required_fields = ["Field Name", "Field Order", "Field Type", "Form Name"]

        sorted_columns = sorted(df.columns)
        for field in required_fields:
            remembered_col = st.session_state.column_mapping.get(field, _PLACEHOLDER)
            st.session_state.column_mapping[field] = st.selectbox(
                f"Select column for {field}",
                options=[_PLACEHOLDER] + sorted_columns,
                index=_remembered_index(sorted_columns, remembered_col),
                key=f"col_select_{field}",
            )

        column_mapping = st.session_state.column_mapping

        if all(v != _PLACEHOLDER for v in column_mapping.values()):
            st.success("All required columns selected!")

            form_col = column_mapping["Form Name"]
            field_name_col = column_mapping["Field Name"]
            field_order_col = column_mapping["Field Order"]
            field_type_col = column_mapping["Field Type"]

            all_forms = sorted(df[form_col].dropna().unique())

            selected_forms = st.multiselect(
                "Select forms to process (or leave all selected for all forms):",
                options=all_forms,
                default=all_forms
            )

            # Every column that is not already mapped to a required field can
            # serve as a 0/1 category flag.
            mapped_columns = set(column_mapping.values())
            category_columns = sorted(c for c in df.columns if c not in mapped_columns)
            selected_category_cols = st.multiselect(
                "Select additional columns (0/1) to use for Categories (optional):",
                options=category_columns
            )

            # Optional "fieldMapping" sheet lists the repeater fields per form.
            if "fieldMapping" in sheet_names:
                field_mapping_df = xls.parse("fieldMapping")
            else:
                field_mapping_df = pd.DataFrame(columns=["formName", "fieldName"])

            if st.button("Generate Excel Report"):
                total_forms = len(selected_forms)
                progress_bar = st.progress(0)

                # Re-open the upload with openpyxl so the report sheets are
                # added to a copy of the original workbook.
                uploaded_file.seek(0)
                wb = load_workbook(uploaded_file)

                written_sheets = set()
                for i, form in enumerate(selected_forms):
                    form_table = _build_form_table(
                        df, form, form_col, field_name_col, field_type_col,
                        field_order_col, field_mapping_df, selected_category_cols,
                    )

                    sheet_name = _sanitize_sheet_name(form, written_sheets)
                    written_sheets.add(sheet_name)

                    # Replace a sheet of the same name that was already in the
                    # uploaded workbook (e.g. from an earlier run).
                    if sheet_name in wb.sheetnames:
                        wb.remove(wb[sheet_name])

                    ws = wb.create_sheet(title=sheet_name)
                    _write_form_sheet(ws, form, form_table)

                    progress_bar.progress(int((i + 1) / total_forms * 100))
                    time.sleep(0.05)

                output = BytesIO()
                wb.save(output)
                st.success("Original Excel modified successfully!")
                st.download_button(
                    label="Download Modified Excel",
                    data=output.getvalue(),
                    file_name="modified.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )


Overwriting app.py


In [None]:
# Install packages (if not installed already)
!pip install streamlit pyngrok openpyxl --quiet

from pyngrok import ngrok
import time

# Kill previous tunnels
ngrok.kill()

# Set your ngrok token
ngrok.set_auth_token("35gXlCpbnwCAIJ9I2xiA1GXDu2u_3UoQf6QeGYhFC9MxYapgo")

# Start Streamlit in the background
get_ipython().system_raw("streamlit run app.py &")

# Wait a few seconds for Streamlit to start
time.sleep(5)

# Open ngrok tunnel
public_url = ngrok.connect(8501)
print("Streamlit app running at:", public_url)
