<a href="https://colab.research.google.com/github/Sant-78/Bank-Statement-App/blob/main/Bank_Statement_Formatter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🚀 Bank Statement TXT → Clean Excel Automation

In [1]:
# 🚀 Bank Statement TXT → Clean Excel Automation
# Author: [Your Name]

!pip install pandas openpyxl



In [3]:


import pandas as pd
import re
from google.colab import files

# --------- CONFIG ---------
# Regular expressions identifying statement lines that carry no transaction
# data. is_unwanted() tests each one with re.search (case-insensitive) against
# a whitespace-stripped line, so unanchored entries match anywhere in the line.
UNWANTED_PATTERNS = [
    r"STATEMENT SUMMARY",
    r"VALUE POST DETAILS",
    r"BROUGHT FORWARD",
    r"CARRIED FORWARD",
    r"COUNT\s+\d+",
    r"PAGE NO",
    r"TOLL FREE",
    r"IN CASE YOUR ACCOUNT",
    r"LETTER OF AUTHORITY",
    r"POWER OF ATTORNEY",
    # Lines made up solely of underscores (optionally two runs split by spaces).
    r"^_+$",
    r"^_+\s*_+$",
    # The column-header row, tolerant of variable spacing between the titles.
    r"^VALUE\s+POST\s+DETAILS\s+CHQ\.NO\.\s+DEBIT\s+CREDIT\s+BALANCE\s+DATE\s+DATE$"
]
# Phrases that format_details() rewrites to this exact upper-case spelling
# wherever they occur (matched case-insensitively) in a details string.
KEYWORDS = [
    "BY TRF.", "TO TRF.", "NEFT", "RTGS", "ACH", "TRF FROM", "TRF TO", "LOAN CLOSURE"
]

# --------- HELPERS ---------
def indian_format(num):
    """Format a number with Indian digit grouping and two decimals.

    The last three integer digits form one group and every group to the left
    of it holds two digits, e.g. ``1234567.5`` -> ``"12,34,567.50"``.

    Args:
        num: A number, or a string that ``float()`` can parse (without
            thousands separators).

    Returns:
        The formatted string, prefixed with ``-`` for negative values.
        Returns ``""`` when *num* is ``""``/``None``, cannot be parsed as a
        number, or is NaN/infinite.
    """
    import math

    if num in ("", None):
        return ""
    try:
        x = float(num)
    except (TypeError, ValueError, OverflowError):
        return ""
    # float() accepts "nan"/"inf", which have no "whole.dec" representation.
    if not math.isfinite(x):
        return ""

    whole, dec = f"{abs(x):.2f}".split(".")
    head, last3 = whole[:-3], whole[-3:]

    # Peel two-digit groups off the right-hand end of the leading digits.
    groups = []
    while head:
        groups.append(head[-2:])
        head = head[:-2]
    groups.reverse()
    groups.append(last3)

    # Do not emit "-0.00" for tiny negatives that round to zero.
    rounds_to_zero = whole == "0" and dec == "00"
    sign = "-" if x < 0 and not rounds_to_zero else ""
    return sign + ",".join(groups) + "." + dec

def format_details(text):
    """Clean up a transaction's details text.

    Removes any embedded column-header row, trims the text and collapses runs
    of whitespace to single spaces, then rewrites every occurrence of each
    KEYWORDS entry (matched case-insensitively) to its upper-case spelling.

    Args:
        text: Raw details string.

    Returns:
        The cleaned details string.
    """
    text = re.sub(
        r"VALUE\s+POST\s+DETAILS\s+CHQ\.NO\.\s+DEBIT\s+CREDIT\s+BALANCE\s+DATE\s+DATE",
        "", text, flags=re.IGNORECASE
    )
    text = re.sub(r"\s+", " ", text.strip())
    for kw in KEYWORDS:
        canonical = kw.upper()
        # Keywords are literal phrases, not regexes: without escaping, the "."
        # in "BY TRF." matches any character and would be overwritten by ".".
        # A callable replacement keeps the keyword text free of backslash or
        # group-reference interpretation.
        text = re.sub(
            re.escape(kw),
            lambda _match, replacement=canonical: replacement,
            text,
            flags=re.IGNORECASE,
        )
    return text

def is_unwanted(line):
    """Tell whether *line* should be discarded.

    Returns True as soon as any regex in UNWANTED_PATTERNS is found in *line*
    (case-insensitive search), otherwise False.
    """
    return any(
        re.search(pattern, line, flags=re.IGNORECASE) is not None
        for pattern in UNWANTED_PATTERNS
    )

# A money amount with two decimals, either comma-grouped ("1,23,456.78") or
# plain ("123456.78"). The lookbehinds keep a match from starting in the
# middle of a longer number, so "12345.00" is never captured as "345.00".
_AMOUNT_PATTERN = (
    r"(?<![0-9])(?<![0-9],)"
    r"((?:[0-9]{1,3}(?:,[0-9]{2,3})+|[0-9]+)\.\d{2})"
)
_BALANCE_RE = re.compile(_AMOUNT_PATTERN + r"\s*Cr\b", flags=re.IGNORECASE)
_AMOUNT_RE = re.compile(_AMOUNT_PATTERN)


def finalize_transaction(row):
    """Split a transaction's raw text into details, debit, credit and balance.

    Reads ``row["DetailsRaw"]`` and works on a whitespace-normalized copy:

    1. The last amount followed by ``Cr`` is taken as the balance and removed.
    2. The last remaining amount is taken as the transaction value and
       removed. It is a credit when the remaining text starts with ``"BY "``
       (case-insensitive) and a debit otherwise.
    3. What is left is passed through format_details().

    Args:
        row: Dict for one transaction; mutated in place.

    Returns:
        The same *row*, with ``"Details"``, ``"Debit"``, ``"Credit"`` and
        ``"Balance"`` set. Amounts are the matched strings (commas retained),
        or ``""`` when no amount was found.
    """
    details = re.sub(r"\s+", " ", (row.get("DetailsRaw") or "").strip())

    # NOTE(review): only balances suffixed "Cr" are recognized; a balance with
    # any other suffix would be picked up below as the transaction amount.
    # Confirm against the statement format.
    balance_val = ""
    bal_matches = list(_BALANCE_RE.finditer(details))
    if bal_matches:
        m = bal_matches[-1]
        balance_val = m.group(1)
        details = (details[:m.start()] + details[m.end():]).strip()

    debit_val, credit_val = "", ""
    amt_matches = list(_AMOUNT_RE.finditer(details))
    if amt_matches:
        m = amt_matches[-1]
        value_amount = m.group(1)
        details = details[:m.start()] + details[m.end():]
        # "BY ..." narrations are credits; everything else, including
        # "TO ...", is treated as a debit.
        if details.upper().startswith("BY "):
            credit_val = value_amount
        else:
            debit_val = value_amount

    details = format_details(details)

    row["Details"] = details
    row["Debit"] = debit_val
    row["Credit"] = credit_val
    row["Balance"] = balance_val
    return row

# --------- MAIN ---------
def convert_txt_to_excel(txt_path, output_path="Formatted_Bank_Statement.xlsx"):
    """Convert a text bank statement into a formatted Excel workbook.

    A line that begins with two ``dd/mm/yy`` dates opens a new transaction;
    any other kept line is appended to the transaction currently open. Blank
    lines and lines rejected by is_unwanted() are skipped, and text appearing
    before the first transaction is dropped.

    Args:
        txt_path: Path of the statement text file (read as UTF-8, undecodable
            bytes ignored).
        output_path: Destination ``.xlsx`` path.

    Returns:
        *output_path*.
    """
    with open(txt_path, "r", encoding="utf-8", errors="ignore") as handle:
        raw_lines = list(handle)

    header_re = re.compile(r"^(\d{2}/\d{2}/\d{2})\s+(\d{2}/\d{2}/\d{2})\s+(.*)$")
    records = []
    pending = None

    for raw_line in raw_lines:
        text = raw_line.strip()
        if text == "" or is_unwanted(text):
            continue

        header = header_re.match(text)
        if header is None:
            # Continuation line: fold it into the open transaction, if any.
            if pending:
                pending["DetailsRaw"] = (pending.get("DetailsRaw", "") + " " + text).strip()
            continue

        # A new transaction starts here, so close out the previous one first.
        if pending:
            records.append(finalize_transaction(pending))
        pending = {
            "Value": header.group(1),
            "Post": header.group(2),
            "DetailsRaw": header.group(3),
        }

    if pending:
        records.append(finalize_transaction(pending))

    frame = pd.DataFrame(
        records,
        columns=["Value", "Post", "Details", "Debit", "Credit", "Balance"],
    )
    frame.insert(3, "Chq.No", "")

    # Dates: dd/mm/yy in the statement -> dd-mm-YYYY in the workbook.
    for date_col in ("Value", "Post"):
        parsed = pd.to_datetime(frame[date_col], format="%d/%m/%y", errors="coerce")
        frame[date_col] = parsed.dt.strftime("%d-%m-%Y")

    # Amounts: drop the source commas, then regroup via indian_format().
    for money_col in ("Debit", "Credit", "Balance"):
        without_commas = frame[money_col].astype(str).str.replace(",", "", regex=False)
        frame[money_col] = without_commas.apply(indian_format)

    frame.to_excel(output_path, index=False)
    return output_path

# --------- RUN ---------
# Ask for a statement file, convert it, and offer the workbook for download.
# Only the first uploaded file is converted.
uploaded = files.upload()
if uploaded:
    txt_path = next(iter(uploaded))
    out = convert_txt_to_excel(txt_path)
    files.download(out)
else:
    # Nothing was uploaded (e.g. the dialog was cancelled), so there is no
    # file name to index; report it instead of failing with IndexError.
    print("No file was uploaded; nothing to convert.")


Saving ONLINESTMT_5495346710_0000000120252139.txt to ONLINESTMT_5495346710_0000000120252139.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>