# 🏦 Web-Based Membership Management System


This notebook processes bank statements and matches transactions to members in the database. It follows these steps:

1. **Setup**: Import libraries and configure Django environment
2. **Data Loading**: Load and validate bank statement data
3. **Processing**: Clean and standardize transaction data
4. **Matching**: Link transactions to members using prioritized matching
5. **Export**: Save results to Excel files

## Dependencies
- Django
- pandas
- numpy
- xlsxwriter
- ipywidgets

In [1]:
# Setup: Import Django models and configure the environment


# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np
import re
from datetime import datetime
from decimal import Decimal
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, HTML
import warnings
import threading
import django

# Required for Excel export with formatting
try:
    import xlsxwriter
except ImportError:
    print("⚠️ xlsxwriter module not found. Installing...")
    import pip
    pip.main(['install', 'xlsxwriter'])
    import xlsxwriter
    print("✅ xlsxwriter installed successfully")



# Helper function for threading (moved up from the helper cell)
def run_in_thread(func):
    """Execute a function in a thread to avoid async issues with Django"""
    import threading
    result = []
    error = []
    
    def wrapped_func():
        try:
            result.append(func())
        except Exception as e:
            error.append(e)
    
    thread = threading.Thread(target=wrapped_func)
    thread.start()
    thread.join()
    
    if error:
        raise error[0]
    
    return result[0] if result else None

# Add project directory to path and set up Django
project_dir = r'C:/Work/active_projects/web-based-membership-management-system'
if project_dir not in sys.path:
    sys.path.append(project_dir)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'yzer_membership.settings')
django.setup()

# Import Django models and query tools
from members.models import Member, PaymentRecord, BankStatementImport
from django.db.models import Q

print("✅ Django environment set up successfully")
print(f"✅ Connected to database: {Member._meta.app_config.name}")
print(f"✅ Project directory: {project_dir}")
print(f"✅ Input directory: {os.path.join(project_dir, 'data_input')}")
print(f"✅ Output directory: {os.path.join(project_dir, 'data_output')}")

# For demo purposes, display a count of members in the database using a thread
def get_counts():
    return (
        Member.objects.count(),
        PaymentRecord.objects.count()
    )

member_count, payment_count = run_in_thread(get_counts)
print(f"📊 Database contains {member_count} members and {payment_count} payment records")

# --- File and column settings for loading bank statement ---
file_path = r'C:/Work/active_projects/web-based-membership-management-system/data_input/statement-08-240-533-6 Feb-July.xlsx'
sheet = 'Sheet1'  # Sheet name as provided
date_col = 'Date'  # Column name for transaction date
type_col = 'Type'  # Column name for transaction type
description_col = 'Description'  # Column name for transaction description
amount_col = 'Amount'  # Column name for transaction amount
min_amount = 50.0  # Minimum amount filter

✅ Django environment set up successfully
✅ Connected to database: members
✅ Project directory: C:/Work/active_projects/web-based-membership-management-system
✅ Input directory: C:/Work/active_projects/web-based-membership-management-system\data_input
✅ Output directory: C:/Work/active_projects/web-based-membership-management-system\data_output
📊 Database contains 2049 members and 0 payment records


## Utility Functions

The following cell contains utility functions for data processing and member matching.

In [2]:
# Utility functions for data processing and member matching
from typing import Optional, List, Dict, Any
from decimal import Decimal
import pandas as pd
import re
from django.db.models import Q
from fuzzywuzzy import fuzz

def clean_amount(amount_str: Any) -> Decimal:
    """Convert amount to Decimal"""
    if pd.isna(amount_str) or amount_str is None or amount_str == "":
        return Decimal('0.00')
    if isinstance(amount_str, (int, float)):
        return Decimal(str(amount_str))
    cleaned = re.sub(r'[R\s,]', '', str(amount_str))
    try:
        return Decimal(cleaned)
    except Exception as e:
        return Decimal('0.00')

class MemberMatcher:
    """Class to handle member matching logic with caching"""
    
    def __init__(self):
        self.member_cache = {}
        self.status_priority = {'paid_up_member': 1, 'unpaid_member': 2}
    
    def normalize_name(self, text: str) -> str:
        """Normalize a name for comparison"""
        if not text:
            return ""
        text = text.lower()
        text = text.split(',')[0]
        text = re.sub(r'[^a-z0-9\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
    
    def get_members(self) -> List[Dict]:
        """Get all members from database with caching"""
        if not self.member_cache:
            members = run_in_thread(lambda: list(Member.objects.all().values(
                "id", "first_name", "last_name", "membership_status", "email"
            )))
            members = [m for m in members if m.get('first_name') and m.get('last_name')]
            self.member_cache = {m['id']: m for m in members}
        return list(self.member_cache.values())
    
    def match_transaction(self, txn_desc: str) -> Optional[Dict]:
        """Match a transaction description to a member"""
        txn_desc = self.normalize_name(txn_desc)
        best_match = None
        best_score = 0
        
        # First try direct last name matches
        for member in self.get_members():
            last_name = self.normalize_name(member['last_name'])
            first_name = self.normalize_name(member['first_name'])
            
            # Try exact last name match first
            if last_name in txn_desc:
                score = 90  # Base score for last name match
                
                # Boost score if first name matches too
                if first_name in txn_desc:
                    score += 10
                
                # Extra boost for paid_up_member status
                if member['membership_status'].lower() == 'paid_up_member':
                    score += 50
                
                if score > best_score:
                    best_score = score
                    best_match = member
        
        return best_match

# Create global matcher instance
member_matcher = MemberMatcher()

print("✅ Utility and matching functions cell completed OK")

✅ Utility and matching functions cell completed OK


In [3]:
from decimal import Decimal
def clean_amount(amount_str):
    """
    Convert string amount to Decimal, handling different formats
    
    Parameters:
    -----------
    amount_str : str, int, or float
        The amount value to clean
        
    Returns:
    --------
    Decimal
        The cleaned amount as a Decimal value
    """
    if isinstance(amount_str, (int, float)):
        return Decimal(str(amount_str))
    
    if pd.isna(amount_str) or not amount_str:
        return Decimal('0.00')
    
    # Remove currency symbols, spaces, and commas
    cleaned = re.sub(r'[R\s,]', '', str(amount_str))
    
    try:
        return Decimal(cleaned)
    except:
        print(f"Warning: Could not convert '{amount_str}' to decimal. Using 0.00")
        return Decimal('0.00')


def extract_potential_names(text):
    """
    Extract potential names from transaction text
    
    Parameters:
    -----------
    text : str
        The text to analyze for potential names
        
    Returns:
    --------
    list
        List of potential name strings
    """
    if pd.isna(text) or not text:
        return []
    
    # Convert to string if it's not already
    text = str(text)
    
    # Find potential names (uppercase words or words with initial capital)
    potential_names = []
    
    # First, try to find common surname prefixes (van, de, etc.)
    compound_matches = re.findall(r'\b(?:van|de|du|der|den|la)\s+[A-Z][a-z]+\b', text, re.IGNORECASE)
    potential_names.extend(compound_matches)
    
    # Match words that are likely names (start with capital letter, at least 3 chars)
    name_matches = re.findall(r'\b[A-Z][a-z]{2,}\b', text)
    potential_names.extend(name_matches)
    
    # Match all caps words that might be surnames
    caps_matches = re.findall(r'\b[A-Z]{3,}\b', text)
    potential_names.extend(caps_matches)
    
    return potential_names


# Function to find a member by name
def find_member_by_name(name):
    matches = Member.objects.filter(
        Q(last_name__iexact=name) | 
        Q(first_name__iexact=name) |
        Q(last_name__istartswith=name) |
        Q(first_name__istartswith=name)
    )
    
    if matches.count() == 1:
        return matches.first()
    return None


# Function to find a member by word
def find_member_by_word(word):
    if word.lower() in ['the', 'and', 'for', 'from', 'payment', 'transfer', 'fee', 'fees', 'member', 'membership']:
        return None
        
    matches = Member.objects.filter(
        Q(last_name__iexact=word) | 
        Q(first_name__iexact=word)
    )
    
    if matches.count() == 1:
        return matches.first()
    return None


def match_member(reference, description, amount):
    """
    Try to match a bank transaction to a member
    
    Parameters:
    -----------
    reference : str
        Transaction reference text
    description : str
        Transaction description text
    amount : Decimal
        Transaction amount
        
    Returns:
    --------
    Member or None
        Member object if a match is found, None otherwise
    """
    # Extract potential names
    potential_names = []
    
    if reference:
        potential_names.extend(extract_potential_names(reference))
    
    if description:
        potential_names.extend(extract_potential_names(description))
    
    # Remove duplicates
    potential_names = list(set(potential_names))
    
    # Try each potential name
    for name in potential_names:
        # Use threading to call the database function
        member = run_in_thread(find_member_by_name, name)
        if member:
            return member
    
    # Try words in reference and description
    for text in [reference, description]:
        if not text:
            continue
            
        # Extract words (3+ characters)
        words = re.findall(r'\b[A-Za-z]{3,}\b', text)
        
        for word in words:
            # Use threading to call the database function
            member = run_in_thread(find_member_by_word, word)
            if member:
                return member
    
    # No match found
    return None


# Test the clean_amount function
test_amounts = [100, "R100", "R 100.00", "100,50", "R100,50", None, ""]
print("Testing clean_amount function:")
for amount in test_amounts:
    print(f"  {amount} -> {clean_amount(amount)}")

Testing clean_amount function:
  100 -> 100
  R100 -> 100
  R 100.00 -> 100.00
  100,50 -> 10050
  R100,50 -> 10050
  None -> 0.00
   -> 0.00


In [6]:
# Load and validate all Excel files matching the pattern in the data_input folder
import os
import glob
import pandas as pd
from IPython.display import display

# Pattern for all statement files
input_dir = os.path.join(project_dir, 'data_input')
file_pattern = os.path.join(input_dir, 'statement-08-240-533-6*.xlsx')
files = glob.glob(file_pattern)

if not files:
    print(f"❌ No files found matching pattern: {file_pattern}")
else:
    for file_path in files:
        print(f"\n=== Processing file: {os.path.basename(file_path)} ===")
        try:
            # Read data from B4:F (skip C), no headers, assign custom column names
            df = pd.read_excel(
                file_path,
                header=None,
                usecols="B,D,E,F",
                skiprows=3,  # skip first 3 rows, so row 4 is first
                engine='openpyxl'
            )
            df.columns = ['Date', 'Amount', 'Type', 'Description']
            print(f"✅ Loaded {len(df)} rows from {file_path}")
            # Drop duplicate rows
            before = len(df)
            df = df.drop_duplicates()
            after = len(df)
            if after < before:
                print(f"🗑️ Removed {before - after} duplicate rows. {after} rows remain.")
            # Filter to only include positive amounts if amount_col is set
            payments_df = df[df['Amount'] >= min_amount].copy()
            print(f"✅ Found {len(payments_df)} potential payments (amount > {min_amount})")
            print("\n🔍 Sample data (first 5 rows):")
            display(payments_df.head())
            print("\n📊 Basic statistics for amount column:")
            display(payments_df['Amount'].describe())
        except Exception as e:
            print(f"❌ Error loading or validating file: {e}")


=== Processing file: statement-08-240-533-6 20240828.xlsx ===
✅ Loaded 61 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20240828.xlsx
✅ Found 52 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250131,200.0,IB PAYMENT FROM,COGHO - OCTOBER 2023
6,20250214,100.0,IB PAYMENT FROM,DANIE SADIE
7,20250214,200.0,IB PAYMENT FROM,S BURGER FIRTREEPARK@CW
8,20250214,100.0,IB PAYMENT FROM,JACO & ELITA RICHTER
9,20250214,100.0,CREDIT TRANSFER,INVESTECPBLesleyJacobs



📊 Basic statistics for amount column:


count      52.000000
mean      305.396346
std       860.461113
min       100.000000
25%       100.000000
50%       100.000000
75%       200.000000
max      6180.610000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250218.xlsx ===
✅ Loaded 37 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250218.xlsx
✅ Found 32 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250124,250.0,CREDIT TRANSFER,ABSA BANK Membership&Donation
1,20250127,100.0,MAGTAPE CREDIT,ULANDA NEVELING
2,20250131,200.0,IB PAYMENT FROM,COGHO - OCTOBER 2023
8,20250214,100.0,IB PAYMENT FROM,DANIE SADIE
9,20250214,200.0,IB PAYMENT FROM,S BURGER FIRTREEPARK@CW



📊 Basic statistics for amount column:


count      32.000000
mean      505.379687
std      1662.496806
min       100.000000
25%       100.000000
50%       100.000000
75%       200.000000
max      9522.150000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250310.xlsx ===
✅ Loaded 84 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250310.xlsx
✅ Found 71 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
1,20250214,100.0,IB PAYMENT FROM,DANIE SADIE
2,20250214,200.0,IB PAYMENT FROM,S BURGER FIRTREEPARK@CW
3,20250214,100.0,IB PAYMENT FROM,JACO & ELITA RICHTER
4,20250214,100.0,CREDIT TRANSFER,INVESTECPBLesleyJacobs
5,20250214,100.0,CREDIT TRANSFER,INVESTECPBNelia 082 899 2248



📊 Basic statistics for amount column:


count       71.000000
mean       476.036761
std       2363.868839
min        100.000000
25%        100.000000
50%        100.000000
75%        200.000000
max      20000.000000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250317.xlsx ===
✅ Loaded 24 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250317.xlsx
✅ Found 19 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250303,200.0,IB PAYMENT FROM,BELLAMY
1,20250303,200.0,MAGTAPE CREDIT,EDITH LUKE MEMBER FE
2,20250303,100.0,CREDIT TRANSFER,ABSA BANK A Zietsman
3,20250303,100.0,CREDIT TRANSFER,ABSA BANK William Roberts
5,20250304,20000.0,IB TRANSFER FROM,cashflow



📊 Basic statistics for amount column:


count       19.000000
mean      1267.990000
std       4552.364519
min        100.000000
25%        100.000000
50%        100.000000
75%        200.000000
max      20000.000000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250424.xlsx ===
✅ Loaded 19 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250424.xlsx
✅ Found 12 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250328,100.0,REAL TIME TRANSFER FROM,Alison ordinary1715831479
1,20250328,100.0,CREDIT TRANSFER,ABSA BANK W Kotze 0824537817
2,20250331,100.0,IB PAYMENT FROM,BRIGITTE ERDMANN
3,20250331,100.0,CREDIT TRANSFER,ABSA BANK Dirk Nel. Member fee
6,20250401,100.0,CREDIT TRANSFER,ABSA BANK mike sanger



📊 Basic statistics for amount column:


count       12.000000
mean      1366.955000
std       2973.626135
min        100.000000
25%        100.000000
50%        100.000000
75%        475.000000
max      10000.000000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250704.xlsx ===
✅ Loaded 11 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250704.xlsx
✅ Found 3 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250506,250.0,CREDIT TRANSFER,Donation Auram SA R Prag
2,20250531,300.0,CREDIT TRANSFER,DONATION
10,0,2315.26,CLOSE BALANCE,



📊 Basic statistics for amount column:


count       3.000000
mean      955.086667
std      1178.209923
min       250.000000
25%       275.000000
50%       300.000000
75%      1307.630000
max      2315.260000
Name: Amount, dtype: float64


=== Processing file: statement-08-240-533-6 20250725.xlsx ===
✅ Loaded 4 rows from C:/Work/active_projects/web-based-membership-management-system\data_input\statement-08-240-533-6 20250725.xlsx
✅ Found 4 potential payments (amount > 50.0)

🔍 Sample data (first 5 rows):


Unnamed: 0,Date,Amount,Type,Description
0,20250714,100.0,IB PAYMENT FROM,PIETER LE ROUX ORD. M.SHI
1,20250714,100.0,IB PAYMENT FROM,DAVID WIGHTMAN 44 MAIN RD
2,20250714,100.0,CREDIT TRANSFER,ABSA BANK mike sanger
3,0,5946.66,CLOSE BALANCE,



📊 Basic statistics for amount column:


count       4.000
mean     1561.665
std      2923.330
min       100.000
25%       100.000
50%       100.000
75%      1561.665
max      5946.660
Name: Amount, dtype: float64

In [8]:
# Combine all statement records, clean data, and export to Excel with timestamp, then open the file
import os
import glob
import pandas as pd
from datetime import datetime
import platform

input_dir = os.path.join(project_dir, 'data_input')
file_pattern = os.path.join(input_dir, 'statement-08-240-533-6*.xlsx')
files = glob.glob(file_pattern)

all_records = []
for file_path in files:
    try:
        df = pd.read_excel(
            file_path,
            header=None,
            usecols="B,D,E,F",
            skiprows=3,
            engine='openpyxl'
        )
        df.columns = ['Date', 'Amount', 'Type', 'Description']
        all_records.append(df)
    except Exception as e:
        print(f"❌ Error loading {file_path}: {e}")

if all_records:
    combined_df = pd.concat(all_records, ignore_index=True)
    print(f"✅ Combined {len(combined_df)} records from {len(files)} files.")
    # Data cleanup: remove Amount < 0 and Date == 0
    before_cleanup = len(combined_df)
    combined_df = combined_df[combined_df['Amount'] >= 0]
    combined_df = combined_df[combined_df['Date'] != 0]
    after_cleanup = len(combined_df)
    print(f"🧹 Removed {before_cleanup - after_cleanup} records with Amount < 0 or Date == 0. {after_cleanup} records remain.")
    # Export to Excel with timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = os.path.join(project_dir, 'data_output')
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f'combined_bank_records_{timestamp}.xlsx')
    combined_df.to_excel(output_path, index=False)
    print(f"✅ Exported cleaned DataFrame to: {output_path}")
    display(combined_df.head())
    # Automatically open the file after export
    try:
        if platform.system() == 'Windows':
            os.startfile(output_path)
        elif platform.system() == 'Darwin':
            os.system(f'open "{output_path}"')
        else:
            os.system(f'xdg-open "{output_path}"')
        print(f"✅ Opened file: {output_path}")
    except Exception as e:
        print(f"⚠️ Could not open file automatically: {e}")
else:
    print("❌ No records to combine and export.")

✅ Combined 240 records from 7 files.
✅ Exported combined DataFrame to: C:/Work/active_projects/web-based-membership-management-system\data_output\combined_bank_records_20250808_091247.xlsx


Unnamed: 0,Date,Amount,Type,Description
0,20250131,200.0,IB PAYMENT FROM,COGHO - OCTOBER 2023
1,20250131,-46.7,SERVICE FEE,
2,20250131,-95.0,MONTHLY MANAGEMENT FEE,
3,20250203,-149.0,ACCOUNT PAYMENT,MULTID FORXNEELO 85541122
4,20250205,-1044.3,IB PAYMENT TO,BEN TROMP ONDERHOUD


✅ Opened file: C:/Work/active_projects/web-based-membership-management-system\data_output\combined_bank_records_20250808_091247.xlsx
