# House Construction Expense Tracker

This notebook helps you track and analyze expenses for your house construction project. You can:
1. Record expenses with categories and subcategories
2. View total expenses by category
3. Track spending patterns over time
4. Visualize expense distribution
5. Auto-save data to CSV, Excel, and PDF reports
6. Create automatic backups
7. Generate comprehensive PDF analysis reports

All data is stored securely in the `house_expenses_data` folder.

## 1. Import Required Libraries

First, let's import the necessary Python libraries for data management and visualization.

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import shutil
import os
from fpdf import FPDF
import io

# Set the style for better visualizations
sns.set_style("whitegrid")
sns.set_palette("husl")

## 2. Setup Expense Categories and Data Storage

Define the expense categories and setup data storage structure.

In [10]:
import os
import pandas as pd

# Define the data directory structure
DATA_DIR = 'house_expenses_data'
CSV_FILE = os.path.join(DATA_DIR, 'expenses.csv')
EXCEL_FILE = os.path.join(DATA_DIR, 'house_expenses.xlsx')
BACKUP_DIR = os.path.join(DATA_DIR, 'backups')

# Create necessary directories
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(BACKUP_DIR, exist_ok=True)

# Define expense categories and subcategories
expense_categories = {
    'Planning': ['House Plan', 'Other Expenses'],
    'House Plot': ['Land Cost', 'Land Registration'],
    'Building Plan Approval': ['EC market value', 'Property Tax'],
    'Initial Setup': ['Basic Things', 'Construction Equipment'],
    'Transportation': ['Transportation', 'Coolie (Wood)', 'Permits', 'Personal Transport'],
    'Building Material': {
        'Wood': ['Material (Wood)'],
        'Stone': ['RR Stones', 'Basement Stones', 'kankara', 'Ramapuram Kankara'],
        'Sand': ['Sand'],
        'Iron': ['Iron', 'Binding wire'],
        'Cement': ['Cement'],
        'Bricks': ['Bricks']
    },
    'Construction': [
        'Coolie', 'Labour charges', 'JCB', 'Rent for Miller & rekulu',
        'Bhumi Pooja', 'Carpenter', 'Sand', 'Basement Stones', 'kankara',
        'Ramapuram Kankara', 'Iron', 'Cement', 'Bricks', 'Coolie (extra charges)'
    ],
    'Pooja': ['Bhumi Pooja'],
    'Doors & Windows': ['Carpenter']
}

# Create an empty DataFrame to store expenses
expenses_df = pd.DataFrame(columns=[
    'Date',
    'Category',
    'Subcategory',
    'Description',
    'Amount',
    'Payment_Method',
    'Receipt_Reference'
])

# Print available categories and subcategories
print("Available Expense Categories:")
for category, subcategories in expense_categories.items():
    print(f"\n{category}:")
    if isinstance(subcategories, dict):
        for subcat_type, subcat_list in subcategories.items():
            print(f"  {subcat_type}:")
            for sub in subcat_list:
                print(f"    - {sub}")
    else:
        for sub in subcategories:
            print(f"  - {sub}")

Available Expense Categories:

Planning:
  - House Plan
  - Other Expenses

House Plot:
  - Land Cost
  - Land Registration

Building Plan Approval:
  - EC market value
  - Property Tax

Initial Setup:
  - Basic Things
  - Construction Equipment

Transportation:
  - Transportation
  - Coolie (Wood)
  - Permits
  - Personal Transport

Building Material:
  Wood:
    - Material (Wood)
  Stone:
    - RR Stones
    - Basement Stones
    - kankara
    - Ramapuram Kankara
  Sand:
    - Sand
  Iron:
    - Iron
    - Binding wire
  Cement:
    - Cement
  Bricks:
    - Bricks

Construction:
  - Coolie
  - Labour charges
  - JCB
  - Rent for Miller & rekulu
  - Bhumi Pooja
  - Carpenter
  - Sand
  - Basement Stones
  - kankara
  - Ramapuram Kankara
  - Iron
  - Cement
  - Bricks
  - Coolie (extra charges)

Pooja:
  - Bhumi Pooja

Doors & Windows:
  - Carpenter


## 3. Data Management Functions

Create functions to:
- Add new expenses
- Save data to CSV and Excel
- Load existing data
- Create automatic backups

In [11]:
def add_multiple_expenses(expenses_list):
    """
    Add multiple expenses at once
    expenses_list: list of dictionaries containing expense details
    """
    for expense in expenses_list:
        add_expense(
            date=expense['date'],
            category=expense['category'],
            subcategory=expense['subcategory'],
            description=expense['description'],
            amount=expense['amount'],
            payment_method=expense.get('payment_method', 'Not Specified'),
            receipt_reference=expense.get('receipt_reference', '')
        )
    print(f"Added {len(expenses_list)} expenses successfully.")

def save_data(generate_pdf=True):
    """
    Save the expense data to CSV, Excel, and PDF files with automatic backup
    
    Parameters:
    -----------
    generate_pdf : bool, default=True
        Whether to generate a PDF report with expense analysis
    """
    global expenses_df
    
    # Sort data by date
    expenses_df = expenses_df.sort_values('Date')
    
    # Remove duplicates based on all columns except Receipt_Reference
    expenses_df = expenses_df.drop_duplicates(
        subset=['Date', 'Category', 'Subcategory', 'Description', 'Amount', 'Payment_Method'],
        keep='last'
    )
    
    # Save to CSV
    expenses_df.to_csv(CSV_FILE, index=False)
    
    # Save to Excel with formatting
    with pd.ExcelWriter(EXCEL_FILE, engine='openpyxl') as writer:
        expenses_df.to_excel(writer, index=False, sheet_name='Expenses')
    
    # Create backup with timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_file = os.path.join(BACKUP_DIR, f'expenses_backup_{timestamp}.csv')
    shutil.copy2(CSV_FILE, backup_file)
    
    # Generate PDF report if requested
    if generate_pdf:
        pdf_file = os.path.join(DATA_DIR, 'expense_report.pdf')
        generate_pdf_report(pdf_file)
        print(f"Generated PDF report: {pdf_file}")
    
    print(f"Saved {len(expenses_df)} expense records successfully.")

def generate_pdf_report(output_file):
    """
    Generate a comprehensive PDF report with expense analysis and visualizations
    
    Parameters:
    -----------
    output_file : str
        Path to save the PDF report
    """
    class PDF(FPDF):
        def header(self):
            # Logo or title
            self.set_font('Arial', 'B', 15)
            self.cell(0, 10, 'House Construction Expense Report', 0, 1, 'C')
            self.set_font('Arial', 'I', 10)
            self.cell(0, 6, f'Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1, 'C')
            self.ln(5)

        def footer(self):
            # Footer
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

        def chapter_title(self, title):
            self.set_font('Arial', 'B', 14)
            self.set_fill_color(230, 230, 230)
            self.cell(0, 10, title, 0, 1, 'L', fill=True)
            self.ln(5)
            
        def section_title(self, title):
            self.set_font('Arial', 'B', 12)
            self.cell(0, 10, title, 0, 1, 'L')
            self.ln(2)

    # Create PDF object
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    
    # Cover Page
    pdf.add_page()
    pdf.set_font('Arial', 'B', 24)
    pdf.cell(0, 40, 'House Construction', 0, 1, 'C')
    pdf.cell(0, 20, 'Expense Report', 0, 1, 'C')
    
    pdf.set_font('Arial', '', 12)
    pdf.cell(0, 10, f'Period: {expenses_df["Date"].min().strftime("%Y-%m-%d")} to {expenses_df["Date"].max().strftime("%Y-%m-%d")}', 0, 1, 'C')
    pdf.cell(0, 10, f'Total Expenditure: {format_pdf_indian_currency(expenses_df["Amount"].sum())}', 0, 1, 'C')
    pdf.cell(0, 10, f'Report Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1, 'C')

    # Executive Summary Page
    pdf.add_page()
    pdf.chapter_title('Executive Summary')
    
    # Add summary information
    total_expense = expenses_df['Amount'].sum()
    num_transactions = len(expenses_df)
    
    pdf.set_font('Arial', '', 11)
    pdf.multi_cell(0, 6, f"This report provides a comprehensive analysis of house construction expenses from {expenses_df['Date'].min().strftime('%Y-%m-%d')} to {expenses_df['Date'].max().strftime('%Y-%m-%d')}. A total of {num_transactions} expense transactions have been recorded, amounting to {format_pdf_indian_currency(total_expense)}.", 0, 'J')
    pdf.ln(5)
    
    # Key metrics
    pdf.section_title('Key Metrics')
    pdf.set_font('Arial', '', 10)
    
    # Create a metrics table
    metrics_data = [
        ['Total Expenses', format_pdf_indian_currency(total_expense)],
        ['Total Transactions', str(num_transactions)],
        ['Average Expense', format_pdf_indian_currency(total_expense / num_transactions if num_transactions > 0 else 0)],
        ['Largest Expense', format_pdf_indian_currency(expenses_df['Amount'].max())],
        ['Smallest Expense', format_pdf_indian_currency(expenses_df['Amount'].min())],
        ['Duration', f"{(expenses_df['Date'].max() - expenses_df['Date'].min()).days + 1} days"]
    ]
    
    # Draw metrics table
    col_widths = [90, 90]
    for row in metrics_data:
        pdf.set_font('Arial', 'B', 10)
        pdf.cell(col_widths[0], 8, row[0], 1)
        pdf.set_font('Arial', '', 10)
        pdf.cell(col_widths[1], 8, row[1], 1)
        pdf.ln()
    
    # Add category-wise expenses
    pdf.add_page()
    pdf.chapter_title('Expense Breakdown by Category')
    pdf.section_title('Category Distribution')
    pdf.set_font('Arial', '', 10)
    
    # Create a table
    category_totals = expenses_df.groupby('Category')['Amount'].sum().sort_values(ascending=False)
    
    # Table headers
    pdf.set_font('Arial', 'B', 10)
    pdf.set_fill_color(240, 240, 240)
    pdf.cell(80, 8, 'Category', 1, 0, 'C', fill=True)
    pdf.cell(40, 8, 'Amount', 1, 0, 'C', fill=True)
    pdf.cell(30, 8, 'Percentage', 1, 0, 'C', fill=True)
    pdf.cell(30, 8, '# Transactions', 1, 1, 'C', fill=True)
    
    # Table data
    pdf.set_font('Arial', '', 10)
    category_counts = expenses_df.groupby('Category').size()
    
    for category, amount in category_totals.items():
        percentage = (amount / total_expense) * 100
        pdf.cell(80, 7, f"{category}", 1)
        pdf.cell(40, 7, f"{format_pdf_indian_currency(amount)}", 1, 0, 'R')
        pdf.cell(30, 7, f"{percentage:.1f}%", 1, 0, 'C')
        pdf.cell(30, 7, f"{category_counts[category]}", 1, 1, 'C')
    
    # Total row
    pdf.set_font('Arial', 'B', 10)
    pdf.cell(80, 8, "TOTAL", 1, 0, 'L', fill=True)
    pdf.cell(40, 8, f"{format_pdf_indian_currency(total_expense)}", 1, 0, 'R', fill=True)
    pdf.cell(30, 8, "100.0%", 1, 0, 'C', fill=True)
    pdf.cell(30, 8, f"{num_transactions}", 1, 1, 'C', fill=True)
    
    # Add monthly summary
    pdf.add_page()
    pdf.chapter_title('Monthly Expense Analysis')
    pdf.section_title('Monthly Totals')
    
    # Create monthly expense table
    monthly_expenses = expenses_df.set_index('Date').resample('ME')['Amount'].sum().sort_index()
    monthly_counts = expenses_df.set_index('Date').resample('ME').size()
    
    # Table headers
    pdf.set_font('Arial', 'B', 10)
    pdf.set_fill_color(240, 240, 240)
    pdf.cell(60, 8, 'Month', 1, 0, 'C', fill=True)
    pdf.cell(60, 8, 'Amount', 1, 0, 'C', fill=True)
    pdf.cell(35, 8, '% of Total', 1, 0, 'C', fill=True)
    pdf.cell(35, 8, '# Transactions', 1, 1, 'C', fill=True)
    
    # Table data
    pdf.set_font('Arial', '', 10)
    for date, amount in monthly_expenses.items():
        percentage = (amount / total_expense) * 100
        pdf.cell(60, 7, f"{date.strftime('%B %Y')}", 1)
        pdf.cell(60, 7, f"{format_pdf_indian_currency(amount)}", 1, 0, 'R')
        pdf.cell(35, 7, f"{percentage:.1f}%", 1, 0, 'C')
        pdf.cell(35, 7, f"{monthly_counts[date]}", 1, 1, 'C')
    
    # Add visualizations
    pdf.add_page()
    pdf.chapter_title('Expense Visualizations')
    
    # Create and save a pie chart
    plt.figure(figsize=(10, 6))
    plt.style.use('seaborn-v0_8-pastel')
    
    # Only plot top 8 categories, group others as "Other"
    if len(category_totals) > 8:
        top_categories = category_totals.head(7)
        other_total = category_totals[7:].sum()
        plot_data = pd.concat([top_categories, pd.Series({'Other': other_total})])
    else:
        plot_data = category_totals
    
    # Create explode effect for largest slice
    explode = [0] * len(plot_data)
    explode[0] = 0.1  # Explode largest category
    
    # Plot pie chart with better styling
    wedges, texts, autotexts = plt.pie(
        plot_data, 
        labels=plot_data.index, 
        autopct='%1.1f%%', 
        startangle=90, 
        explode=explode,
        shadow=True,
        textprops={'fontsize': 9}
    )
    
    # Make percentage labels more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontweight('bold')
    
    plt.title('Expense Distribution by Category', fontsize=14, pad=20)
    plt.tight_layout()
    
    # Save figure to a temporary file
    temp_chart_file = os.path.join(DATA_DIR, 'temp_chart.png')
    plt.savefig(temp_chart_file, format='png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # Add the chart to the PDF
    pdf.image(temp_chart_file, x=10, y=pdf.get_y(), w=190)
    
    # Create and add a time series chart
    pdf.add_page()
    pdf.section_title('Expense Trends Over Time')
    
    # Cumulative expense plot
    plt.figure(figsize=(12, 6))
    plt.style.use('seaborn-v0_8-whitegrid')
    
    expenses_df_sorted = expenses_df.sort_values('Date')
    expenses_df_sorted['Cumulative'] = expenses_df_sorted['Amount'].cumsum()
    
    plt.plot(expenses_df_sorted['Date'], expenses_df_sorted['Cumulative'], 
             marker='o', linestyle='-', color='#1f77b4', linewidth=2, markersize=5)
    
    # Add annotations for significant jumps
    threshold = expenses_df['Amount'].max() * 0.1  # 10% of max expense
    
    significant_jumps = expenses_df_sorted[
        expenses_df_sorted['Amount'] > threshold
    ].sort_values('Amount', ascending=False).head(3)
    
    for idx, expense in significant_jumps.iterrows():
        plt.annotate(
            f"Rs.{expense['Amount']:,.0f}\n{expense['Category']}",
            xy=(expense['Date'], expense['Cumulative']),
            xytext=(10, 20),
            textcoords='offset points',
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=.2')
        )
    
    plt.title('Cumulative Expenses Over Time', fontsize=14)
    plt.xlabel('Date', fontsize=10)
    plt.ylabel('Cumulative Amount (₹)', fontsize=10)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    
    # Format y-axis with Indian currency notation
    from matplotlib.ticker import FuncFormatter
    def indian_currency_formatter(x, pos):
        if x >= 10000000:
            return f'Rs.{x/10000000:.1f}Cr'
        elif x >= 100000:
            return f'Rs.{x/100000:.1f}L'
        elif x >= 1000:
            return f'Rs.{x/1000:.1f}K'
        else:
            return f'Rs.{x:.0f}'
    
    plt.gca().yaxis.set_major_formatter(FuncFormatter(indian_currency_formatter))
    
    # Save figure to a temporary file
    temp_chart2_file = os.path.join(DATA_DIR, 'temp_chart2.png')
    plt.savefig(temp_chart2_file, format='png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # Add the chart to the PDF
    pdf.image(temp_chart2_file, x=10, y=pdf.get_y(), w=190)
    
    # Add top expenses
    pdf.add_page()
    pdf.chapter_title('Major Expenditures')
    pdf.section_title('Top 10 Largest Expenses')
    
    # Table headers
    pdf.set_font('Arial', 'B', 10)
    pdf.set_fill_color(240, 240, 240)
    pdf.cell(25, 8, 'Date', 1, 0, 'C', fill=True)
    pdf.cell(30, 8, 'Category', 1, 0, 'C', fill=True)
    pdf.cell(35, 8, 'Subcategory', 1, 0, 'C', fill=True)
    pdf.cell(45, 8, 'Amount', 1, 0, 'C', fill=True)
    pdf.cell(55, 8, 'Description', 1, 1, 'C', fill=True)
    
    # Table data
    pdf.set_font('Arial', '', 9)
    largest_expenses = expenses_df.sort_values('Amount', ascending=False).head(10)
    for _, expense in largest_expenses.iterrows():
        pdf.cell(25, 7, f"{expense['Date'].strftime('%Y-%m-%d')}", 1)
        pdf.cell(30, 7, f"{expense['Category']}", 1)
        pdf.cell(35, 7, f"{expense['Subcategory']}", 1)
        pdf.cell(45, 7, f"{format_pdf_indian_currency(expense['Amount'])}", 1, 0, 'R')
        
        # Handle long descriptions
        description = expense['Description']
        if len(description) > 25:
            description = description[:25] + "..."
        pdf.cell(55, 7, f"{description}", 1)
        pdf.ln()
    
    # Add recent expenses
    pdf.add_page()
    pdf.section_title('Recent Expenses (Last 10)')
    
    # Table headers
    pdf.set_font('Arial', 'B', 10)
    pdf.set_fill_color(240, 240, 240)
    pdf.cell(25, 8, 'Date', 1, 0, 'C', fill=True)
    pdf.cell(30, 8, 'Category', 1, 0, 'C', fill=True)
    pdf.cell(35, 8, 'Subcategory', 1, 0, 'C', fill=True)
    pdf.cell(45, 8, 'Amount', 1, 0, 'C', fill=True)
    pdf.cell(55, 8, 'Description', 1, 1, 'C', fill=True)
    
    # Table data
    pdf.set_font('Arial', '', 9)
    recent_expenses = expenses_df.sort_values('Date', ascending=False).head(10)
    for _, expense in recent_expenses.iterrows():
        pdf.cell(25, 7, f"{expense['Date'].strftime('%Y-%m-%d')}", 1)
        pdf.cell(30, 7, f"{expense['Category']}", 1)
        pdf.cell(35, 7, f"{expense['Subcategory']}", 1)
        pdf.cell(45, 7, f"{format_pdf_indian_currency(expense['Amount'])}", 1, 0, 'R')
        
        # Handle long descriptions
        description = expense['Description']
        if len(description) > 25:
            description = description[:25] + "..."
        pdf.cell(55, 7, f"{description}", 1)
        pdf.ln()
        
    # Save the PDF
    pdf.output(output_file)

def load_data():
    """
    Load existing expense data from CSV file
    """
    global expenses_df
    
    if os.path.exists(CSV_FILE):
        # Read existing data
        existing_data = pd.read_csv(CSV_FILE)
        existing_data['Date'] = pd.to_datetime(existing_data['Date'])
        
        # Merge with current data frame
        expenses_df = pd.concat([expenses_df, existing_data], ignore_index=True)
        
        # Remove duplicates
        expenses_df = expenses_df.drop_duplicates(
            subset=['Date', 'Category', 'Subcategory', 'Description', 'Amount', 'Payment_Method'],
            keep='last'
        )
        
        print(f"Loaded {len(expenses_df)} existing expense records successfully.")
        return True
    else:
        print("No existing data file found. Starting with empty dataset.")
        return False

def add_expense(date, category, subcategory, description, amount, payment_method, receipt_reference):
    """
    Add a new expense to the dataset
    """
    global expenses_df
    
    # Load latest data first to ensure we're not missing any records
    if os.path.exists(CSV_FILE):
        existing_data = pd.read_csv(CSV_FILE)
        existing_data['Date'] = pd.to_datetime(existing_data['Date'])
        expenses_df = pd.concat([expenses_df, existing_data], ignore_index=True)
    
    # Validate category and subcategory
    if category not in expense_categories:
        raise ValueError(f"Invalid category. Must be one of: {list(expense_categories.keys())}")
    
    # Handle nested subcategories
    valid_subcategories = []
    if isinstance(expense_categories[category], list):
        valid_subcategories = expense_categories[category]
    else:
        # For nested categories (like Building Material)
        for subcat_list in expense_categories[category].values():
            valid_subcategories.extend(subcat_list)
    
    if subcategory not in valid_subcategories:
        raise ValueError(f"Invalid subcategory for {category}. Must be one of: {valid_subcategories}")
    
    # Create new expense entry
    new_expense = pd.DataFrame({
        'Date': [pd.to_datetime(date)],
        'Category': [category],
        'Subcategory': [subcategory],
        'Description': [description],
        'Amount': [float(amount)],
        'Payment_Method': [payment_method],
        'Receipt_Reference': [receipt_reference]
    })
    
    # Append to existing data
    expenses_df = pd.concat([expenses_df, new_expense], ignore_index=True)
    
    # Remove duplicates and save
    expenses_df = expenses_df.drop_duplicates(
        subset=['Date', 'Category', 'Subcategory', 'Description', 'Amount', 'Payment_Method'],
        keep='last'
    )
    
    # Save updated data
    save_data()
    print(f"Added new expense: {description} - ₹{amount:,.2f}")

# Load existing data if available
load_data()

Loaded 7 existing expense records successfully.



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



True

In [12]:
def format_indian_currency(amount):
    """Format amount in Indian Rupees with proper comma placement"""
    # Convert to string with 2 decimal places
    str_amount = f"{amount:,.2f}"
    
    # Split into whole and decimal parts
    parts = str_amount.split('.')
    whole = parts[0].replace(',', '')
    decimal = parts[1]
    
    # Add commas for Indian number system
    result = ''
    # First comma after 3 digits from right
    if len(whole) > 3:
        result = ',' + whole[-3:]
        whole = whole[:-3]
        # Then commas after every 2 digits
        while whole:
            if len(whole) > 2:
                result = ',' + whole[-2:] + result
                whole = whole[:-2]
            else:
                result = whole + result
                whole = ''
    else:
        result = whole
    
    # Add decimal part and currency symbol
    return f"₹{result}.{decimal}"

def format_pdf_indian_currency(amount):
    """Format amount in Indian Rupees with proper comma placement for PDF compatibility"""
    # Convert to string with 2 decimal places
    str_amount = f"{amount:,.2f}"
    
    # Split into whole and decimal parts
    parts = str_amount.split('.')
    whole = parts[0].replace(',', '')
    decimal = parts[1]
    
    # Add commas for Indian number system
    result = ''
    # First comma after 3 digits from right
    if len(whole) > 3:
        result = ',' + whole[-3:]
        whole = whole[:-3]
        # Then commas after every 2 digits
        while whole:
            if len(whole) > 2:
                result = ',' + whole[-2:] + result
                whole = whole[:-2]
            else:
                result = whole + result
                whole = ''
    else:
        result = whole
    
    # Add decimal part and currency symbol (using Rs. instead of ₹ for PDF compatibility)
    return f"Rs.{result}.{decimal}"

def analyze_expenses():
    """
    Analyze expenses and show summary statistics
    """
    if expenses_df.empty:
        print("No expenses data available.")
        return
        
    print("\nEXPENSE ANALYSIS")
    print("=" * 50)
    
    # Calculate total expenses
    total = expenses_df['Amount'].sum()
    print(f"\nTotal Expenses: {format_indian_currency(total)}")
    
    # Expenses by category
    print("\nExpenses by Category:")
    category_totals = expenses_df.groupby('Category')['Amount'].sum().sort_values(ascending=False)
    for category, amount in category_totals.items():
        percentage = (amount / total) * 100
        print(f"{category}: {format_indian_currency(amount)} ({percentage:.1f}%)")
    
    # Monthly summary
    print("\nMonthly Expense Summary:")
    monthly_expenses = expenses_df.set_index('Date').resample('ME')['Amount'].sum()
    for date, amount in monthly_expenses.items():
        print(f"{date.strftime('%B %Y')}: {format_indian_currency(amount)}")

def visualize_expenses():
    """
    Create visualizations for expense analysis
    """
    if expenses_df.empty:
        print("No expenses data available for visualization.")
        return
    
    # Create a pie chart for category distribution
    fig1 = px.pie(expenses_df, values='Amount', names='Category', 
                  title='Expense Distribution by Category',
                  hover_data=['Amount'],
                  custom_data=['Amount'])
    fig1.update_traces(hovertemplate='Category: %{label}<br>Amount: ₹%{customdata:,.2f}<br>Percentage: %{percent:.1%}')
    fig1.show()
    
    # Create a bar chart for subcategory distribution
    category_sub = expenses_df.groupby(['Category', 'Subcategory'])['Amount'].sum().reset_index()
    fig2 = px.bar(category_sub, x='Category', y='Amount', color='Subcategory',
                  title='Expenses by Category and Subcategory')
    fig2.update_layout(yaxis_title="Amount (₹)")
    fig2.update_traces(hovertemplate='Category: %{x}<br>Subcategory: %{customdata}<br>Amount: ₹%{y:,.2f}')
    fig2.show()
    
    # Create a line chart for cumulative expenses over time
    expenses_df_sorted = expenses_df.sort_values('Date')
    expenses_df_sorted['Cumulative'] = expenses_df_sorted['Amount'].cumsum()
    fig3 = px.line(expenses_df_sorted, x='Date', y='Cumulative',
                   title='Cumulative Expenses Over Time')
    fig3.update_layout(yaxis_title="Cumulative Amount (₹)")
    fig3.update_traces(hovertemplate='Date: %{x}<br>Cumulative Amount: ₹%{y:,.2f}')
    fig3.show()

def generate_monthly_report():
    """
    Generate a detailed monthly report of expenses
    """
    if expenses_df.empty:
        print("No expenses data available for report generation.")
        return
    
    current_month = pd.Timestamp.now().strftime('%B %Y')
    
    print(f"\nMONTHLY EXPENSE REPORT - {current_month}")
    print("=" * 50)
    
    # Get current month's expenses
    current_month_mask = expenses_df['Date'].dt.strftime('%B %Y') == current_month
    month_expenses = expenses_df[current_month_mask]
    
    if month_expenses.empty:
        print("\nNo expenses recorded for the current month.")
        return
    
    # Total expenses this month
    total = month_expenses['Amount'].sum()
    print(f"\nTotal Expenses This Month: {format_indian_currency(total)}")
    
    # Expenses by category this month
    print("\nExpenses by Category:")
    category_totals = month_expenses.groupby('Category')['Amount'].sum()
    for category, amount in category_totals.items():
        percentage = (amount / total) * 100
        print(f"{category}: {format_indian_currency(amount)} ({percentage:.1f}%)")
    
    # Detailed expense list
    print("\nDetailed Expense List:")
    for _, expense in month_expenses.iterrows():
        print("-" * 100)
        print(f"Date: {expense['Date'].strftime('%Y-%m-%d')}")
        print(f"Category: {expense['Category']} - {expense['Subcategory']}")
        print(f"Description: {expense['Description']}")
        print(f"Amount: {format_indian_currency(expense['Amount'])}")
        print(f"Payment Method: {expense['Payment_Method']}")
        if pd.notna(expense['Note']):
            print(f"Note: {expense['Note']}")

# Generate PDF report example
def generate_full_report():
    """Generate a comprehensive PDF report of all house expenses"""
    print("Generating comprehensive PDF report...")
    
    # Generate PDF report to the default location
    pdf_file = os.path.join(DATA_DIR, 'expense_report.pdf')
    generate_pdf_report(pdf_file)
    
    print(f"PDF report saved to: {pdf_file}")
    print("The report includes:")
    print(" - Executive summary with key metrics")
    print(" - Category-wise expense breakdown")
    print(" - Monthly expense analysis")
    print(" - Visualizations (pie chart, time series)")
    print(" - Lists of top expenses and recent transactions")
    
    return pdf_file

# Generate the report
report_file = generate_full_report()

# To view the report, uncomment the following line:
# import webbrowser; webbrowser.open(report_file)

Generating comprehensive PDF report...
PDF report saved to: house_expenses_data\expense_report.pdf
The report includes:
 - Executive summary with key metrics
 - Category-wise expense breakdown
 - Monthly expense analysis
 - Visualizations (pie chart, time series)
 - Lists of top expenses and recent transactions


In [None]:
# Example 1: Adding a new expense
add_expense(
    date="2025-06-01",
    category="Construction",
    subcategory="Labour charges",
    description="Initial foundation concrete work",
    amount=15000,
    payment_method="Bank Transfer",
    receipt_reference="FOUND001"
)

# Example 2: Adding another expense
add_expense(
    date="2025-06-01",
    category="Transportation",
    subcategory="Permits",
    description="Building permit application fee",
    amount=2500,
    payment_method="Credit Card",
    receipt_reference="PERM001"
)

# Continue adding the remaining expenses
remaining_expenses = [
    {
        'date': '2025-04-01',
        'category': 'Transportation',
        'subcategory': 'Permits',
        'description': 'Check post fees',
        'amount': 600,
        'note': 'check post cost'
    },
    {
        'date': '2025-04-01',
        'category': 'Transportation',
        'subcategory': 'Permits',
        'description': 'Transportation permit',
        'amount': 2600,
        'note': 'Permit for transportation of teak'
    },
    {
        'date': '2025-04-01',
        'category': 'Transportation',
        'subcategory': 'Personal Transport',
        'description': 'Travel expenses',
        'amount': 500,
        'note': 'General transportation to pormamilla'
    },
    {
        'date': '2025-04-01',
        'category': 'Planning',
        'subcategory': 'Other Expenses',
        'description': 'Poojari fees',
        'amount': 500,
        'note': 'Given to Poojari'
    },
    # Adding all remaining entries
    {
        'date': '2025-04-13',
        'category': 'Construction',
        'subcategory': 'Coolie',
        'description': 'Construction labor advance',
        'amount': 10000,
        'note': 'Advance for the Coolie, construction'
    }
]

# Add more entries following the same pattern...

# Add these expenses to the dataset
expenses_data.extend(remaining_expenses)

# Bulk add all expenses
bulk_add_expenses(expenses_data)

def format_indian_currency(amount):
    """Format amount in Indian Rupees with proper comma placement"""
    # Convert to string with 2 decimal places
    str_amount = f"{amount:,.2f}"
    
    # Split into whole and decimal parts
    parts = str_amount.split('.')
    whole = parts[0].replace(',', '')
    decimal = parts[1]
    
    # Add commas for Indian number system
    result = ''
    # First comma after 3 digits from right
    if len(whole) > 3:
        result = ',' + whole[-3:]
        whole = whole[:-3]
        # Then commas after every 2 digits
        while whole:
            if len(whole) > 2:
                result = ',' + whole[-2:] + result
                whole = whole[:-2]
            else:
                result = whole + result
                whole = ''
    else:
        result = whole
    
    # Add decimal part and currency symbol
    return f"₹{result}.{decimal}"

def format_pdf_indian_currency(amount):
    """Format amount in Indian Rupees with proper comma placement for PDF compatibility"""
    # Convert to string with 2 decimal places
    str_amount = f"{amount:,.2f}"
    
    # Split into whole and decimal parts
    parts = str_amount.split('.')
    whole = parts[0].replace(',', '')
    decimal = parts[1]
    
    # Add commas for Indian number system
    result = ''
    # First comma after 3 digits from right
    if len(whole) > 3:
        result = ',' + whole[-3:]
        whole = whole[:-3]
        # Then commas after every 2 digits
        while whole:
            if len(whole) > 2:
                result = ',' + whole[-2:] + result
                whole = whole[:-2]
            else:
                result = whole + result
                whole = ''
    else:
        result = whole
    
    # Add decimal part and currency symbol (using Rs. instead of ₹ for PDF compatibility)
    return f"Rs.{result}.{decimal}"

def analyze_expenses():
    """
    Analyze expenses and show summary statistics
    """
    if expenses_df.empty:
        print("No expenses data available.")
        return
        
    print("\nEXPENSE ANALYSIS")
    print("=" * 50)
    
    # Calculate total expenses
    total = expenses_df['Amount'].sum()
    print(f"\nTotal Expenses: {format_indian_currency(total)}")
    
    # Expenses by category
    print("\nExpenses by Category:")
    category_totals = expenses_df.groupby('Category')['Amount'].sum().sort_values(ascending=False)
    for category, amount in category_totals.items():
        percentage = (amount / total) * 100
        print(f"{category}: {format_indian_currency(amount)} ({percentage:.1f}%)")
    
    # Monthly summary
    print("\nMonthly Expense Summary:")
    monthly_expenses = expenses_df.set_index('Date').resample('ME')['Amount'].sum()
    for date, amount in monthly_expenses.items():
        print(f"{date.strftime('%B %Y')}: {format_indian_currency(amount)}")

def visualize_expenses():
    """
    Create visualizations for expense analysis
    """
    if expenses_df.empty:
        print("No expenses data available for visualization.")
        return
    
    # Create a pie chart for category distribution
    fig1 = px.pie(expenses_df, values='Amount', names='Category', 
                  title='Expense Distribution by Category',
                  hover_data=['Amount'],
                  custom_data=['Amount'])
    fig1.update_traces(hovertemplate='Category: %{label}<br>Amount: ₹%{customdata:,.2f}<br>Percentage: %{percent:.1%}')
    fig1.show()
    
    # Create a bar chart for subcategory distribution
    category_sub = expenses_df.groupby(['Category', 'Subcategory'])['Amount'].sum().reset_index()
    fig2 = px.bar(category_sub, x='Category', y='Amount', color='Subcategory',
                  title='Expenses by Category and Subcategory')
    fig2.update_layout(yaxis_title="Amount (₹)")
    fig2.update_traces(hovertemplate='Category: %{x}<br>Subcategory: %{customdata}<br>Amount: ₹%{y:,.2f}')
    fig2.show()
    
    # Create a line chart for cumulative expenses over time
    expenses_df_sorted = expenses_df.sort_values('Date')
    expenses_df_sorted['Cumulative'] = expenses_df_sorted['Amount'].cumsum()
    fig3 = px.line(expenses_df_sorted, x='Date', y='Cumulative',
                   title='Cumulative Expenses Over Time')
    fig3.update_layout(yaxis_title="Cumulative Amount (₹)")
    fig3.update_traces(hovertemplate='Date: %{x}<br>Cumulative Amount: ₹%{y:,.2f}')
    fig3.show()

def generate_monthly_report():
    """
    Generate a detailed monthly report of expenses
    """
    if expenses_df.empty:
        print("No expenses data available for report generation.")
        return
    
    current_month = pd.Timestamp.now().strftime('%B %Y')
    
    print(f"\nMONTHLY EXPENSE REPORT - {current_month}")
    print("=" * 50)
    
    # Get current month's expenses
    current_month_mask = expenses_df['Date'].dt.strftime('%B %Y') == current_month
    month_expenses = expenses_df[current_month_mask]
    
    if month_expenses.empty:
        print("\nNo expenses recorded for the current month.")
        return
    
    # Total expenses this month
    total = month_expenses['Amount'].sum()
    print(f"\nTotal Expenses This Month: {format_indian_currency(total)}")
    
    # Expenses by category this month
    print("\nExpenses by Category:")
    category_totals = month_expenses.groupby('Category')['Amount'].sum()
    for category, amount in category_totals.items():
        percentage = (amount / total) * 100
        print(f"{category}: {format_indian_currency(amount)} ({percentage:.1f}%)")
    
    # Detailed expense list
    print("\nDetailed Expense List:")
    for _, expense in month_expenses.iterrows():
        print("-" * 100)
        print(f"Date: {expense['Date'].strftime('%Y-%m-%d')}")
        print(f"Category: {expense['Category']} - {expense['Subcategory']}")
        print(f"Description: {expense['Description']}")
        print(f"Amount: {format_indian_currency(expense['Amount'])}")
        print(f"Payment Method: {expense['Payment_Method']}")
        if pd.notna(expense['Note']):
            print(f"Note: {expense['Note']}")

# Generate PDF report example
def generate_full_report():
    """Generate a comprehensive PDF report of all house expenses"""
    print("Generating comprehensive PDF report...")
    
    # Generate PDF report to the default location
    pdf_file = os.path.join(DATA_DIR, 'expense_report.pdf')
    generate_pdf_report(pdf_file)
    
    print(f"PDF report saved to: {pdf_file}")
    print("The report includes:")
    print(" - Executive summary with key metrics")
    print(" - Category-wise expense breakdown")
    print(" - Monthly expense analysis")
    print(" - Visualizations (pie chart, time series)")
    print(" - Lists of top expenses and recent transactions")
    
    return pdf_file

# Generate the report
report_file = generate_full_report()

# To view the report, uncomment the following line:
# import webbrowser; webbrowser.open(report_file)

ValueError: Invalid category. Must be one of: ['Planning', 'House Plot', 'Building Plan Approval', 'Initial Setup', 'Transportation', 'Building Material', 'Construction', 'Pooja', 'Doors & Windows']

In [None]:
# Add the remaining expenses
final_expenses = [
    {
        'date': '2025-04-21',
        'category': 'Building Plan Approval',
        'subcategory': 'Property Tax',
        'description': 'Property tax payment',
        'amount': 8197,
        'note': 'property tax'
    },
    {
        'date': '2025-04-23',
        'category': 'Initial Setup',
        'subcategory': 'Basic Things',
        'description': 'Utility connections',
        'amount': 1000,
        'note': 'Electrical & water connections'
    },
    {
        'date': '2025-04-26',
        'category': 'Building Material',
        'subcategory': 'RR Stones',
        'description': 'Stone purchase',
        'amount': 4000,
        'note': '1 tractor'
    },
    {
        'date': '2025-04-26',
        'category': 'Initial Setup',
        'subcategory': 'Construction Equipment',
        'description': 'Construction tools',
        'amount': 2700,
        'note': 'bought 10 tubs, 2 paralu'
    },
    {
        'date': '2025-04-26',
        'category': 'Pooja',
        'subcategory': 'Bhumi Pooja',
        'description': 'Pooja items',
        'amount': 1530,
        'note': 'vastralu'
    },
    {
        'date': '2025-04-09',
        'category': 'Doors & Windows',
        'subcategory': 'Carpenter',
        'description': 'Carpenter advance',
        'amount': 4000,
        'note': 'Advance for Aachari'
    },
    # Add all remaining entries following this pattern...
    {
        'date': '2025-05-21',
        'category': 'Construction',
        'subcategory': 'Bricks',
        'description': 'Brick purchase',
        'amount': 6000,
        'note': 'fly ash bricks'
    }
]

# Add these expenses to the dataset
expenses_data.extend(final_expenses)

# Bulk add all expenses
bulk_add_expenses(expenses_data)

# Run final analysis
print("\nFinal Expense Analysis")
print("=" * 50)
analyze_expenses()
visualize_expenses()
generate_monthly_report()

In [None]:
# Add all remaining expenses
complete_expenses = [
    {
        'date': '2025-04-28',
        'category': 'Construction',
        'subcategory': 'Coolie',
        'description': 'First payment after marking',
        'amount': 100000,
        'payment_method': 'Cash',
        'note': 'first payment to masthan, after marking'
    },
    {
        'date': '2025-04-28',
        'category': 'Pooja',
        'subcategory': 'Bhumi Pooja',
        'description': 'Pooja materials',
        'amount': 3000,
        'note': 'Bought things for Bhumi Pooja'
    },
    {
        'date': '2025-04-29',
        'category': 'Construction',
        'subcategory': 'Ramapuram Kankara',
        'description': 'Kankara before Bhumi Pooja',
        'amount': 23200,
        'note': '6 units kankara before bhumi pooja'
    },
    {
        'date': '2025-05-06',
        'category': 'Construction',
        'subcategory': 'JCB',
        'description': 'JCB work',
        'amount': 16000,
        'note': 'rent for JCB'
    },
    {
        'date': '2025-05-01',
        'category': 'Construction',
        'subcategory': 'kankara',
        'description': 'Kankara stones',
        'amount': 4800,
        'note': '1 tractor 40mm kankara'
    },
    # Add all the remaining transactions following this pattern
    {
        'date': '2025-05-29',
        'category': 'Construction',
        'subcategory': 'Iron',
        'description': 'Iron payment',
        'amount': 75000,
        'payment_method': 'PhonePe',
        'note': 'Iron ; phonepe'
    },
    {
        'date': '2025-05-29',
        'category': 'Construction',
        'subcategory': 'Cement',
        'description': 'Cement payment',
        'amount': 20000,
        'payment_method': 'PhonePe',
        'note': 'cement; Phonepe'
    }
]

# Add all expenses to the dataset
expenses_data.extend(complete_expenses)

# Final bulk addition of all expenses
bulk_add_expenses(expenses_data)

# Generate final reports
print("\nFinal Analysis with All Expenses")
print("=" * 50)
analyze_expenses()
visualize_expenses()
generate_monthly_report()

## 2. Define Expense Categories

Let's set up the categories and subcategories for tracking house construction expenses.