In [1]:
import pandas as pd
import os

def load_income_expense_data(project_root, data_path=None):
    """
    Read the income and expense CSV file into a DataFrame.

    Parameters:
    -----------
    project_root : str
        Root directory of the project; only used to build the default path
    data_path : str, optional
        Location of the CSV file. When omitted, 'data/Inc_Exp_Data.csv'
        underneath the project root is used

    Returns:
    --------
    pandas.DataFrame or None
        The parsed data, or None when the file is missing or cannot be read.
        In that case the error is printed instead of being raised
    """
    # The default path is built outside the try block, so a failure here
    # (for example a non-string project_root) propagates to the caller.
    data_path = (
        os.path.join(project_root, 'data', 'Inc_Exp_Data.csv')
        if data_path is None
        else data_path
    )

    try:
        if os.path.exists(data_path):
            loaded = pd.read_csv(data_path)
        else:
            raise FileNotFoundError(f"The file {data_path} does not exist.")

        # An empty frame is still returned; the caller is only warned.
        if loaded.empty:
            print("Warning: The loaded dataframe is empty.")
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error loading data: {e}")
        loaded = None

    return loaded

In [2]:
import pandas as pd
import numpy as np

def calculate_income_statistics(df):
    """
    Summarise the 'Income' column of a DataFrame.

    Parameters:
    -----------
    df : pandas.DataFrame or None
        DataFrame with an 'Income' column. None is passed straight through

    Returns:
    --------
    dict or None
        Mapping with the keys 'total_income', 'average_income',
        'median_income', 'income_std_dev', 'min_income' and 'max_income',
        or None when df is None
    """
    if df is None:
        return None

    income = df['Income']

    # (result key, reducer) pairs, kept in the order the keys are reported.
    reducers = (
        ('total_income', income.sum),
        ('average_income', income.mean),
        ('median_income', income.median),
        ('income_std_dev', income.std),
        ('min_income', income.min),
        ('max_income', income.max),
    )

    return {key: reduce() for key, reduce in reducers}

def categorize_income(df, bins=None):
    """
    Categorize income into predefined or custom bins.

    Each bin is closed on the right. The first bin is additionally closed on
    the left, so an income exactly equal to the lowest edge (0 with the
    default bins) lands in the first category instead of becoming NaN.

    Parameters:
    -----------
    df : pandas.DataFrame or None
        DataFrame containing an 'Income' column. None is passed straight
        through, matching calculate_income_statistics
    bins : list, optional
        Ascending bin edges. If None, the edges
        [0, 20000, 40000, 60000, 80000, inf] are used with the labels
        'Low', 'Lower Middle', 'Middle', 'Upper Middle' and 'High'.
        Custom bins are labelled 'Category 1', 'Category 2', ...

    Returns:
    --------
    pandas.Series or None
        Series of income categories, one per row of df. Incomes that fall
        outside every bin are NaN. None is returned when df is None
    """
    if df is None:
        return None

    if bins is None:
        bins = [0, 20000, 40000, 60000, 80000, np.inf]
        labels = ['Low', 'Lower Middle', 'Middle', 'Upper Middle', 'High']
    else:
        # One label per interval, i.e. one fewer than the number of edges.
        labels = [f'Category {i+1}' for i in range(len(bins)-1)]

    # include_lowest=True keeps values equal to bins[0] (e.g. zero income)
    # from silently dropping out of the categorisation.
    return pd.cut(df['Income'], bins=bins, labels=labels, include_lowest=True)

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_income_distribution(df, output_path='income_distribution.png'):
    """
    Create a histogram of income distribution and save it as an image.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing income data (the 'Income' column is plotted)
    output_path : str, optional
        File the figure is written to. Defaults to 'income_distribution.png'
    """
    # Work on an explicit figure/axes pair so the function does not depend on
    # (or disturb) whatever figure happens to be current.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.histplot(data=df, x='Income', kde=True, ax=ax)
        ax.set_title('Income Distribution')
        ax.set_xlabel('Income')
        ax.set_ylabel('Frequency')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)

def plot_income_by_category(df, output_path='income_by_category.png'):
    """
    Create a box plot of income by category and save it as an image.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing income data with categories (the
        'Income_Category' and 'Income' columns are plotted)
    output_path : str, optional
        File the figure is written to. Defaults to 'income_by_category.png'
    """
    # Work on an explicit figure/axes pair so the function does not depend on
    # (or disturb) whatever figure happens to be current.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.boxplot(x='Income_Category', y='Income', data=df, ax=ax)
        ax.set_title('Income Distribution by Category')
        ax.set_xlabel('Income Category')
        ax.set_ylabel('Income')
        # Tilt the category labels so longer names do not overlap.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)