In [38]:
import pandas as pd

### CATEGORIZE EXPENSES

In [39]:
# Every label a transaction can receive, written as '<group letter>: <name>'.
# The list order is the row order of the printed per-period summary.
categories = [
    'A: Morgage',
    'A: Medical Insurance',
    'B: Car Insurance',
    'B: Utilities n Bills',
    'C: Paper Check - Aikido?',
    'C: Cash Withdrawals',
    'C: College Tuition',
    'I: Anita Income',
    'I: Fidelity Transfer',
    'I: KeyBank cash-back',
    'T: Taxes',
    'V: Visa Payment',
    'U: Uncategorized',
]

# Single-column frame of the labels; categorize_all left-merges the period
# totals onto it so that categories with no transactions still get a row.
all_categories = pd.DataFrame(categories, columns=['Category'])
print(all_categories)

def get_category(expense_category):
    """Return ``expense_category`` unchanged, reporting it when it is unknown.

    The label is looked up in the module-level ``categories`` list. A label
    that is missing from the list only triggers a printed error message; it
    is still returned, so the caller is never interrupted.

    Parameters
    ----------
    expense_category : str
        Category label to validate.

    Returns
    -------
    str
        The same ``expense_category`` that was passed in.
    """
    is_known = expense_category in categories
    if not is_known:
        print(f"ERROR ! Category '{expense_category}' does not exist in the predefined categories.")
    return expense_category

                    Category
0                 A: Morgage
1       A: Medical Insurance
2           B: Car Insurance
3       B: Utilities n Bills
4   C: Paper Check - Aikido?
5        C: Cash Withdrawals
6            I: Anita Income
7       I: Fidelity Transfer
8       I: KeyBank cash-back
9                   T: Taxes
10           V: Visa Payment
11          U: Uncategorized


In [46]:
import re # re.search etc. regular expressions

# Ordered (category, keywords) rules. The first rule with any keyword found in
# the upper-cased description wins, so the order of the rules is significant
# (e.g. 'CHECK' is tested before 'ATM '). Trailing spaces inside a keyword are
# part of the match: 'ATT ' only matches when a space follows "ATT".
_CATEGORY_RULES = (
    # WFHM: Wells Fargo Home Mortgage
    ('A: Morgage', ('WFHM',)),
    # Medical insurance (VSP: VSP Vision Care; ROCKWELL)
    ('A: Medical Insurance', (
        'MEDICARE', 'VSP', 'UNITEDHEALTHCARE', 'ROCKWELL ', 'AARP HEALTH',
    )),
    # Car insurance
    ('B: Car Insurance', ('LIBERTY MUTUAL',)),
    # Utilities and bills
    #   CWD: Cleveland Dept of Water
    #   ENBRIDGE GAS
    #   CLEVELAND HEIGHTS: sewage ??? (unconfirmed in the original notes)
    #   NEORSD: Northeast Ohio Sewer District
    ('B: Utilities n Bills', (
        'VERIZON', 'DOMINION ', 'FIRST ENERGY', 'NORTHEAST OHIO',
        'CLEVELAND HEIGHTS ', 'ENBRIDGE GAS ', 'ATT ', 'NEORSD ', 'CWD ',
    )),
    # Paper checks
    ('C: Paper Check - Aikido?', ('CHECK',)),
    # ATM cash withdrawals
    ('C: Cash Withdrawals', ('ATM ',)),
    # College tuition
    ('C: College Tuition', ('SMARTPAYCIA', 'CASHNET')),
    # Anita income
    ('I: Anita Income', ('ZELLE DEP ANITA',)),
    # Fidelity transfer
    ('I: Fidelity Transfer', ('FID ',)),
    # KeyBank cash-back
    ('I: KeyBank cash-back', ('KEY REWARDS', 'GIFT FROM KEY BANK')),
    # Taxes and tax refunds
    ('T: Taxes', ('TAXREFUND', 'IRS ', 'TAX REF', 'RITA ')),
    # Visa payment
    ('V: Visa Payment', ('INTERNET TRF TO CCA',)),
)


def categorize(description):
    """Map a transaction description to one of the predefined category labels.

    The description is upper-cased once and tested against ``_CATEGORY_RULES``
    in order; the label of the first rule with a keyword contained in the
    text is returned. Every returned label is passed through ``get_category``
    so that a label missing from ``categories`` is reported.

    Parameters
    ----------
    description : str or any
        Description text of one transaction. A non-string value (for example
        the float NaN pandas produces for a blank spreadsheet cell) cannot be
        matched and is classified as ``'U: Uncategorized'`` instead of
        raising ``AttributeError``.

    Returns
    -------
    str
        The matching category label, or ``'U: Uncategorized'`` when no rule
        matches.
    """
    # Blank cells arrive as NaN (a float), which has no .upper().
    if not isinstance(description, str):
        return get_category('U: Uncategorized')

    text = description.upper()
    for category, keywords in _CATEGORY_RULES:
        if any(keyword in text for keyword in keywords):
            return get_category(category)

    # OTHER: nothing matched
    return get_category('U: Uncategorized')

In [47]:
def categorize_all(input_period, acct_suffix='CheckingAcct'):
    """Categorize one period's transactions and write three CSV reports.

    Reads ``<input_period>-A&T-<acct_suffix>.xlsx`` from the current working
    directory, labels each row with ``categorize`` and writes, next to it:

    * ``...-CATEGORIZED.csv``          all rows plus a ``Category`` column
    * ``...-SORTED-BY-CATEGORY.csv``   the same rows ordered by category
    * ``...-SUMMARY.csv``              total ``Amount`` per category

    The per-category totals are also printed, in the order of the
    module-level ``all_categories`` frame.

    Parameters
    ----------
    input_period : str
        Prefix of the file names, e.g. ``'2024-01'``.
    acct_suffix : str, optional
        Account part of the file names. Defaults to ``'CheckingAcct'``, the
        value that used to be hard-coded; pass e.g. ``'CCard'`` to process a
        differently named export.

    Returns
    -------
    None
    """
    file_stem = f"{input_period}-A&T-{acct_suffix}"
    input_datafile = f"{file_stem}.xlsx"
    output_datafile_categorized = f"{file_stem}-CATEGORIZED.csv"
    output_datafile_categorized_sorted = f"{file_stem}-SORTED-BY-CATEGORY.csv"
    output_datafile_summary = f"{file_stem}-SUMMARY.csv"

    # Load the data. The sheet must have exactly four columns; they are
    # renamed positionally (a different column count raises ValueError).
    data = pd.read_excel(input_datafile)
    data.columns = ['Date', 'Amount', 'Description', 'Ref,#']

    # Categorize by description and add the Category column
    data['Category'] = data['Description'].apply(categorize)
    # Save the categorized rows to a new CSV file
    data.to_csv(output_datafile_categorized, index=False)

    # Order the rows by category. A stable sort keeps rows of the same
    # category in their original file order, so the output is deterministic.
    sorted_by_category = data.sort_values(by='Category', ascending=True, kind='mergesort')
    # The index is written as the first column here (no index=False), so each
    # row keeps its position from the input file.
    sorted_by_category.to_csv(output_datafile_categorized_sorted)

    # Sum the amounts per category
    grouped_summary = data.groupby('Category')['Amount'].sum().reset_index()

    # Left-merge onto the full category list so that categories without any
    # transaction in this period still appear, with a total of 0.
    merged_summary = all_categories.merge(grouped_summary, on='Category', how='left')
    merged_summary = merged_summary.fillna(0)
    print(merged_summary)

    # The summary file is ordered alphabetically by category label.
    sorted_summary = merged_summary.sort_values(by='Category', ascending=True)
    sorted_summary.to_csv(output_datafile_summary, index=False)

In [48]:
# Build the reports for period 2024-01 and print its per-category totals.
categorize_all('2024-01')

                    Category   Amount
0                 A: Morgage -1408.78
1       A: Medical Insurance  -781.36
2           B: Car Insurance     0.00
3       B: Utilities n Bills  -737.14
4   C: Paper Check - Aikido?  -330.00
5        C: Cash Withdrawals     0.00
6            I: Anita Income  4000.00
7       I: Fidelity Transfer  9000.00
8       I: KeyBank cash-back   125.00
9                   T: Taxes     0.00
10           V: Visa Payment     0.00
11          U: Uncategorized     0.00


In [49]:
# Build the reports for period 2024-02 and print its per-category totals.
categorize_all('2024-02')

                    Category   Amount
0                 A: Morgage -1408.78
1       A: Medical Insurance -1325.06
2           B: Car Insurance     0.00
3       B: Utilities n Bills  -877.86
4   C: Paper Check - Aikido?  -150.00
5        C: Cash Withdrawals  -300.00
6            I: Anita Income  7000.00
7       I: Fidelity Transfer     0.00
8       I: KeyBank cash-back     0.00
9                   T: Taxes     0.00
10           V: Visa Payment -5022.73
11          U: Uncategorized     0.00


In [50]:
# Build the reports for period 2024-03 and print its per-category totals.
categorize_all('2024-03')

                    Category   Amount
0                 A: Morgage -1408.78
1       A: Medical Insurance  -781.36
2           B: Car Insurance -1354.50
3       B: Utilities n Bills  -897.31
4   C: Paper Check - Aikido? -2303.00
5        C: Cash Withdrawals     0.00
6            I: Anita Income  6500.00
7       I: Fidelity Transfer  9000.00
8       I: KeyBank cash-back     0.00
9                   T: Taxes    21.00
10           V: Visa Payment -5557.24
11          U: Uncategorized     0.00


In [51]:
# Build the reports for period 2024-04 and print its per-category totals.
categorize_all('2024-04')

                    Category   Amount
0                 A: Morgage -1408.78
1       A: Medical Insurance  -781.36
2           B: Car Insurance     0.00
3       B: Utilities n Bills  -647.40
4   C: Paper Check - Aikido?  -809.00
5        C: Cash Withdrawals    58.20
6            I: Anita Income  6200.00
7       I: Fidelity Transfer     0.00
8       I: KeyBank cash-back   482.02
9                   T: Taxes -1050.00
10           V: Visa Payment -6711.26
11          U: Uncategorized     0.00


In [52]:
# Build the reports for period 2024-05 and print its per-category totals.
categorize_all('2024-05')

                    Category    Amount
0                 A: Morgage  -1408.78
1       A: Medical Insurance  -1305.46
2           B: Car Insurance      0.00
3       B: Utilities n Bills   -861.51
4   C: Paper Check - Aikido?      0.00
5        C: Cash Withdrawals      0.00
6            I: Anita Income   6200.00
7       I: Fidelity Transfer   6000.00
8       I: KeyBank cash-back      0.00
9                   T: Taxes      0.00
10           V: Visa Payment -10531.66
11          U: Uncategorized      0.00


In [53]:
# Build the reports for period 2024-06 and print its per-category totals.
categorize_all('2024-06')

                    Category   Amount
0                 A: Morgage -1408.78
1       A: Medical Insurance  -781.36
2           B: Car Insurance     0.00
3       B: Utilities n Bills  -724.12
4   C: Paper Check - Aikido?  -300.00
5        C: Cash Withdrawals     0.00
6            I: Anita Income  5300.00
7       I: Fidelity Transfer  9000.00
8       I: KeyBank cash-back     0.00
9                   T: Taxes     0.00
10           V: Visa Payment -1000.00
11          U: Uncategorized     0.00


In [54]:
# Build the reports for period 2024-07 and print its per-category totals.
# NOTE(review): the saved output for this period shows a non-zero
# 'U: Uncategorized' total; check the -CATEGORIZED.csv file for descriptions
# that no keyword rule matched, and re-run to refresh the output.
categorize_all('2024-07')

                    Category    Amount
0                 A: Morgage  -1565.84
1       A: Medical Insurance   -781.36
2           B: Car Insurance      0.00
3       B: Utilities n Bills   -823.11
4   C: Paper Check - Aikido?   -420.00
5        C: Cash Withdrawals      0.00
6            I: Anita Income   6500.00
7       I: Fidelity Transfer  12000.00
8       I: KeyBank cash-back      0.00
9                   T: Taxes      0.00
10           V: Visa Payment  -5752.65
11          U: Uncategorized  -8382.15


In [None]:
# Build the reports for period 2024-08 and print its per-category totals.
categorize_all('2024-08')

In [None]:

# Build the reports for period 2024-09 and print its per-category totals.
categorize_all('2024-09')