In [1]:
import pandas as pd
from pha_tools.io import gather_data_filenames, load_donation_data_from_filenames

In [2]:
# Where the transaction workbooks live and the glob pattern that selects them.
data_dir, glob_text = '../data/', 'transactions*.xlsx'

# presumably returns the paths under data_dir matching glob_text — verify in pha_tools.io
filenames = gather_data_filenames(data_dir, glob_text)

# Load the gathered files into the `donations` frame used by the rest of the notebook.
donations = load_donation_data_from_filenames(filenames)

In [3]:
# Sorted list of the distinct calendar years present in the data, as plain Python ints.
years = sorted(map(int, donations['date'].dt.year.unique()))

In [4]:
def normalize_name(name):
    """Return ``name`` with whitespace collapsed and every word capitalized.

    The name is split on runs of whitespace (leading/trailing whitespace is
    dropped), each word is passed through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased), and the words are re-joined with a
    single space.
    """
    words = name.split()
    return ' '.join(map(str.capitalize, words))

In [5]:
# Normalize every donor name so that spelling variants that differ only in
# case or spacing compare equal in the set/groupby operations that follow.
donations['donor_name'] = donations['donor_name'].map(normalize_name)

In [6]:
# Map each year to the set of donor names with at least one donation in that year.
donors_by_year = {
    year: set(donations.loc[donations['date'].dt.year == year, 'donor_name'])
    for year in years
}

In [7]:
# Year-over-year donor retention report.
# report[year] compares `year` with the calendar year immediately before it:
#   'new'        - donors who gave in `year` but not the year before
#   'lost'       - donors who gave the year before but not in `year`
#   'returning'  - donors who gave in both years
#   'upgrades'   - returning donors whose yearly total increased
#   'downgrades' - returning donors whose yearly total did not increase
# Upgrade/downgrade entries map name -> (this_year_sum, last_year_sum).
report = {}
for last_year in years:
    this_year = last_year + 1
    # Only consecutive calendar years are compared. Skip just this pair when
    # the following year has no data; breaking out of the loop here would
    # silently drop every later year from the report if the data had a gap.
    if this_year not in years:
        continue
    this_year_donations = donations.loc[donations['date'].dt.year == this_year]
    last_year_donations = donations.loc[donations['date'].dt.year == last_year]

    report[this_year] = {}
    last_year_donors = donors_by_year[last_year]
    this_year_donors = donors_by_year[this_year]
    new = this_year_donors - last_year_donors
    report[this_year]['new'] = new

    lost = last_year_donors - this_year_donors
    report[this_year]['lost'] = lost

    returning = this_year_donors & last_year_donors
    report[this_year]['returning'] = returning

    report[this_year]['upgrades'] = {}  # name: (this_year_sum, last_year_sum)
    report[this_year]['downgrades'] = {}  # name: (this_year_sum, last_year_sum)
    for name in returning:
        # Select rows and column in a single .loc call instead of chained indexing.
        this_year_sum = this_year_donations.loc[this_year_donations['donor_name'] == name, 'amount'].sum()
        last_year_sum = last_year_donations.loc[last_year_donations['donor_name'] == name, 'amount'].sum()
        year_to_year = (this_year_sum, last_year_sum)
        if this_year_sum > last_year_sum:  # upgrade if increased
            report[this_year]['upgrades'][name] = year_to_year
        else:  # no increase (including an exact tie) is a downgrade
            report[this_year]['downgrades'][name] = year_to_year

# Print a per-year summary: donor counts plus the net change in giving among
# upgraded and downgraded returning donors.
for year in report:
    year_report = report[year]
    upgrades = year_report['upgrades']
    downgrades = year_report['downgrades']
    # Each value is (this_year_sum, last_year_sum); the difference is the donor's change.
    upgrade_total = sum(this_sum - last_sum for this_sum, last_sum in upgrades.values())
    downgrade_total = sum(this_sum - last_sum for this_sum, last_sum in downgrades.values())

    print(year)
    print(f'lost: {len(year_report["lost"])}')
    print(f'new: {len(year_report["new"])}')
    print(f'returning: {len(year_report["returning"])}')
    # Totals are computed above so the f-strings contain no nested quotes:
    # reusing the enclosing quote character inside an f-string only parses on
    # Python 3.12+.
    print(f'\tupgrades:   {len(upgrades):>4} donors for ${upgrade_total:9.2f}')
    print(f'\tdowngrades: {len(downgrades):>4} donors for ${downgrade_total:9.2f}')

2023
lost: 9
new: 19
returning: 301
	upgrades:    182 donors for $ 22533.53
	downgrades:  119 donors for $-11807.13
2024
lost: 10
new: 11
returning: 310
	upgrades:    185 donors for $ 22526.59
	downgrades:  125 donors for $-13247.90
2025
lost: 38
new: 8
returning: 283
	upgrades:     74 donors for $  6191.21
	downgrades:  209 donors for $-31272.77


In [8]:
# List the available column names on a single line.
print(", ".join(donations.columns.tolist()))

txn_id, date, email, amount, campaign, channel, payment_method, receipt_no, donor_name


In [9]:
# Add a calendar-year column derived from the donation date; the per-year
# aggregations below group on it.
donations['year'] = donations['date'].dt.year
donations.head()

Unnamed: 0,txn_id,date,email,amount,campaign,channel,payment_method,receipt_no,donor_name,year
0,T2023-000001,2023-11-14,juan-davis@mail.ca,38.13,Education,Web,Transfer,R202327129,Juan Davis,2023
1,T2023-000002,2023-01-28,carlos-thomas@gmail.com,58.17,Nutrition,Web,Credit Card,R202343502,Carlos Thomas,2023
2,T2023-000003,2023-06-03,patricia-hernandez@outlook.com,20.38,Healthcare,Web,Cash,R202352449,Patricia Hernandez,2023
3,T2023-000004,2023-07-21,jose-taylor@mail.ca,98.95,Nutrition,Check,Cash,R202338279,Jose Taylor,2023
4,T2023-000005,2023-09-25,gabriela-anderson@yahoo.com,42.45,Community Development,Event,Check,,Gabriela Anderson,2023


In [10]:
# Columns needed for per-donor, per-year totals (reused by later cells).
agg_columns = ['year', 'donor_name', 'amount']

# Total amount given by each donor in each year.
donations.loc[:, agg_columns].groupby(by=['donor_name', 'year']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,amount
donor_name,year,Unnamed: 2_level_1
Adrian Hernandez,2022,163.96
Adrian Hernandez,2023,65.26
Adrian Hernandez,2024,203.63
Adrian Hernandez,2025,75.96
Adrian Johnson,2022,261.54
...,...,...
Tim Torres,2024,163.22
Tim Torres,2025,99.68
Tim Williams,2022,4.29
Tim Williams,2024,306.13


In [22]:
# Per-donor summary table: how many years they gave, first/last year, mean
# yearly total, and a status relative to REPORT_YEAR.
REPORT_YEAR = 2025
donor_stats_columns = ['num_years_donated', 'first_year', 'last_year', 'status', 'average_annual_donation']
donor_stats = pd.DataFrame(index=sorted(donations['donor_name'].unique()), columns=donor_stats_columns)

# One row per (donor, year) holding that year's total, indexed by donor_name
# with 'year' and 'amount' as ordinary columns.
donor_year_amounts = donations[agg_columns].groupby(['donor_name', 'year']).sum().reset_index(['year'])

# Group by donor once and reuse the grouping for every statistic and for the
# status loop, instead of re-grouping per statistic and re-masking the whole
# frame for every donor.
donor_groups = donor_year_amounts.groupby(level='donor_name')
donor_years = donor_groups['year']
donor_stats['num_years_donated'] = donor_years.count()
donor_stats['first_year'] = donor_years.min()
donor_stats['last_year'] = donor_years.max()

# Mean of the yearly totals over the years in which the donor actually gave;
# years without a donation are not counted as zero.
donor_stats['average_annual_donation'] = donor_groups['amount'].mean()

for donor, this_donor_rows in donor_groups:
    # Yearly totals for this donor as a Series indexed by year, oldest first.
    this_donor_year_amounts = this_donor_rows.set_index('year').sort_index()['amount']

    if this_donor_year_amounts.index.max() != REPORT_YEAR:
        # NOTE(review): "Lost" here means no donation in REPORT_YEAR at all,
        # however long ago the last gift was. The year-over-year report only
        # counts donors who gave in the immediately preceding year — confirm
        # which definition is intended.
        status = 'Lost'
    elif this_donor_year_amounts.index.min() == REPORT_YEAR:
        status = 'New'
    else:
        # At least two years are present on this branch (latest is REPORT_YEAR,
        # earliest is not), so iloc[-2] always exists.
        # NOTE(review): the comparison is against the donor's most recent
        # earlier donation year, which is not necessarily REPORT_YEAR - 1.
        if this_donor_year_amounts.iloc[-1] > this_donor_year_amounts.iloc[-2]:
            status = 'Upgrade'
        else:  # no increase (including an exact tie) is a downgrade
            status = 'Downgrade'
    donor_stats.loc[donor, 'status'] = status

display(donor_stats.head())

Unnamed: 0_level_0,year,amount
donor_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adrian Hernandez,2022,163.96
Adrian Hernandez,2023,65.26
Adrian Hernandez,2024,203.63
Adrian Hernandez,2025,75.96
Adrian Johnson,2022,261.54


Unnamed: 0,num_years_donated,first_year,last_year,status,average_annual_donation
Adrian Hernandez,4,2022,2025,Downgrade,127.2025
Adrian Johnson,4,2022,2025,Downgrade,222.4125
Adrian Perez,4,2022,2025,Downgrade,106.3825
Adrian Taylor,4,2022,2025,Upgrade,98.75
Adrian Torres,3,2022,2024,Lost,275.88


In [20]:
# Debugging scratch: show the per-year rows for whichever donor the status
# loop last bound to `donor`.
# NOTE(review): depends on a loop variable left behind by another cell, and its
# execution count (In [20]) is out of order with the cell above (In [22]);
# consider deleting before sharing.
donor_year_amounts.loc[donor]

year      2024.00
amount     511.87
Name: David White, dtype: float64

In [None]:
# NOTE(review): leftover post-mortem debugging magic; consider deleting this
# cell — it serves no purpose in a finished notebook.
%debug

In [None]:
# Debugging scratch: compare the 2025 and 2024 totals for the donor left over
# from the status loop.
# NOTE(review): the years are hard-coded and the lookup raises KeyError when
# the donor has no row for either year; consider deleting this cell.
this_donor_year_amounts.loc[2025] > this_donor_year_amounts.loc[2024]