# Load data

In [31]:
import pandas as pd

# Path of the Excel workbook to analyse. The commented-out assignments below
# are other input files (presumably test fixtures - confirm); uncomment
# exactly one assignment to switch data sets.
# existing_file_path = 'data/input/test_input.xlsx'
# existing_file_path = 'data/input/test_input_minimized.xlsx'
# existing_file_path = 'data/input/test_data_table.xlsx'
existing_file_path = 'data/input/tc_pride_data.xlsx'

# Load the workbook into a DataFrame. With no sheet_name argument,
# read_excel reads the first sheet of the workbook.
df = pd.read_excel(existing_file_path)

# Reading data with pandas

In [32]:
# Read the entire dataframe
# df

## Read headers
# print(df.columns)

## Read individual columns
# print(df.Date)
#  or
# print(df['Net Donation'])
#  or
# print(df[['Date', "Description", "Net Donation"]])

## Read top 3 rows
# print(df.head(3))

## Read row 3 (which has an index of 2)
# print(df.iloc[2])
## Read rows 1-5 (positions 0-4; the end of the slice is exclusive)
# print(df.iloc[0:5])
## Read a specific cell by position (row, column)
# print(df.iloc[2, 1]) # 3rd row, 2nd column

## Iterate over rows
# for index, row in df.iterrows():
    # print(index, row['Date'], row['Description'])
    # print(index, row)
    # print(index, row.Date)

# Conditional selection of rows
# df.loc[df['Net Donation'] > 60]

# Generate statistics
# df.describe()


# Sorting data

In [33]:
# df # Before sorting

# Sorting by single columns
# df = df.sort_values(by=['Description'])
# df = df.sort_values(by=['Event'])
# df = df.sort_values(by=['Source Title'])

# Sort on several columns in one call: rows are ordered by 'Source Title'
# first, ties are broken by 'Event', and remaining ties by 'Description'.
# sort_values returns a new DataFrame (it does not sort in place), so the
# result is assigned back to df. Row labels are kept, not renumbered.
df = df.sort_values(by=['Source Title','Event','Description'])

# Bare expression on the last line so the notebook renders the sorted frame.
df # After sorting

Unnamed: 0,Date,Description,Net Donation,Stripe Fee,Platform Fee,Total Gross Donation,Event,Source Title
1,2024-02-26,REFUND FOR PAYMENT (michelle.angelo@umn.edu),-625.0,,,-625.0,2024 Twin Cities Pride Festival,2024 Booth Vendor - Pride Festival
0,2024-03-08,jeffmd@yahoo.com,8150.0,,,8150.0,2024 Twin Cities Pride Festival,2024 Booth Vendor - Pride Festival
2,2024-02-24,thomasjt@gmail.com,3150.0,,,3150.0,2024 Twin Cities Pride Festival,2024 Food Vendor - Location #2 Only
3,2024-03-06,mncompany@aol.com,50.0,,,50.0,2024 Twin Cities Pride Festival,2024 Food Vendor - Pride Festival
4,2024-03-05,wxspress@gmail.com,25.0,,,25.0,2024 Twin Cities Pride Festival,2024 Queer Writes Book Fair @ Pride
5,2024-03-10,director@eservices.org,50.0,,,50.0,2024 Twin Cities Pride Festival,2024 TC Pride March Application
6,2024-02-26,Billing (2024-02-25): Subscriptions,-0.02,,,-0.02,,
7,2024-02-25,Harness Custom Donation,4.0,0.43,0.12,4.55,,
8,2024-02-25,One-time donation to Twin Cities Pride,10.0,0.62,0.3,10.92,,
9,2024-02-25,Subscription creation,3.02,0.4,0.09,3.51,,


# Sum revenue into different categories

In [40]:

def categorize_revenue(description, event, source_title):
  """Assign one revenue row to a reporting category.

  The keyword checks are case-sensitive substring matches against
  lower-case keywords, so callers should pass lower-cased strings.

  Rules are evaluated top to bottom and the LAST rule that matches wins,
  which gives this effective priority:
  Merchandise > Donation > Book Fair > Pride Parade > Pride Festival.

  Args:
    description: text of the row's description column.
    event: text of the row's event column.
    source_title: text of the row's source-title column.

  Returns:
    'Pride Festival', 'Pride Parade', 'Book Fair', 'Donation',
    'Merchandise', or 'unknown' when no rule matches.
  """
  festival_event = 'twin cities pride festival'

  # (label, predicate) pairs. Predicates are lambdas so each one is only
  # evaluated when its turn comes, in the order listed here.
  rules = (
    # Festival event, excluding the march and book-fair source titles.
    ('Pride Festival',
     lambda: festival_event in event
             and 'pride march' not in source_title
             and 'book fair' not in source_title),
    # Festival event whose source title is the march application.
    ('Pride Parade',
     lambda: festival_event in event and 'pride march' in source_title),
    # 'book fair' mentioned in either the event or the source title.
    ('Book Fair',
     lambda: 'book fair' in event or 'book fair' in source_title),
    # Subscriptions and donations are recognised from the description.
    ('Donation',
     lambda: 'subscription' in description or 'donation' in description),
    # Shop orders are recognised from the description.
    ('Merchandise',
     lambda: 'twin cities pride - order' in description),
  )

  matched = 'unknown'
  for label, applies in rules:
    if applies():
      matched = label  # a later match overrides an earlier one

  return matched


def check_refund(description, net_donation):
  """Report whether a row looks like a refund.

  A row is a refund when its description contains 'refund' or 'return'
  (case-sensitive substring match, so pass a lower-cased description) AND
  its net donation is strictly negative.

  Returns:
    True for a refund row, False otherwise.
  """
  mentions_refund = any(word in description for word in ('refund', 'return'))
  if not mentions_refund:
    return False

  # The amount is only inspected once the wording matched.
  return True if net_donation < 0 else False

def safe_lower(input):
    """Lower-case ``input`` when it is a string; otherwise return ''.

    Any value that is not a ``str`` instance (None, numbers, NaN, ...)
    yields an empty string.
    """
    return input.lower() if isinstance(input, str) else ''

def safe_number(input):
    """Return ``input`` when it is a usable real number, otherwise 0.

    A value is usable when it is a real number (``numbers.Real``) and is
    not NaN. Two problems with a plain ``isinstance(input, (int, float))``
    check are avoided:

    * NaN is a ``float``, so it would be passed through and turn every
      running sum it is added to into NaN. It is mapped to 0 here.
    * NumPy scalar types register with the ``numbers`` ABCs but are not all
      subclasses of ``int``/``float`` (e.g. ``np.int64``), so they would be
      silently replaced by 0. They are accepted here.

    Args:
        input: any value, typically a cell read from a DataFrame.

    Returns:
        ``input`` unchanged if it is a non-NaN real number, else 0.
    """
    import numbers  # local import so this cell does not depend on another cell

    if not isinstance(input, numbers.Real):
        return 0  # strings, None, dates, ... are not numbers

    # NaN is the only value that is not equal to itself. Comparing this way
    # avoids converting very large ints to float just to test for NaN.
    if input != input:
        return 0

    return input

# Running totals: net amount per category, plus the grand total
category_map = {}
total_donations = 0

for index, row in df.iterrows():
  # Normalise the text columns to lower-case strings and the amount to a number
  description, event, source_title = (
    safe_lower(row[column]) for column in ('Description', 'Event', 'Source Title')
  )
  net_donation = safe_number(row['Net Donation'])

  # Work out which category the row belongs to
  category = categorize_revenue(description, event, source_title)

  # Flag refunds (computed for inspection; not used in the sums below)
  is_refund = check_refund(description, net_donation)

  # Accumulate: a category seen for the first time starts from 0
  category_map[category] = category_map.get(category, 0) + net_donation
  total_donations += net_donation

  # print('category is:', category, '. refund?:', is_refund)
  # Check the description, event, and source against a dictionary of keywords

print(category_map)
print('Total donations:', total_donations)


{'Pride Festival': 10725.0, 'Book Fair': 25.0, 'Pride Parade': 50.0, 'Donation': 22.03, 'Merchandise': 102.06}
Total donations: 10924.09


# Convert the dataframe data back into its original form


In [35]:
# Date conversions might be needed for writing to excel files
# df['Date'] = pd.to_datetime(df['Date'],dayfirst=True)

# Description
# REFUND FOR PAYMENT
# One-time donation
# Harness custom donation

# Event
# 2024 Twin Cities Pride Festival	

# Source Title
# 2024 Booth Vendor - Pride Festival
# 2024 Food Vendor - Pride Festival
# 2024 Queer Writes Book Fair @ Pride
# 2024 TC Pride March Application

# Write to Excel

In [36]:
# df.to_excel('data/output/test_output.xlsx', index=False)