# Load data

In [56]:
# Load openpyxl
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

In [57]:
import pandas as pd

# Alternative input workbooks, kept commented out so they can be swapped in while testing
# existing_file_path = 'data/input/test_input.xlsx'
# existing_file_path = 'data/input/test_input_minimized.xlsx'
# existing_file_path = 'data/input/test_data_table.xlsx'
# Workbook that is actually loaded into `df` below
existing_file_path = 'data/input/tc_pride_data.xlsx'
# Destination for a plain dataframe export; in the cells visible here it is only used by commented-out code
new_file_path = 'data/output/test_output.xlsx'

# No sheet_name is passed, so pandas reads the workbook's first sheet
df = pd.read_excel(existing_file_path)

# Reading data with pandas

In [58]:
# Read the entire dataframe
# df

## Read headers
# print(df.columns)

## Read individual columns
# print(df.Date)
#  or
# print(df['Net Donation'])
#  or
# print(df[['Date', "Description", "Net Donation"]])

## Read top 3 rows
# print(df.head(3))

## Read row 3 (which has an index of 2),
# print(df.iloc[2])
# then rows 1-5 (positions 0-4; the end of the slice is exclusive)
# print(df.iloc[0:5])
# Read specific location (R, C)
# print(df.iloc[2, 1]) # 3rd row, 2nd column

## Iterate over rows
# for index, row in df.iterrows():
    # print(index, row['Date'], row['Description'])
    # print(index, row)
    # print(index, row.Date)

# Conditional selection of rows
# df.loc[df['Net Donation'] > 60]

# Generate statistics
# df.describe()


# Sorting data

In [59]:
# df # Before sorting

# Sorting by single columns
# df = df.sort_values(by=['Description'])
# df = df.sort_values(by=['Event'])
# df = df.sort_values(by=['Source Title'])

# Sorting can be done on multiple columns with this one line of code
# Sort priority runs left to right: 'Source Title', then 'Event', then 'Description'.
# NOTE: sort_values keeps each row's original index label (the output below starts 1, 0, 2, ...),
# so after this line a row's index label is no longer its position in the dataframe.
df = df.sort_values(by=['Source Title','Event','Description'])

print(df)
# df # After sorting

         Date                                   Description  Net Donation  \
1  2024-02-26  REFUND FOR PAYMENT (michelle.angelo@umn.edu)       -625.00   
0  2024-03-08                              jeffmd@yahoo.com       8150.00   
2  2024-02-24                            thomasjt@gmail.com       3150.00   
3  2024-03-06                             mncompany@aol.com         50.00   
4  2024-03-05                            wxspress@gmail.com         25.00   
5  2024-03-10                        director@eservices.org         50.00   
6  2024-02-26           Billing (2024-02-25): Subscriptions         -0.02   
7  2024-02-25                       Harness Custom Donation          4.00   
8  2024-02-25        One-time donation to Twin Cities Pride         10.00   
9  2024-02-25                         Subscription creation          3.02   
10 2024-03-07                           Subscription update          5.03   
11 2024-03-08               Twin Cities Pride - Order 40036        102.06   

# Sum revenue into different categories

In [60]:

def categorize_revenue(description, event, source_title):
  """Return the revenue category for one row.

  All three arguments are expected to be lower-cased strings already
  (the substring checks below use lower-case keywords).

  When several rules match, the winner is chosen in this order:
  Merchandise, Donation, Book Fair, Pride Parade, Pride Festival.
  A row that matches no rule is reported as 'unknown'.
  """
  # Merchandise: store orders are recognised by their description
  if 'twin cities pride - order' in description:
    return 'Merchandise'

  # Donation: subscriptions and donations are both counted as donations
  if 'donation' in description or 'subscription' in description:
    return 'Donation'

  # Book fair: mentioned in either the event or the source title
  if 'book fair' in source_title or 'book fair' in event:
    return 'Book Fair'

  # Festival events: a pride march source title means the parade,
  # anything else (book fairs were handled above) is the festival itself
  if 'twin cities pride festival' in event:
    return 'Pride Parade' if 'pride march' in source_title else 'Pride Festival'

  return 'unknown'


def check_refund(description, net_donation):
  """Return True when the row is a refund, False otherwise.

  A refund is a row whose description contains 'refund' or 'return'
  and whose net donation is negative.
  """
  mentions_refund = any(keyword in description for keyword in ('refund', 'return'))
  # The amount is only compared when a keyword was found, as before
  return bool(mentions_refund and net_donation < 0)

def safe_lower(input):
    """Lower-case ``input`` when it is a string; any other value yields ''."""
    return input.lower() if isinstance(input, str) else ''

def safe_number(input):
    """Return ``input`` when it is a usable real number, otherwise 0.

    NaN is treated as "not a number": NaN is itself a float, and letting it
    through would turn every sum it is added to into NaN.

    Any ``numbers.Real`` value is accepted, so real-number types other than
    the built-in int and float are passed through as well.
    """
    import numbers  # local import keeps this function self-contained

    # `input == input` is False only for NaN
    if isinstance(input, numbers.Real) and input == input:
        return input
    return 0  # not a number (or NaN)

# Variables
category_map = {}    # category name -> running net total for that category
total_donations = 0  # running net total across every row

# OPTIONAL - Set the preferred date format: 'string' or 'datetime'
# (any other value leaves the Date column untouched)
preferred_date_format = 'string'

row_categories = []  # category of each row, in dataframe row order

for _, row in df.iterrows():
  description = safe_lower(row['Description'])
  event = safe_lower(row['Event'])
  source_title = safe_lower(row['Source Title'])
  net_donation = safe_number(row['Net Donation'])

  # Categorize
  category = categorize_revenue(description, event, source_title)
  row_categories.append(category)

  # NOTE: check_refund(description, net_donation) used to be called here, but
  # its result was never used, so the call was dropped.

  # Sum up donations (a category starts at 0 the first time it is seen)
  category_map[category] = category_map.get(category, 0) + net_donation
  total_donations += net_donation

# Add the categories to the dataframe in a single positional assignment,
# which does not depend on the index labels being unique
df['Category'] = row_categories

# OPTIONAL: Convert the Date column to the preferred format.
# This is done once for the whole column: writing a formatted string into a
# datetime column one cell at a time is dtype-dependent (pandas may parse the
# string straight back into a datetime), so the column is rebuilt as objects.
# Values that are not of the type being converted are left as they are.
if preferred_date_format == 'string':
  df['Date'] = df['Date'].astype(object).map(
    lambda value: value.strftime('%m/%d/%Y') if isinstance(value, pd.Timestamp) else value
  )
elif preferred_date_format == 'datetime':
  df['Date'] = df['Date'].astype(object).map(
    lambda value: pd.to_datetime(value) if isinstance(value, str) else value
  )

# Convert the per-category totals to currency strings
category_map = {name: '${:,.2f}'.format(amount) for name, amount in category_map.items()}

# convert the total donations to currency
total_donations = '${:,.2f}'.format(total_donations)

print(category_map)
print('Total donations:', total_donations)


{'Pride Festival': '$10,725.00', 'Book Fair': '$25.00', 'Pride Parade': '$50.00', 'Donation': '$22.03', 'Merchandise': '$102.06'}
Total donations: $10,924.09


# Convert the dataframe data back into its original form


In [61]:
# Date conversions might be needed for writing to excel files
# df['Date'] = pd.to_datetime(df['Date'],dayfirst=True)

# with pd.ExcelWriter('output.xlsx', date_format='mm/dd/yyyy hh:mm:ss', datetime_format='mm/dd/yyyy hh:mm:ss') as writer:
#     df.to_excel(writer, index=False)
# df['Date'] = pd.to_datetime(df['Date'])


# Turn the date into a string
# df.at[index, 'Date (as string)'] = row['Date'].strftime('%m/%d/%Y')
# df['Date'] = df['Date'].dt.strftime('%m/%d/%Y')



# Description
# REFUND FOR PAYMENT
# One-time donation
# Harness custom donation

# Event
# 2024 Twin Cities Pride Festival	

# Source Title
# 2024 Booth Vendor - Pride Festival
# 2024 Food Vendor - Pride Festival
# 2024 Queer Writes Book Fair @ Pride
# 2024 TC Pride March Application

# Write data into the template file

In [62]:
# Load the pre-formatted Excel template
wb = load_workbook('data/output/test_template.xlsx')
ws = wb.active


# Need to get the column names in order to input all the data in the correct columns
# Get the column names from dataframe as an Index object
column_names = df.columns

# Define starting cell row and column for data insertion (1-based worksheet coordinates).
# Starting at row 2 presumably leaves row 1 for the template's header row - confirm against the template.
start_row = 2
start_col = 1

# Insert DataFrame data into Excel based on column names.
# Rows are placed by their position in df, not by their index label: df was sorted
# earlier and still carries its pre-sort labels, so "label + start_row" would write
# the rows back in the old unsorted order (and would fail for a non-integer index).
for row_offset, (_, row) in enumerate(df.iterrows()):
    for col_offset, column_name in enumerate(column_names):
        ws.cell(row=start_row + row_offset, column=start_col + col_offset, value=row[column_name])


# Save the workbook as the final output
wb.save('data/output/final_output_from_template.xlsx')

# Methods for writing to a new Excel sheet

In [63]:
# # Load the pre-formatted Excel template
# wb = load_workbook('data/output/test_template.xlsx')
# ws = wb.active

# # Loop through the dataframe and write to the Excel template
# for index, row in df.iterrows():
#     cell = 'A' + str(index + 2)  # Adjust based on where data needs to start
#     ws[cell] = row['Date']
    

# # Assuming data starts at row 2 to preserve headers or formatting in row 1
# for index, row in df.iterrows():
#     cell = 'A' + str(index + 2)  # Adjust based on where data needs to start
#     ws[cell] = row['Date']

# # Save the workbook as the final output
# wb.save('data/output/final_output_from_template.xlsx')

In [64]:
# # Regular method to save the dataframe to an excel file
# df.to_excel(new_file_path, index=False)

# Use Openpyxl to save the format of each column

In [65]:

# # Load the original workbook and active sheet
# wb = load_workbook(existing_file_path)
# ws = wb.active

# # Store formatting in a list of (column, number_format) tuples
# original_formats = []
# # Get the second row and iterate through each cell
# row = ws[2]
# for cell in row:
#     # Store number format for each cell
#     # Append the column and its number format to the list
#     original_formats.append((cell.column, cell.number_format))



# # print each column of the first row
# for cell in ws[1]:
#     print(cell.value)

# print(original_formats)

# Use Openpyxl to make formatting changes

In [66]:
# from openpyxl import load_workbook, Workbook # *** In Python, specific names are imported with "from <module> import <name>" (a class, function, or variable)

# # Temporary variable assignment
# spreadsheet_title = 'Payout Report for $123.45 on Mar 7 - asdfghjk'
# # Temporary variable assignment

# # Load worksheet
# wb = load_workbook(filename=new_file_path) # load the file path that's been created by the first script
# ws = wb.active

# # Add revenue categories and donation total
# ws.append([])
# ws.append([])

# # Convert dictionary to list
# category_map_list = []
# for category in category_map:
#   category_map_list.append([category, category_map[category]])
# category_map_list.sort()

# # TEMPORARY TEST
# # test_list = [['Book Fair', 25.0],['Donation', 22.03],['Merchandise', 102.06], ['Pride Festival', 10725.0], ['Pride Parade', 50.0]]
# # category_map_list = test_list
# # TEMPORARY TEST


# # Append the values
# for category in category_map_list:
#   # Append the category key, and the value as a currency
#   ws.append([category[0], category[1]])
# ws.append(['Total', total_donations])

# # Add spreadsheet title
# ws.append([])
# ws.append([])
# ws.append([spreadsheet_title])

# # Save the workbook to a new file
# wb.save(new_file_path)