In [321]:
import pandas as pd
import numpy as np
import os

In [322]:
def extract_csv_data(csv_file, enumerate_num):
    df = pd.read_csv(f'inputs/{csv_file}')
    payments = df.shape[0] + 1
    payout_date = df['payouts.arrival_date'][0]
    payout_date = pd.to_datetime(payout_date)
    next_payout_sum = df['net_amount'].sum()

    output_str = f"""
    ID: {enumerate_num+1}
    Number of Payments: {payments}
    Date of Payout: {payout_date.strftime('%d-%b-%Y')}
    Payout Amount: £{round(next_payout_sum,2)}
    """

    print(output_str)
    return df

In [323]:
def extract_payment_amounts(df):
    sections = ['Squirrels', 'Beavers', 'Cubs', 'Scouts']
    payment_schedules = ['Activities', 'Monthly Subscriptions']

    result_subsets = []  # List to store subsets

    df = df.groupby(['resources.description'])[['gross_amount', 'gocardless_fees', 'app_fees', 'net_amount']].sum().reset_index()

    for section in sections:
        subset_section = df[df['resources.description'].str.contains(section, case=False)].copy()

        for schedule in payment_schedules:
            subset_schedule = subset_section[subset_section['resources.description'].str.contains(schedule, case=False)].copy()
            subset_schedule['Section Payment Schedule'] = section + ": " + schedule

            result_subsets.append(subset_schedule)

    # Concatenate all subsets into a new DataFrame
    df = pd.concat(result_subsets, ignore_index=True)
    
    # Group so all sections payments for each schedule are together
    df = df.groupby(['Section Payment Schedule'])[['gross_amount', 'gocardless_fees', 'app_fees', 'net_amount']].sum().reset_index()
    
    # Sort so it is easier to read
    df = df.sort_values(by=['Section Payment Schedule'])

    # Display the result DataFrame
    return df

In [324]:
def highlight_bottom_row(s):
    is_bottom_row = s.index == (len(s) - 1)
    return ['background-color: yellow' if v else '' for v in is_bottom_row]

# Apply the style to the DataFrame
styled_df = df.style.apply(highlight_bottom_row, axis=None)


In [325]:
def sort_and_order_df(df):
    
   # Sort so it is easier to read
   df = df.sort_values(by=['Section Payment Schedule'])

   df['total_fees'] = df['gross_amount'] - df['net_amount']

   column_order = ['Section Payment Schedule', 'net_amount', 'total_fees', 'gross_amount', 'gocardless_fees','app_fees']
    
   df = df[column_order]

   df[['Section', 'Schedule']] = df['Section Payment Schedule'].str.split(': ', n=1, expand=True)
   
   column_sums = df.sum()
   
   total_row = pd.DataFrame([column_sums.values], columns=column_sums.index, index=['Total'])
     
   df = pd.concat([df, total_row])
   df.at['Total', 'Section Payment Schedule'] = 'Total'
   df.at['Total', 'Section'] = ''
   df.at['Total', 'Schedule'] = ''
   df = df.style.apply(lambda x: ['background-color: #008080']*len(x), subset=pd.IndexSlice[df.index[-1], :])
   df = df.set_properties(**{'text-align': 'left'}, subset=[('Section Payment Schedule')])
   

   return df

In [326]:
def by_schedule():
    csv_files = [f for f in os.listdir('inputs') if f.endswith(".csv")]
    for idx, csv_file in enumerate(csv_files):
        df = extract_csv_data(csv_file=csv_file, enumerate_num=idx)
        df = extract_payment_amounts(df)
        df = sort_and_order_df(df)
        display(df)
        df.to_excel(f'outputs/{idx}_output.xlsx', index=False)
by_schedule()
        


    ID: 1
    Number of Payments: 39
    Date of Payout: 12-Jan-2024
    Payout Amount: £489.68
    


Unnamed: 0,Section Payment Schedule,net_amount,total_fees,gross_amount,gocardless_fees,app_fees,Section,Schedule
0,Beavers: Activities,43.2,1.8,45.0,0.93,0.87,Beavers,Activities
1,Beavers: Monthly Subscriptions,34.44,1.56,36.0,0.87,0.69,Beavers,Monthly Subscriptions
2,Cubs: Monthly Subscriptions,344.4,15.6,360.0,8.7,6.9,Cubs,Monthly Subscriptions
3,Scouts: Activities,56.16,1.84,58.0,0.71,1.13,Scouts,Activities
4,Squirrels: Monthly Subscriptions,11.48,0.52,12.0,0.29,0.23,Squirrels,Monthly Subscriptions
Total,Total,489.68,21.32,511.0,11.5,9.82,,



    ID: 2
    Number of Payments: 142
    Date of Payout: 12-Feb-2024
    Payout Amount: £2429.51
    


Unnamed: 0,Section Payment Schedule,net_amount,total_fees,gross_amount,gocardless_fees,app_fees,Section,Schedule
0,Beavers: Activities,28.95,1.05,30.0,0.46,0.59,Beavers,Activities
1,Beavers: Monthly Subscriptions,275.52,12.48,288.0,6.96,5.52,Beavers,Monthly Subscriptions
2,Cubs: Activities,110.95,4.05,115.0,1.79,2.26,Cubs,Activities
3,Cubs: Monthly Subscriptions,332.92,15.08,348.0,8.41,6.67,Cubs,Monthly Subscriptions
4,Scouts: Activities,992.37,33.63,1026.0,13.58,20.05,Scouts,Activities
5,Scouts: Monthly Subscriptions,516.6,23.4,540.0,13.05,10.35,Scouts,Monthly Subscriptions
6,Squirrels: Monthly Subscriptions,172.2,7.8,180.0,4.35,3.45,Squirrels,Monthly Subscriptions
Total,Total,2429.51,97.49,2527.0,48.6,48.89,,


In [327]:
import pandas as pd

# Assuming df is your DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'Salary': [50000, 60000, 75000]}

df = pd.DataFrame(data)

# Calculate the sum of each column
column_sums = df.sum()

# Create a new DataFrame with the sums as a new row
total_row = pd.DataFrame([column_sums.values], columns=column_sums.index, index=['Total'])

# Concatenate the original DataFrame with the total row
df_with_total = pd.concat([df, total_row])

# Display the DataFrame with the total row
print(df_with_total)


                  Name  Age  Salary
0                Alice   25   50000
1                  Bob   30   60000
2              Charlie   35   75000
Total  AliceBobCharlie   90  185000
