In [37]:
#imports

import pandas as pd
import re

In [38]:
#constants
#each processor charges a percentage of the transaction amount plus a flat fee

#fee schedule used when the transaction amount is below $50
LESS_THAN_50_TSYS_PERCENTAGE_FEE = 0.01
LESS_THAN_50_TSYS_FLAT_FEE = 0.10
LESS_THAN_50_FIRST_DATA_PERCENTAGE_FEE = 0.0125
#flat fee; the name omits "_FLAT" unlike its siblings but is kept as-is
#because firstdataFeeCalculator refers to it by this exact name
LESS_THAN_50_FIRST_DATA_FEE = 0.08
LESS_THAN_50_EVO_PERCENTAGE_FEE = 0.011
LESS_THAN_50_EVO_FLAT_FEE = 0.09

#fee schedule used when the transaction amount is $50 or more
#(the fee calculators test Amount >= 50.00, so exactly $50 lands here)
OVER_50_TSYS_PERCENTAGE_FEE = 0.02
OVER_50_TSYS_FLAT_FEE = 0.10
OVER_50_FIRST_DATA_PERCENTAGE_FEE = 0.01
OVER_50_FIRST_DATA_FLAT_FEE = 0.09
OVER_50_EVO_PERCENTAGE_FEE = 0.015
OVER_50_EVO_FLAT_FEE = 0.20

In [39]:
#opening of data
#strips every <...> tag (the angle brackets and anything between them),
#leaving a single space where the tag used to be

def _strip_tags(text):
    """Return ``text`` with each ``<...>`` span replaced by one space.

    The match is non-greedy and ``.`` does not cross newlines, so a tag is
    only removed when it opens and closes on the same line.
    """
    return re.sub('<.*?>', " ", text)


#``with`` closes each file as soon as it has been read, instead of leaving
#the handle open for the life of the kernel
with open("Transactions_Sales.txt", "r") as sales_file:
    line = sales_file.read()  #raw sales text, kept under its original name

sales = _strip_tags(line)

with open("Transactions_Voids.txt", "r") as voids_file:
    voids = _strip_tags(voids_file.read())


In [40]:
#creates dataframe and uses data to populate columns and rows

def _parse_transaction_rows(text):
    """Split tag-stripped transaction text into one list of fields per record.

    The first line of ``text`` is skipped. Every remaining line is split on
    whitespace; lines that yield no fields (empty or whitespace-only, e.g.
    what is left of a line that held nothing but tags) are dropped so they
    cannot reach the field indexing below.
    """
    rows = []
    for raw_line in text.splitlines()[1:]:
        fields = raw_line.split()
        if not fields:
            continue
        rows.append(fields)
    return rows


sales_rows = _parse_transaction_rows(sales)

#fields are read positionally: [0] -> Type, [1] -> Amount (as float), [2] -> Number.
#Number stays a string and becomes the index.
#Passing ``columns`` to the constructor keeps the frame well-formed even when
#there are no rows at all.
df = pd.DataFrame(
    [[row[0], float(row[1]), row[2]] for row in sales_rows],
    columns=['Type', 'Amount', 'Number'],
).set_index('Number')


In [41]:
#calculate 3 different service fees for each transaction

#amount at which a transaction switches from the LESS_THAN_50 schedule to the
#OVER_50 schedule; an amount exactly equal to it uses the OVER_50 schedule
FEE_TIER_THRESHOLD = 50.00


def _tieredFee(Amount, over_rate, over_flat, under_rate, under_flat):
    """Return ``Amount * rate + flat`` using the schedule for Amount's tier.

    ``over_rate`` / ``over_flat`` apply when ``Amount >= FEE_TIER_THRESHOLD``;
    ``under_rate`` / ``under_flat`` apply otherwise.
    """
    if Amount >= FEE_TIER_THRESHOLD:
        return (Amount * over_rate) + over_flat
    return (Amount * under_rate) + under_flat


def tsysFeeCalculator(Amount):
    """Return the TSYS fee for a single transaction amount."""
    return _tieredFee(
        Amount,
        OVER_50_TSYS_PERCENTAGE_FEE,
        OVER_50_TSYS_FLAT_FEE,
        LESS_THAN_50_TSYS_PERCENTAGE_FEE,
        LESS_THAN_50_TSYS_FLAT_FEE,
    )


def firstdataFeeCalculator(Amount):
    """Return the First Data fee for a single transaction amount."""
    return _tieredFee(
        Amount,
        OVER_50_FIRST_DATA_PERCENTAGE_FEE,
        OVER_50_FIRST_DATA_FLAT_FEE,
        LESS_THAN_50_FIRST_DATA_PERCENTAGE_FEE,
        LESS_THAN_50_FIRST_DATA_FEE,
    )


def evoFeeCalculator(Amount):
    """Return the EVO fee for a single transaction amount."""
    return _tieredFee(
        Amount,
        OVER_50_EVO_PERCENTAGE_FEE,
        OVER_50_EVO_FLAT_FEE,
        LESS_THAN_50_EVO_PERCENTAGE_FEE,
        LESS_THAN_50_EVO_FLAT_FEE,
    )
    

In [42]:
#adds one fee column per processor by running that processor's calculator
#over every transaction amount; columns are added in the order listed here
fee_calculators = (
    ('TSYS', tsysFeeCalculator),
    ('FIRST_DATA', firstdataFeeCalculator),
    ('EVO', evoFeeCalculator),
)

for fee_column, calculator in fee_calculators:
    df[fee_column] = df["Amount"].apply(calculator)


print(df)

        Type  Amount    TSYS  FIRST_DATA      EVO
Number                                           
1       Sale   76.02  1.6204    0.850200  1.34030
2       Sale   83.52  1.7704    0.925200  1.45280
3       Sale    9.50  0.1950    0.198750  0.19450
4       Sale   60.04  1.3008    0.690400  1.10060
5       Sale   73.16  1.5632    0.821600  1.29740
...      ...     ...     ...         ...      ...
360     Sale   17.93  0.2793    0.304125  0.28723
361     Sale   96.89  2.0378    1.058900  1.65335
362     Sale   97.39  2.0478    1.063900  1.66085
363     Sale   35.42  0.4542    0.522750  0.47962
364     Sale   86.41  1.8282    0.954100  1.49615

[364 rows x 5 columns]


In [43]:
#sums each service's column then sorts them based on amount (descending order)
def calculateRank(df):
    """Total each processor's fees over 'Sale' rows and rank the totals.

    Only rows whose 'Type' equals 'Sale' contribute; any other type (such as
    rows re-labelled 'VOID') is excluded from every total.

    Returns a list of ``(total, column_name)`` tuples for the 'TSYS',
    'FIRST_DATA' and 'EVO' columns, sorted in descending order. The tuples
    are compared whole, so equal totals fall back to descending column name.
    """
    fee_columns = ('TSYS', 'FIRST_DATA', 'EVO')

    #select the sale rows once instead of rebuilding the mask per column
    sale_rows = df.loc[df['Type'] == 'Sale']

    #label every total with the name of the column that was actually summed,
    #so the result does not depend on where the columns sit in the frame
    result = [(sale_rows[name].sum(), name) for name in fee_columns]

    result.sort(reverse = True)

    return result
    

In [44]:
#SUM OF TOTAL FEES FOR EACH SERVICE BEFORE VOIDS
fees_before_voids = calculateRank(df)
print(fees_before_voids)

[(367.0979, 'TSYS'), (317.03677999999996, 'EVO'), (228.140425, 'FIRST_DATA')]


In [45]:
#parses the void records: the first line is skipped and every remaining line
#is split into whitespace-separated fields; lines that yield no fields
#(empty or whitespace-only) are dropped so they cannot reach field indexing
#NOTE: the name `list` shadows the builtin; it is kept because the next cell
#iterates over this variable by that name
list = [
    fields
    for fields in (raw_line.split() for raw_line in voids.splitlines()[1:])
    if fields
]


In [46]:
#re-labels a sale as 'VOID' when a void record names its transaction number
#and carries the same amount; a void with a different amount changes nothing
for void_fields in list:

    #the lookup key is a string; the int() round-trip drops any zero padding
    #before it is turned back into text
    number = str(int(void_fields[2]))

    #a void naming a transaction that is not in the sales table has nothing to
    #cancel, so it is skipped just like a void whose amount does not match
    if number not in df.index:
        continue

    if float(void_fields[1]) == df.at[number, 'Amount']:
        df.at[number, 'Type'] = 'VOID'


In [47]:
#SUM OF TOTAL FEES FOR EACH SERVICE AFTER VOIDS

fees_after_voids = calculateRank(df)
print(fees_after_voids)

[(351.8089, 'TSYS'), (304.27491, 'EVO'), (219.773525, 'FIRST_DATA')]
