In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import sys, os

# Make the project's top-level folders importable (only needed when running from Jupyter).
# The project root is taken to be the parent of the current working directory.
proj_path = Path().cwd().parent

# next(os.walk(...)) yields (root, dirs, files) for the top level only; the default
# tuple avoids a bare StopIteration if the directory cannot be listed.
top_level_folders = next(os.walk(proj_path), (None, [], None))[1]

for folder in top_level_folders:
    if '.' in folder:  # skips every name containing a dot, which covers hidden folders like .git
        continue
    path_to_add = os.path.join(proj_path, folder)
    # Re-running this cell must not pile up duplicate entries on sys.path.
    if path_to_add not in sys.path:
        sys.path.append(path_to_add)

from augment_transaction_data import AugmentTransactionData

In [2]:
# Create the augmentation helper from the project root path
# (per the original note, this step also loads the data).
atd = AugmentTransactionData(proj_path)

# Fetch the transactions that carry an identifier (SEPA, BEA, iDEAL, ...) and pass
# them straight through the sensitive-data filter, naming 'amount' as an extra column.
transactions = atd.remove_sensitive_data(
    atd.get_transactions_with_identifiers(),
    extra_cols=['amount'],
)

# Preview the first rows.
transactions.head()

Unnamed: 0,description,identifier
0,SEPA iDEAL IBAN: NL31ABN...,SEPA_iDEAL
1,"BEA, Apple Pay Zettle_*Vief ...",BEA_Apple
2,"BEA, Apple Pay CCV*World Net...",BEA_Apple
3,"BEA, Apple Pay CCV*World Net...",BEA_Apple
4,"BEA, Apple Pay Kronkel BV,PA...",BEA_Apple


In [3]:
# Find the most frequent transaction types.
# NOTE(review): the result is not used again in the visible cells — confirm whether
# this call is needed for side effects on `atd` before removing it.
most_freq_transaction_types = atd.get_most_freq_transaction_types()

# Build the dataset with company labels from the identifier-tagged `transactions`
# produced in the previous cell.
tr_data_with_companies = atd.get_transactions_with_company_labels(transactions)

# Preview the first rows.
tr_data_with_companies.head()




Unnamed: 0,description,identifier,company
0,SEPA iDEAL IBAN: NL31ABN...,SEPA_iDEAL,Thuisbezorgd.nl
1,"BEA, Apple Pay Zettle_*Vief ...",BEA_Apple,Zettle_*Vief
2,"BEA, Apple Pay CCV*World Net...",BEA_Apple,CCV*World
3,"BEA, Apple Pay CCV*World Net...",BEA_Apple,CCV*World
4,"BEA, Apple Pay Kronkel BV,PA...",BEA_Apple,Kronkel


In [4]:
# Example: what happens when a function is given the wrong kind of data (a KeyError).
# Separate variable names are used here so the identifier-tagged `transactions`
# from the earlier cell is not overwritten and the earlier cells stay re-runnable.

# Deliberately load the data WITHOUT identifiers ...
transactions_no_ids_raw = atd.get_transaction_data()

# ... and remove sensitive info.
transactions_no_ids = atd.remove_sensitive_data(transactions_no_ids_raw)

# Asking for company labels is expected to fail. The error is caught and displayed
# so that "Restart & Run All" does not stop at this cell.
try:
    atd.get_transactions_with_company_labels(transactions_no_ids)
except KeyError as err:
    # str() of a KeyError is the repr of its message, so this prints: KeyError: "..."
    print(f"{type(err).__name__}: {err}")


KeyError: "The transaction data used in this function requires the 'identifier' field.Consider using the output of get_transactions_with_identifiers() function.Alternatively, use the function with no input arguments."