In [84]:
# libraries
import csv

In [85]:
# Some global variables
# All the dates
timestamps = ['2015-01-0' + str(x+1) for x in range(9)] +  ['2015-01-' + str(x+1) for x in range(9, 30)]
# All the accounts
accounts = []
# All the transactions
transactions = []

class Transaction:
    '''
    Class for a single transaction
    with account name(string), date (string), fraud(bool), merchant(string)
    '''
    def __init__(self, account, date, fraud, merchant):
        self.account = account
        self.date = date
        self.fraud = fraud
        self.merchant = merchant
    def __str__(self):
        return self.account + ' ' + self.date + ' ' + str(self.fraud) + ' ' + self.merchant

class Account:
    '''
    Class for a single account
    with account name(string), transactions(list of Transaction Object)
    '''
    def __init__(self, name, transactions):
        self.name = name
        self.transactions = transactions
    def __str__(self):
        return self.name + ' ' + str(len(self.transactions))
    def is_frauded(self, check_time):
        global timestamps
        check_timestamps = timestamps[:timestamps.index(check_time)]
        for transaction in self.transactions:
            if transaction.date in check_timestamps and transaction.fraud:
                return True
        return False
    def find_fraudulent_date(self, check_time):
        '''
        This function checks whether this account has frauded transactions later than the check_time
        params:
            check_time: string
        return:
            list of the dates of frauded transactions
            There can be duplicated dates indicating that there are multiple frauded transactions on that day
        '''
        global timestamps
        check_timestamps = timestamps[timestamps.index(check_time)+1:]
        date_list = []
        for transaction in self.transactions:
            if transaction.date in check_timestamps and transaction.fraud:
                date_list.append(transaction.date)
        return date_list

def find_account_by_transaction(transaction):
    # Find the account object by transaction object
    for account in accounts:
        if account.name == transaction.account:
            return account
    return None
def find_account_by_name(name):
    # find the account by its name
    for account in accounts:
        if account.name == name:
            return account
    return None

In [86]:
# Read CSV filles
with open('intern_2019_transactions_simple.csv') as f:
    f_csv = csv.reader(f)
    headers = next(f_csv)
    for row in f_csv:
        if row[2] == 'False':
            row[2] = False
        else:
            row[2] = True
        # Store the transaction data into variable
        transaction = Transaction(row[0], row[1], row[2], row[3])
        transactions.append(transaction)


In [87]:
# Some validation of transactions
print(len(transactions))
# All frauded transactions
frauded_transactions = [transaction for transaction in transactions if transaction.fraud == True]
print(len(frauded_transactions))
# Chcek the timestamps
print(timestamps)
# Find all merchants without duplicated items
merchants = sorted(list(set([transaction.merchant for transaction in transactions])))
# Find all account names without duplicated items
account_names = sorted(list(set([transaction.account for transaction in transactions])))

# Store accounts into variable
for name in account_names:
    new_account = Account(name, [transaction for transaction in transactions if transaction.account == name])
    #print(new_account)
    accounts.append(new_account)

89851
1348
['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-10', '2015-01-11', '2015-01-12', '2015-01-13', '2015-01-14', '2015-01-15', '2015-01-16', '2015-01-17', '2015-01-18', '2015-01-19', '2015-01-20', '2015-01-21', '2015-01-22', '2015-01-23', '2015-01-24', '2015-01-25', '2015-01-26', '2015-01-27', '2015-01-28', '2015-01-29', '2015-01-30']


In [88]:
def compare_two_days(accounts_list_1, accounts_list_2):
    # compare two days and find 
    return len(set(accounts_list_1).intersection(accounts_list_2))

In [89]:
for merchant in merchants:
    merch_trans = [transaction for transaction in transactions if transaction.merchant == merchant]
    # all the transactions on this merchant
    print(merchant, len(merch_trans), '=======')
    # Sort the transactions by date
    merch_trans = sorted(merch_trans, key=lambda x: x.date, reverse=False)
    trans_num = [0 for timestamp in timestamps]
    for timestamp in timestamps:
        # find the transactions on this day and on this merchant
        this_day_transactions = [transaction for transaction in merch_trans if transaction.date == timestamp]
        fraudulent_date = []
        fraudulent_date_dict = {}
        all_accounts = []
        #print(merchant, timestamp)
        for transaction in this_day_transactions:
            if find_account_by_transaction(transaction).name not in all_accounts:
                # find all the accounts that have transactions on this day
                all_accounts.append(find_account_by_transaction(transaction).name)
        #print(all_accounts)
        for account_name in all_accounts:
            account = find_account_by_name(account_name)
            # Get frauded transactions later than this day of these accounts
            fraudulent_date+=account.find_fraudulent_date(timestamp)
        #print(fraudulent_date)
        for date in fraudulent_date:
            # Count the date of all frauded transactions of these accounts
            fraudulent_date_dict.setdefault(date, 0)
            fraudulent_date_dict[date] +=1 
        new_fraudulent_date_dict = {}
        for key in fraudulent_date_dict.keys():
            # Set the threshold to find the burst of frauded transactions
            if fraudulent_date_dict[key] > 30:
                new_fraudulent_date_dict[key] = fraudulent_date_dict[key]
        #print(fraudulent_date_dict)
        if len(new_fraudulent_date_dict.keys()) > 0:
            # if there is a burst on that day
            print(timestamp)
            print(new_fraudulent_date_dict)
            

2015-01-15
{'2015-01-21': 68, '2015-01-22': 59, '2015-01-23': 64}
2015-01-16
{'2015-01-21': 89, '2015-01-22': 97, '2015-01-23': 84}


In [90]:
# Validate the result by listing all the frauded transactions related to those affected accounts
compromise_merchant = 'M18'
compromise_timestamps = ['2015-01-15', '2015-01-16']
# Find the transactions on these days
this_day_transactions = [transaction for transaction in transactions if transaction.merchant == merchant and transaction.date == timestamp]
compromise_all_accounts = []

for transaction in this_day_transactions:
    if find_account_by_transaction(transaction).name not in compromise_all_accounts:
        # find affected accounts without duplicated items
        compromise_all_accounts.append(find_account_by_transaction(transaction).name)
print('affected accounts:', compromise_all_accounts)
print('detail transactions:')
# Print details of the frauded transactions of these affected accounts
for account_name in all_accounts:
    account = find_account_by_name(account_name)
    print('#', account.name)
    for trans in [transaction for transaction in account.transactions if transaction.fraud == True]:
        print(trans)

affected accounts: ['A5', 'A92', 'A138', 'A147', 'A168', 'A213', 'A278', 'A384', 'A387', 'A490', 'A501', 'A504', 'A538', 'A590', 'A607', 'A647', 'A693', 'A698', 'A708', 'A739', 'A748', 'A757', 'A893', 'A974']
detail transactions:
# A5
# A92
A92 2015-01-22 True M49
# A138
# A147
A147 2015-01-01 True M23
# A168
# A213
A213 2015-01-06 True M90
A213 2015-01-07 True M99
A213 2015-01-20 True M51
A213 2015-01-23 True M36
A213 2015-01-24 True M87
# A278
A278 2015-01-04 True M94
# A384
# A387
# A490
A490 2015-01-21 True M33
# A501
A501 2015-01-30 True M75
# A504
# A538
A538 2015-01-13 True M94
# A590
A590 2015-01-13 True M16
A590 2015-01-22 True M7
# A607
A607 2015-01-07 True M49
A607 2015-01-11 True M18
# A647
# A693
A693 2015-01-04 True M24
# A698
# A708
A708 2015-01-02 True M65
A708 2015-01-05 True M38
A708 2015-01-06 True M8
A708 2015-01-25 True M63
# A739
# A748
# A757
# A893
A893 2015-01-22 True M68
# A974
A974 2015-01-16 True M55
