<h1>Assignment for the Case AML (System Development for Fintech)</h1>
<h4>David Widlak</h4>
This document represents an anti money laundering algorithm. This document converts MT103 Swift messages into a workable data structure. These messages are then analyzed using different functions and suspicious transactions are flagged.

<h4>Initial inclusion of assisting Python libraries</h4>

In [1762]:
import re # Regex functionality
import pandas as pd # Facilitates data operations
import numpy as np # Used to impute NaN values into dataframes
import pycountry # Used to associate full country names with (ISO 3166-1) alpha-2 codes

<h4>Converting MT103 Swift message into a different data structure</h4>

In [1763]:
# Function to disect currency, amounth and date from MT103 tag 32A
def extract_32a(value):
    date = value[:6]
    # Extract the currency type (next 3 characters)
    currency_type = value[6:9]
    # Extract the amount (rest of the string)
    amount = value[9:]

    return date, currency_type, amount

In [1764]:
# Function to structurize MT103 messages into a dictionary
def structurize_mt103(message):
    msg_dic = {}
    # In case the message is a text file
    message = message.read()
    # Split the unstructured message with regex
    key_value = re.split(r'}{|\n:', message)
    # Key without associating value
    if '4:' in key_value:
        key_value.remove('4:')
    elif '{4:' in key_value:
        key_value.remove('{4:')
    elif '4' in key_value:
        key_value.remove('4')

    for kv_str in key_value:
        # Split into key value pairs
        key, value = kv_str.split(':', maxsplit = 1)
        msg_dic[key] = value

    return msg_dic

In [1765]:
import re

def extract_bank_information(value):
    lines = value.split('\n')

    bic_or_account = None
    name = None
    is_bic = None
    for idx, line in enumerate(lines):
        if re.match(r'^[A-Za-z]{4}[A-Za-z]{2}[A-Za-z0-9]{2}([A-Za-z0-9]{3})?$', line):
            bic_or_account = line
            name = lines[idx + 1]
            location = lines[idx + 2:]
            is_bic = True
            break

    if not bic_or_account:
        bic_or_account = lines[0][1:]
        name = lines[1]
        location = lines[2:]
        is_bic = False

    originator_reference = ''
    if str(lines[-1]).startswith('/'):
        originator_reference = lines[-1][1:]
        location.pop()

    if ',' in location[-1]:
        sub_list = location[-1].split(',')
        location.pop()
        location.extend(sub_list)

    city_or_address = ' '.join(location[:-1])
    is_address = any(char.isdigit() for char in city_or_address)
    country = location[-1]

    return bic_or_account, is_bic,name, city_or_address, is_address, country, originator_reference

swift = '''/DE98765432101234567890
COMMERZBANK AG
HAMBURG, GERMANY
/COBADEHHXXX'''

extract_bank_information(swift)
print(extract_bank_information(swift))

('DE98765432101234567890', 'COMMERZBANK AG', 'HAMBURG', '', ' GERMANY', 'COBADEHHXXX')


In [1766]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

swift_dic

{'{1': 'F01ABNANL2AXXX0000000000',
 '2': 'I103SCBLGB2LXXXXN',
 '3': '{103:TGT',
 '108': 'MT103 0001}',
 '20': 'MT103 0001',
 '23B': 'CRED',
 '32A': '210322USD9899,',
 '50A': '/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS BV\nAMSTERDAM, NETHERLANDS',
 '56A': '/SCBLGB2LXXX\nSTANDARD CHARTERED BANK\nLONDON, UK',
 '57A': '/BNYMUS33XXX\nBNY MELLON\nNEW YORK, NY, US',
 '59': '/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGOS, NIGERIA\nXYZ ENTERPRISES LTD\nLAGOS, NIGERIA',
 '70': 'INV NO. 12345\nREF. 98765\nSUPPLY OF GOODS AS PER PURCHASE ORDER NO. 54321\n-}\n'}

In [1779]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

swift_corr = {
    'transaction_date': ['32A', 'date'],
    'transaction_id': ['20', 'string'],
    'transaction_message': ['71G', 'string', ''],
    'transaction_currency': ['32A', ['regex', "^(AED|AFN|ALL|AMD|ANG|AOA|ARS|AUD|AWG|AZN|BAM|BBD|BDT|BGN|BHD|B IF|BMD|BND|BOB|BOV|BRL|BSD|BTN|BWP|BYR|BZD|CAD|CDF|CHE|CHF|CHW|CLF|CLP |CNY|COP|COU|CRC|CUC|CUP|CVE|CZK|DJF|DKK|DOP|DZD|EGP|ERN|ETB|EUR|FJD|F KP|GBP|GEL|GHS|GIP|GMD|GNF|GTQ|GYD|HKD|HNL|HRK|HTG|HUF|IDR|ILS|INR|IQD |IRR|ISK|JMD|JOD|JPY|KES|KGS|KHR|KMF|KPW|KRW|KWD|KYD|KZT|LAK|LBP|LKR|L RD|LSL|LTL|LVL|LYD|MAD|MDL|MGA|MKD|MMK|MNT|MOP|MRO|MUR|MVR|MWK|MXN|MXV |MYR|MZN|NAD|NGN|NIO|NOK|NPR|NZD|OMR|PAB|PEN|PGK|PHP|PKR|PLN|PYG|QAR|R ON|RSD|RUB|RWF|SAR|SBD|SCR|SDG|SEK|SGD|SHP|SLL|SOS|SRD|SSP|STD|SVC|SYP |SZL|THB|TJS|TMT|TND|TOP|TRY|TTD|TWD|TZS|UAH|UGX|USD|USN|USS|UYI|UYU|U ZS|VEF|VND|VUV|WST|XAF|XAG|XAU|XBA|XBB|XBC|XBD|XCD|XDR|XFU|XOF|XPD|XPF |XPT|XSU|XTS|XUA|XXX|YER|ZAR|ZMW|ZWL)$"]],
    'transaction_amount': ['32A', 'string'],
    'transaction_type': ['23B', 'string'],
    'transaction_direction': ['null', ['i', 'o', 'io']],
    'transaction_status': ['null', ['accepted', 'rejected']],
    'instrument_type': ['null', ['cash','check','ach/lcy_transfers','wire','securities','e- money/mobile_money','travellers_cheques','prepaid_cards','certified_cheques','vouchers','cashier_cheques/money_order','precious_metal','cryp to/virtual_assets','interest/dividend','other']],
    'originator_full_name': ['50', 'string'],
    'originator_first_name': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'originator_last_name': ['null', ['string', '']],
    'originator_address': ['50', 'string'],
    'originator_country': ['50', ['regex', "^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$"]],
    'originator_account_number': ['50', 'string'],
    'originator_branch_id': ['null', ['string', '']],
    'originator_bic': ['50', 'string'],
    'originator_fi_name': ['50', 'string'],
    'originator_fi_country': ['50', 'string'],
    'incoming_intermediary_fi_bic': ['56A', ['regex', r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi", '']],
    'outgoing_intermediary_fi_bic': ['57A', ['regex', r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi", '']],
    'beneficiary_full_name': ['59', 'string'],
    'beneficiary_first_name': ['null', ['string', '']],
    'beneficiary_middle_names_patronymic': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'beneficiary_last_name': ['null', ['string', '']],
    'beneficiary_account_number': ['59', 'string'],
    'beneficiary_address': ['59', 'string'],
    'beneficiary_country': ['59', ['regex', "^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$"]],
    'beneficiary_branch_id': ['null', ['string', '']],
    'originator_bic': ['71F', 'string'],
    'beneficiary_fi_name': ['59', 'string'],
    'beneficiary_fi_country': ['59', 'string'],
}


# Function to populate the dataframe from  MT103 dictionary above. Function takes swift MT103 message as parameter
# and uses the swift_corr as translator, it gaps the bridge between swift tags and readable variable names and the values.
def populate_dataframe(mt103_message, df):
    new_row = {}
    
    for item in swift_corr:
        for key in mt103_message:
            if swift_corr[item][0] in key:
                try:
                    # Insert each value as a string
                    new_row[item] = str(mt103_message[key])
                except:
                    print('Cannot convert data to string type')
    
    # Append the new row to the existing DataFrame
    return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

# Create an empty dataframe with the required columns
columns = list(swift_corr.keys())
df = pd.DataFrame(columns=columns)

# Call the function with the MT103 message and the existing DataFrame
df = populate_dataframe(swift_dic, df)
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country
0,"210322USD9899,",MT103 0001,,"210322USD9899,","210322USD9899,",CRED,,,,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,,,,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,,,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,/NL20ABNA0404875234\nABNANL2A\nABC SUPPLIERS B...,"/SCBLGB2LXXX\nSTANDARD CHARTERED BANK\nLONDON, UK","/BNYMUS33XXX\nBNY MELLON\nNEW YORK, NY, US",/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...,,,,/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...,/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...,/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...,,/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...,/PASSNGLAXXX\nAFRICAN EXPORT-IMPORT BANK\nLAGO...


In [1768]:
# Universal function for cleaning up strings. Takes the DF, column and the string which needs to be deleted
def clean_string_in_df(df, column_name, sub_string):
    for i in range(len(df)):
        cell_value = df.loc[i, column_name]
        if sub_string in cell_value:
            cell_value = cell_value.replace(sub_string, "")
            df.loc[i, column_name] = cell_value

In [1769]:
# Universal function for extracting multiline bank data
def update_df_with_bank_info(df, bank_info, columns, info_type):
    # Extracts bank information 
    bic_or_account, bank, city, address, country = bank_info[:5]
    reference = bank_info[5] if len(bank_info) > 5 else None

    # Insert the columns with data, info_type can be beneficiary or originator
    for item in columns:
        if f'{info_type}_account_number' in item or f'{info_type}_bic' in item:
            df[item] = bic_or_account
        # Use the country code in the bic to get country
        elif f'{info_type}_fi_country' in item:
            df[item] = bic_or_account[0:2]
        elif f'{info_type}_branch' in item or f'{info_type}_fi_name' in item or f'{info_type}_full_name' in item:
            df[item] = bank
        elif f'{info_type}_city' in item or f'{info_type}_address' in item:
            df[item] = city
        elif f'{info_type}_country' in item:
            if country:
                # Find country using PyCountry
                if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                    country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                    df[item] = country_iso
                else:
                    df[item] = country
        # If reference is included, add it.
        elif 'reference' in item and reference:
            df[item] = reference


def impute_and_clean_df(df):
    # Ensure string variables
    df.loc[:, :] = df.astype(str)
    
    # Get transaction columns
    transaction_columns = [key for key in swift_corr if '32A' in swift_corr[key]]
    transaction_info_str = df[transaction_columns[0]][0]
    # Seperate the string into variables
    trans_date, trans_currency, trans_amount = extract_32a(transaction_info_str)

    for item in transaction_columns:
        if 'transaction_date' in item:
            df[item] = trans_date
        elif 'transaction_amount' in item:
            df[item] = trans_amount
        elif 'transaction_currency' in item:
            df[item] = trans_currency

    # For each of the bank related tags, get the keys, extract the data, and use the function above to impute values
    bank_originator_columns = [key for key in swift_corr if any(x in swift_corr[key] for x in ['50', '50K', '71G'])]
    originator_info_str = df['originator_address'][0]
    originator_bank_info = extract_bank_information(originator_info_str)
    update_df_with_bank_info(df, originator_bank_info, bank_originator_columns, 'originator')

    # Same as the lines above, but for beneficiary
    bank_beneficiary_columns = [key for key in swift_corr if '59' in swift_corr[key]]
    beneficiary_info_str = df['beneficiary_address'][0]
    beneficiary_bank_info = extract_bank_information(beneficiary_info_str)
    update_df_with_bank_info(df, beneficiary_bank_info, bank_beneficiary_columns, 'beneficiary')

    # Clean strings
    clean_string_in_df(df, 'originator_bic', '/BIC/')
    clean_string_in_df(df, 'transaction_amount', ',')
    clean_string_in_df(df, 'transaction_message', '/INS/')
    clean_string_in_df(df, 'transaction_message', '/MSG/')
    clean_string_in_df(df, 'transaction_message', '\n-}')
    clean_string_in_df(df, 'transaction_message', '\n')

impute_and_clean_df(df)
pd.set_option('display.max_columns', None)
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country
0,210322,MT103 0001,,USD,9899,CRED,,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,"/SCBLGB2LXXX\nSTANDARD CHARTERED BANK\nLONDON, UK","/BNYMUS33XXX\nBNY MELLON\nNEW YORK, NY, US",AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA


In [1770]:
# Transaction direction are indicated by the MT message type
# The message type can be extracted from the transaction ID
def add_transaction_direction(df):
    for i in range(len(df)):
        cell_trans_id = df.loc[i, 'transaction_id']
        if 'MT103' in cell_trans_id:
            df.loc[i, 'transaction_direction'] = 'o'
        if 'MT202' in cell_trans_id:
            df.loc[i, 'transaction_direction'] = 'o'
        if 'MT300' in cell_trans_id:
            df.loc[i, 'transaction_direction'] = 'i'
        if 'MT910' in cell_trans_id:
            df.loc[i, 'transaction_direction'] = 'i'
        if 'MT950' in cell_trans_id:
            df.loc[i, 'transaction_direction'] = 'io'

add_transaction_direction(df)
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,"/SCBLGB2LXXX\nSTANDARD CHARTERED BANK\nLONDON, UK","/BNYMUS33XXX\nBNY MELLON\nNEW YORK, NY, US",AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA


In [1771]:
# Function to extract BIC codes from intermediary banks
def extract_bic_intermediary(df):
    # Find columns that contain intermediary bic info
    cols = [col for col in df.columns if 'intermediary_fi_bic' in col]
    for col in cols:
        # Split the string based on '\n', keep the first item which is the bic
        raw_bic = df[col].str.split('\n').str[0]
        raw_bic = str(raw_bic[0]).replace('/', '')
        df[col] = raw_bic

extract_bic_intermediary(df)
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,SCBLGB2LXXX,BNYMUS33XXX,AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA


<h1>AML Functions</h1>
<h3>Round amounth payments</h3>
Looking at round payment transaction amounts is important for anti-money laundering (AML) because it can be an indicator of suspicious activity that may warrant further investigation.

Detection is done by looking if the amount can be divided by 100, if so, the transaction gets flagged as round_number = True.

In [1772]:
def is_round(df):
    # Try to convert into a number, if possible throw exception
    for index, num in df['transaction_amount'].iteritems():
        try:
            num = int(num)
            # Check for round number and add to column
            if num % 100 == 0:
                df.at[index, 'round_number'] = True
            else:
                df.at[index, 'round_number'] = False
        except ValueError:
            return "Could not convert to number"
    
is_round(df)
df


Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country,round_number
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,SCBLGB2LXXX,BNYMUS33XXX,AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA,False


<h3>High risk countries</h3>
Financial secrecy and money laundering are closely linked, as financial secrecy can provide a safe haven for criminals to hide and launder illicitly obtained funds. When a country has weak or non-existent regulations around financial transparency, it becomes an attractive destination for criminals looking to hide their illicit activities.

The detection is done by looking at the originator country of the funds, if this country is included in the list of high financial secrecy, the transaction is flagged with high_risk_country = True.

In [1773]:
# TODO: import text file instead of premade list
def high_risk_country(df, path_to_countries_file):
    # List of country codes of highest risk countries based on 
    # https://fsi.taxjustice.no/fsi/2022/world/index/top from text file
    countries_file = open(path_to_countries_file, "r")
    risk_countries = countries_file.read()
    high_risk_countries = risk_countries.split('\n')
    high_risk_countries = [item.strip() for item in high_risk_countries]

    for index, country_iso in df['originator_country'].iteritems():
        if country_iso in high_risk_countries:
        # set the value to True for the corresponding row
            df.at[index, 'high_risk_country'] = True
        else:
            df.at[index, 'high_risk_country'] = False

high_risk_country(df, "high_risk_countries.txt")
df



Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country,round_number,high_risk_country
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,SCBLGB2LXXX,BNYMUS33XXX,AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA,False,False


<h3>Nesting</h3>
In banking, "nesting" (also known as "layering") is a method of money laundering that involves moving funds through multiple accounts or layers of financial transactions in order to obscure the source and ownership of the funds.

Detection of nesting is done by detecting obscure locations by any involved party, such as beneficiary, intermediary or originator. 

In [1774]:
def nesting_detection(df, path_to_countries_file):
    # Read the countries from a .txt file
    countries_file = open(path_to_countries_file, "r")
    risk_countries = countries_file.read()
    nesting_countries = risk_countries.split('\n')


    for index, row in df.iterrows():
        # Detect as many layers as possible which could facilitate nesting
        if (row['originator_fi_country'] in nesting_countries) or (row['beneficiary_country'] in nesting_countries) or (row['beneficiary_fi_country'] in nesting_countries) or (row['originator_country'] in nesting_countries) or (str(row['outgoing_intermediary_fi_bic']) and row['outgoing_intermediary_fi_bic'][4:6] in nesting_countries) or (str(row['incoming_intermediary_fi_bic']) and row['incoming_intermediary_fi_bic'][4:6] in nesting_countries):
            df.at[index, 'nesting_detection'] = True
        else:
            df.at[index, 'nesting_detection'] = False

nesting_detection(df, 'nesting_countries.txt')
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country,round_number,high_risk_country,nesting_detection
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,SCBLGB2LXXX,BNYMUS33XXX,AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA,False,False,True


<h3>Smurfing</h3>
In banking, "smurfing" (also known as "structuring") refers to the illegal practice of making multiple small deposits or transactions instead of one large deposit or transaction in order to avoid suspicion and detection by authorities.

The detection is done by analyzing multiple transactions from the same originator in a given timeframe, with a self imposed threshold.

In [1775]:
def detect_smurfing(df, threshold=10000):
    # Ensure the transaction_amount column is numeric
    df['transaction_amount'] = pd.to_numeric(df['transaction_amount'], errors='coerce')
    # Group by the originator and sum the transaction_amount
    grouped_df = df.groupby('originator_account_number').agg({'transaction_amount': 'sum'}).reset_index()
    # Find whose account number is exceeding theshold
    smurfing_originators = grouped_df[grouped_df['transaction_amount'] < threshold]['originator_account_number'].tolist()
    # Add a new column 'smurfing_detection' and set values based on the originators
    df['smurfing_detection'] = df['originator_account_number'].apply(lambda x: x in smurfing_originators)

detect_smurfing(df)
df

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_amount,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_middle_names_patronymic,originator_last_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,incoming_intermediary_fi_bic,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_middle_names_patronymic,beneficiary_last_name,beneficiary_account_number,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_fi_name,beneficiary_fi_country,round_number,high_risk_country,nesting_detection,smurfing_detection
0,210322,MT103 0001,,USD,9899,CRED,o,,,ABC SUPPLIERS BV,,,,AMSTERDAM,NL,ABNANL2A,,,ABC SUPPLIERS BV,AB,SCBLGB2LXXX,BNYMUS33XXX,AFRICAN EXPORT-IMPORT BANK,,,,PASSNGLAXXX,"LAGOS, NIGERIA",NG,,AFRICAN EXPORT-IMPORT BANK,PA,False,False,True,True
