<h1>Assignment for the Case AML (System Development for Fintech)</h1>
<h4>David Widlak</h4>
This document represents an anti money laundering algorithm. This document converts MT103 Swift messages into a workable data structure. These messages are then analyzed using different functions, any suspicious transactions are then flagged using these functions. 

<h4>Initial inclusion of assisting Python libraries</h4>

In [225]:
import re
from datetime import date
import pandas as pd
import numpy as np
from datetime import datetime
import pycountry

<h4>Converting MT103 Swift message into a different data structure</h4>

In [226]:
# Function to structurize MT103 messages into a dictionary
def structurize_mt103(message):
    msg_dic = {}
    # In case the message is a text file
    message = message.read()
    # Split the unstructured message with regex
    key_value = re.split(r'}{|\n:', message)
    # Key without associating value
    if '4:' in key_value:
        key_value.remove('4:')
    elif '{4:' in key_value:
        key_value.remove('{4:')
    elif '4' in key_value:
        key_value.remove('4')

    for kv_str in key_value:
        # TODO: Clean the key strings, remove 
        # kv_str = re.sub(r',|\n', '', str(key_value))
        # Split into key value pairs
        key, value = kv_str.split(':', maxsplit = 1)
        msg_dic[key] = value

    return msg_dic

In [227]:
# Create dataframe, which may later be populated with MT103 messages. Returns a pandas dataframe.
# This function creates readable keys, so MT103 variables can be assigned to readable keys.
# This function also creates a dictionary which contains all keys with variable requirements as values


def create_dataframe(mt103_markup_message):
    df, value_requirements = pd.DataFrame(), {}

    # Define regular expression ˚pattern to find keys in the message
    key_re = re.compile(r'(transaction|instrument|originator|beneficiary|ingoing|outgoing)_*')

    # Load MT103 file and read it line by line
    for line in mt103_markup_message:
        # Use the regex to find key / value pairs
        if key_re.search(line) and not line.startswith('//'):   
            key, value = line.split(maxsplit=1)    
            # Assign keys as columns into the dataframe
            key_name = key.rstrip(':')
            value_requirements[key] = value
            df[key_name] = np.NaN

    # Returns dataframe and value requirements dictionary
    return df, value_requirements

In [228]:
# Function to disect currency, amounth and date from MT103 tag 32A
def extract_32a(value):
    date = value[:6]
    # Extract the currency type (next 3 characters)
    currency_type = value[6:9]
    # Extract the amount (rest of the string)
    amount = value[9:]

    return date, currency_type, amount

In [229]:
# Function to universally extract values from bank information from MT103 message
def extract_bank_information(value):
    # Split the string into lines
    lines = value.split('\n')

    # Extract the information from each line of a bank value. 
    # Seperates bic, bank and address
    if len(lines) == 3:
        print('3')
        bic = lines[0][1:]
        bank = lines[1]
        location = lines[2]
    elif len(lines) == 4:
        print('4')
        bic = lines[0][1:]
        bank = lines[1]
        location = lines[2]
    elif len(lines) == 5:
        print('5')
        bic = lines[0][1:]
        bank = lines[1]
        location = lines[2:]
    elif len(lines) == 6:
        print('6')
        bic = lines[0][1:]
        bank = lines[1]
        location = lines[2:]
    else:
        print('Amount of lines is outside of range')
    
    if ',' in location:
        location = location.split(',')
    
    originator_reference = ''

    # Recognize whether last line is place or reference and its it as return item
    if str(lines[-1]).startswith('/'):
        originator_reference = lines[-1][1:]
        city = location[0] 
        # address = location[1] 
        country = location[1]
        return bic, bank, city, country, originator_reference
    elif len(location) > 2:
        city = location[0] 
        address = location[1] 
        country = location[2]
        if originator_reference:
            return bic, bank, city, address, country
        else:
            return bic, bank, city, address, country, originator_reference
    else:
        city = location[0] 
        country = location[1]
        if originator_reference:
            return bic, bank, city, country
        else:
            return bic, bank, city, country, originator_reference

with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)


swieft = '''/GB57METR12345678901234
NORDFISCH GMBH
BODENSEE STR. 226
22761 HAMBURG
GERMANY'''

print(extract_bank_information(swieft))

5
('GB57METR12345678901234', 'NORDFISCH GMBH', 'BODENSEE STR. 226', '22761 HAMBURG', 'GERMANY', '')


In [230]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

swift_dic

{'{1': 'F01MYMBGB2L0XXX0000000000',
 '2': 'I103HBUKGB4BXXXN',
 '3': '{108:MT103\n0001}',
 '20': 'MT103 0001',
 '23B': 'CRED',
 '32A': '210322USD5000,',
 '50K': '/DE98765432101234567890\nCOMMERZBANK AG\nHAMBURG, GERMANY\n/COBADEHHXXX',
 '52A': '/COBADEHHXXX\nCOMMERZBANK AG\nHAMBURG, GERMANY',
 '53A': '/MYMBGB2LXXX\nMETRO BANK PLC\nLONDON, UNITED KINGDOM',
 '57A': '/HBUKGB4BXXX\nHSBC BANK PLC\nLONDON, UNITED KINGDOM',
 '59': '/GB57METR12345678901234\nNORDFISCH GMBH\nBODENSEE STR. 226\n22761 HAMBURG\nGERMANY',
 '71A': 'OUR',
 '71F': '/BIC/HBUKGB4BXXX',
 '71G': '/INS/THIS IS A PAYMENT FOR TUNA SUPPLY\n-}'}

In [231]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

with open('swift.txt', 'r') as file:
    df, value_requirements = create_dataframe(file)

swift_corr = {
    'transaction_date': ['32A', 'date'],
    'transaction_id': ['20', 'string'],
    'transaction_message': ['71G', 'string', ''],
    'transaction_currency': ['32A', ['regex', "^(AED|AFN|ALL|AMD|ANG|AOA|ARS|AUD|AWG|AZN|BAM|BBD|BDT|BGN|BHD|B IF|BMD|BND|BOB|BOV|BRL|BSD|BTN|BWP|BYR|BZD|CAD|CDF|CHE|CHF|CHW|CLF|CLP |CNY|COP|COU|CRC|CUC|CUP|CVE|CZK|DJF|DKK|DOP|DZD|EGP|ERN|ETB|EUR|FJD|F KP|GBP|GEL|GHS|GIP|GMD|GNF|GTQ|GYD|HKD|HNL|HRK|HTG|HUF|IDR|ILS|INR|IQD |IRR|ISK|JMD|JOD|JPY|KES|KGS|KHR|KMF|KPW|KRW|KWD|KYD|KZT|LAK|LBP|LKR|L RD|LSL|LTL|LVL|LYD|MAD|MDL|MGA|MKD|MMK|MNT|MOP|MRO|MUR|MVR|MWK|MXN|MXV |MYR|MZN|NAD|NGN|NIO|NOK|NPR|NZD|OMR|PAB|PEN|PGK|PHP|PKR|PLN|PYG|QAR|R ON|RSD|RUB|RWF|SAR|SBD|SCR|SDG|SEK|SGD|SHP|SLL|SOS|SRD|SSP|STD|SVC|SYP |SZL|THB|TJS|TMT|TND|TOP|TRY|TTD|TWD|TZS|UAH|UGX|USD|USN|USS|UYI|UYU|U ZS|VEF|VND|VUV|WST|XAF|XAG|XAU|XBA|XBB|XBC|XBD|XCD|XDR|XFU|XOF|XPD|XPF |XPT|XSU|XTS|XUA|XXX|YER|ZAR|ZMW|ZWL)$"]],
    'transaction_amount': ['32A', 'string'],
    'transaction_type': ['23B', 'string'],
    'transaction_direction': ['null', ['i', 'o', 'io']],
    'transaction_status': ['null', ['accepted', 'rejected']],
    'instrument_type': ['null', ['cash','check','ach/lcy_transfers','wire','securities','e- money/mobile_money','travellers_cheques','prepaid_cards','certified_cheques','vouchers','cashier_cheques/money_order','precious_metal','cryp to/virtual_assets','interest/dividend','other']],
    'originator_full_name': ['50', 'string'],
    'originator_first_name': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'originator_last_name': ['null', ['string', '']],
    'originator_address': ['50K', 'string'],
    'originator_country': ['50', ['regex', "^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$"]],
    'originator_account_number': ['50', 'string'],
    'originator_branch_id': ['null', ['string', '']],
    'originator_bic': ['50', 'string'],
    'originator_fi_name': ['50', 'string'],
    'originator_fi_country': ['50', 'string'],
    'incoming_intermediary_fi_bic': ['56', [(r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi"), '']],
    'outgoing_intermediary_fi_bic': ['57A', [(r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi"), '']],
    'beneficiary_full_name': ['59', 'string'],
    'beneficiary_first_name': ['null', ['string', '']],
    'beneficiary_middle_names_patronymic': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'beneficiary_last_name': ['null', ['string', '']],
    'beneficiary_account_number': ['null', 'string'],
    'beneficiary_address': ['59', 'string'],
    'beneficiary_country': ['59', ['regex', "^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$"]],
    'beneficiary_branch_id': ['null', ['string', '']],
    'beneficiary_bic': ['71F', 'string'],
    'beneficiary_fi_name': ['59', 'string'],
    'beneficiary_fi_country': ['59', 'string'],
}

# TODO: add case where new swift variable is added which isn't in the dictionary above
# TODO: automatically read all swift messages, and give succession print statements
# For each key in swift_corr, get the value that's in the swift_dic
# For each value in the swift_dic, find the corresponding variable, which is the key in swift_corr. The belonging value is the mt103 tag
# For each value, put it in the df. But before doing so, get the requirements which is the second list item in the swift_corr.

populated_dic = {}

# Function to populate the dataframe from a MT103 dictionary. Function takes swift MT103 message as parameter
# and uses the swift_corr as translator, it gaps the bridge between swift tags and readable variable names.
# This function also checks the input validity which is the second list value item in swift_corr.
# TODO: add aditional checks for mutliple attribute variables, use functions
def populate_dataframe(mt103_message):
    for item in swift_corr:
        for key in mt103_message:
            if swift_corr[item][0] == key:
                # if swift_corr[item][1] == 'string':
                try:
                    # Insert each value as a string
                    df[item] = [str(mt103_message[key])]
                except:
                    print('Cannot convert data to string type')
                # elif swift_corr[item][1] == 'int':
                #     try:
                #         df[item] = [int(mt103_message[key])]
                #     except:
                #         print('Cannot convert data to integer type')
                # elif swift_corr[item][1] == 'date':
                #     try:
                #         df[item] = [datetime.strptime((mt103_message[key]), '%d%m%Y').date()]
                #     except:
                #         print('Cannot convert data to date type')
                # elif len(swift_corr[item][1]) > 0:
                #     # Allow empty values
                #     if '' in swift_corr[item][0]:
                #         continue
                #     # If regex check, apply the check 
                #     elif  'regex' in swift_corr[item]:
                #         if re.match(swift_corr[item][1][1], mt103_message[key]):
                #             df[item] = [str(mt103_message[key])]
                #         else:
                #             print('Value does not pass the regex check')
                #     else:
                #         if mt103_message[key] in swift_corr[item][1]:
                #             df[item] = [str(mt103_message[key])]
                #         else:
                #             print('This value does not correspond to any legal values for this field')            

populate_dataframe(swift_dic)
df


Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_bic,beneficiary_fi_name,beneficiary_fi_country,transaction_amount
0,"210322USD5000,",MT103 0001,/INS/THIS IS A PAYMENT FOR TUNA SUPPLY\n-},"210322USD5000,",CRED,,,,,,/DE98765432101234567890\nCOMMERZBANK AG\nHAMBU...,,,,,,,"/HBUKGB4BXXX\nHSBC BANK PLC\nLONDON, UNITED KI...",/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,,/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,,/BIC/HBUKGB4BXXX,/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,"210322USD5000,"


In [232]:
# Currently some values should be seperated over multiple columns. 
# This function makes sure unecessary information is removed from cells.
def clean_cells(df):
    # Firstly extract and split transaction details. The swift_corr facilitates this
    transaction_columns = []
    for key in swift_corr:
        if '32A' in swift_corr[key]:
            transaction_columns.append(key)

    # Get unsplit raw string and split it
    transaction_info_str = df[transaction_columns[0]][0]
    trans_date, trans_currency, trans_amount = extract_32a(transaction_info_str)

    # Insert the splitted data
    for item in transaction_columns:
        if 'transaction_date' in item:
            df[item] = trans_date
        elif 'transaction_amount' in item:
            df[item] = trans_amount
        elif 'transaction_currency' in item:
            df[item] = trans_currency

    bank_originator_columns = []

    for key in swift_corr:
        if '50' in swift_corr[key] or '50K' in swift_corr[key] or '71G' in swift_corr[key]:
            bank_originator_columns.append(key)

    originator_info_str = df['originator_address'][0]
    originator_list = extract_bank_information(originator_info_str)

    if len(originator_list) == 4:
        bic, bank, city, country = extract_bank_information(originator_info_str)
        for item in bank_originator_columns:
            if 'originator_bic' in item:
                df[item] = bic
            elif 'originator_fi_country' in item:
                df[item] = bic[0:2]
            elif 'originator_branch' in item:
                df[item] = bank
            elif 'originator_city' in item:
                df[item] = city
            elif 'originator_country' in item:
                # Convert country in DF to a ISO country code using pycountry
                if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                    country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                    df[item] = country_iso
                else:
                    df[item] = country
    
    else:
        bic, bank, city, country, reference = extract_bank_information(originator_info_str)
        for item in bank_originator_columns:
            if 'originator_bic' in item:
                df[item] = bic
            elif 'originator_fi_name' in item:
                df[item] = bank
            elif 'originator_fi_country' in item:
                df[item] = bic[0:2]
            elif 'originator_address' in item:
                df[item] = city
            elif 'originator_country' in item:
                if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                    country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                    df[item] = country_iso
                else:
                    df[item] = country
            elif 'reference' in item:
                df[item] = reference

    bank_beneficiary_columns = []
    for key in swift_corr:
        if '59' in swift_corr[key]:
            bank_beneficiary_columns.append(key)

    beneficiary_info_str = df['beneficiary_address'][0]
    print(beneficiary_info_str)
    print(extract_bank_information(beneficiary_info_str))

    if len(extract_bank_information(beneficiary_info_str)) == 5:
        bic, bank, city, address, country = extract_bank_information(beneficiary_info_str)
        for item in bank_beneficiary_columns:
            if 'beneficiary_bic' in item:
                df[item] = bic
            elif 'beneficiary_fi_name' in item:
                df[item] = bank
            elif 'beneficiary_fi_country' in item:
                df[item] = bic[0:2]
            elif 'beneficiary_address' in item:
                df[item] = city
            elif 'beneficiary_country' in item:
                if country:
                    if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                        country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                        df[item] = country_iso
                    else:
                        df[item] = country
            
    else:
        bic, bank, city, address, country, reference = extract_bank_information(beneficiary_info_str)
        for item in bank_beneficiary_columns:
            if 'beneficiary_bic' in item:
                df[item] = bic
            elif 'beneficiary_fi_name' in item:
                df[item] = bank
            elif 'beneficiary_address' in item:
                df[item] = city
            elif 'beneficiary_country' in item:
                df[item] = bic[0:2]
            elif 'beneficiary_fi_country' in item:
                if country:
                    if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                        country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                        df[item] = country_iso
                    else:
                        df[item] = country
            elif 'beneficiary_country' in item:
                if country:
                    if pycountry.countries.search_fuzzy(country)[0].alpha_2:
                        country_iso = pycountry.countries.search_fuzzy(country)[0].alpha_2
                        df[item] = country_iso
                    else:
                        df[item] = country
            elif 'reference' in item:
                df[item] = reference


    if '/BIC/' in df['beneficiary_bic'][0]:
        df['beneficiary_bic'][0] = df['beneficiary_bic'][0].replace('/BIC/', '')
    
    if ',' in df['transaction_amount'][0]:
        df['transaction_amount'][0] = df['transaction_amount'][0].replace(',', '')
    
    if '/INS/' in df['transaction_message'][0]:
        df['transaction_message'][0] = df['transaction_message'][0].replace('/INS/', '')
    
    if '\n-}' in df['transaction_message'][0]:
        df['transaction_message'][0] = df['transaction_message'][0].replace('\n-}', '')

clean_cells(df)
pd.set_option('display.max_columns', None)
df

4
4
/GB57METR12345678901234
NORDFISCH GMBH
BODENSEE STR. 226
22761 HAMBURG
GERMANY
5
('GB57METR12345678901234', 'NORDFISCH GMBH', 'BODENSEE STR. 226', '22761 HAMBURG', 'GERMANY', '')
5
5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['beneficiary_bic'][0] = df['beneficiary_bic'][0].replace('/BIC/', '')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['transaction_amount'][0] = df['transaction_amount'][0].replace(',', '')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['transaction_message'][0] = df['transaction_message'][0].replace('/INS/', '')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stab

Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,originator_address,originator_country,originator_account_number,originator_branch_id,originator_bic,originator_fi_name,originator_fi_country,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_bic,beneficiary_fi_name,beneficiary_fi_country,transaction_amount
0,210322,MT103 0001,THIS IS A PAYMENT FOR TUNA SUPPLY,USD,CRED,,,,,,HAMBURG,DE,,,DE98765432101234567890,COMMERZBANK AG,DE,"/HBUKGB4BXXX\nHSBC BANK PLC\nLONDON, UNITED KI...",/GB57METR12345678901234\nNORDFISCH GMBH\nBODEN...,,BODENSEE STR. 226,GB,,HBUKGB4BXXX,NORDFISCH GMBH,DE,5000


<h2>AML Functions</h2>
<h4>Round amounth payments</h4>

In [233]:
def is_round(df):
    num = df['transaction_amount'][0]
    # Try to convert into a number, if possible throw exception
    try:
        num = int(num)
        # Check for round number
        if num % 10 == 0:
            return "Is a round number"
        else:
            return "Is not a round number"
    except ValueError:
        return "Could not convert to number"
    
is_round(df)


'Is a round number'

In [234]:
def is_smurfing(mt103_message):
    # extract the beneficiary's account number
    match = re.search(':59:/(.*?)\n', mt103_message)
    if not match:
        return False
    beneficiary = match.group(1)

    # extract all transaction amounts
    amounts = re.findall(':32A:[A-Z]{3}\d+,(.*?)\n', mt103_message)

    # check if any transaction amount is less than a threshold value
    threshold = 10000  # change this value as per your requirement
    
    small_transactions = [amt for amt in amounts if float(amt) < threshold]

    # check if any small transaction is being conducted with the same beneficiary
    for transaction in small_transactions:
        if re.search(':59:/'+beneficiary, transaction):
            return True

    return False

In [235]:
def high_risk_country(df):
    # List of country codes of highest risk countries based on https://fsi.taxjustice.no/fsi/2022/world/index/top
    hr_countries = ['US', 'CH', 'SG', 'HK', 'LU', 'JP', 'DE', 'AE', 'VG', 'GG']
    
    country_iso = df['originator_country'][0]

    if country_iso in hr_countries:
        return True
    else:
        return False

high_risk_country(df)


True