<h1>Assignment for the Case AML (System Development for Fintech)</h1>
<h4>David Widlak</h4>
This document represents an anti money laundering algorithm. This document converts MT103 Swift messages into a workable data structure. These messages are then analyzed using different functions, any suspicious transactions are then flagged using these functions. 

<h4>Initial inclusion of assisting Python libraries</h4>

In [413]:
import re
from datetime import date
import pandas as pd
from mt103 import MT103
import numpy as np

<h4>Converting MT103 Swift message into a different data structure</h4>

In [452]:
# Function to structurize MT103 messages into a dictionary
def structurize_mt103(message):
    msg_dic = {}
    # In case the message is a text file
    message = message.read()
    # Split the unstructured message with regex
    key_value = re.split(r'}{|\n:', message)
    # Key without associating value
    key_value.remove('4:')

    for kv_str in key_value:
        # Clean the key strings
        # kv_str = re.sub(r'{|}|\n', '', str(key_value))
        # Split into key value pairs
        key, value = kv_str.split(':', maxsplit = 1)
        msg_dic[key] = value

    return msg_dic

In [443]:
# Create dataframe, which may later be populated with MT103 messages. Returns a pandas dataframe.
# This function creates readable keys, so MT103 variables can be assigned to readable keys.
# This function also creates a dictionary which contains all keys with variable requirements as values


def create_dataframe(mt103_markup_message):
    df, value_requirements = pd.DataFrame(), {}

    # Define regular expression ˚pattern to find keys in the message
    key_re = re.compile(r'(transaction|instrument|originator|beneficiary|ingoing|outgoing)_*')

    # Load MT103 file and read it line by line
    for line in mt103_markup_message:
        # Use the regex to find key / value pairs
        if key_re.search(line) and not line.startswith('//'):   
            key, value = line.split(maxsplit=1)    
            # Assign keys as columns into the dataframe
            key_name = key.rstrip(':')
            value_requirements[key] = value
            df[key_name] = np.NaN

    # Returns dataframe and value requirements dictionary
    return df, value_requirements

In [476]:
def get_key(val, dic):
    for key, value in dic.items():
        if val == value:
            return key
 
    return "key doesn't exist"

In [498]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

with open('swift.txt', 'r') as file:
    df, value_requirements = create_dataframe(file)

swift_corr = {
    'transaction_date': ['32A', 'datetime'],
    'transaction_id': ['20', 'string'],
    'transaction_message': ['70', 'string'],
    'transaction_currency': ['32A', (r"^(AED|AFN|ALL|AMD|ANG|AOA|ARS|AUD|AWG|AZN|BAM|BBD|BDT|BGN|BHD|B IF|BMD|BND|BOB|BOV|BRL|BSD|BTN|BWP|BYR|BZD|CAD|CDF|CHE|CHF|CHW|CLF|CLP |CNY|COP|COU|CRC|CUC|CUP|CVE|CZK|DJF|DKK|DOP|DZD|EGP|ERN|ETB|EUR|FJD|F KP|GBP|GEL|GHS|GIP|GMD|GNF|GTQ|GYD|HKD|HNL|HRK|HTG|HUF|IDR|ILS|INR|IQD |IRR|ISK|JMD|JOD|JPY|KES|KGS|KHR|KMF|KPW|KRW|KWD|KYD|KZT|LAK|LBP|LKR|L RD|LSL|LTL|LVL|LYD|MAD|MDL|MGA|MKD|MMK|MNT|MOP|MRO|MUR|MVR|MWK|MXN|MXV |MYR|MZN|NAD|NGN|NIO|NOK|NPR|NZD|OMR|PAB|PEN|PGK|PHP|PKR|PLN|PYG|QAR|R ON|RSD|RUB|RWF|SAR|SBD|SCR|SDG|SEK|SGD|SHP|SLL|SOS|SRD|SSP|STD|SVC|SYP |SZL|THB|TJS|TMT|TND|TOP|TRY|TTD|TWD|TZS|UAH|UGX|USD|USN|USS|UYI|UYU|U ZS|VEF|VND|VUV|WST|XAF|XAG|XAU|XBA|XBB|XBC|XBD|XCD|XDR|XFU|XOF|XPD|XPF |XPT|XSU|XTS|XUA|XXX|YER|ZAR|ZMW|ZWL)$")],
    'transaction_amount': ['32A', 'int'],
    'transaction_type': ['23B', 'string'],
    'transaction_direction': ['null', ['i', 'o', 'io']],
    'transaction_status': ['null', ['accepted', 'rejected']],
    'instrument_type': ['null', ['cash','check','ach/lcy_transfers','wire','securities','e- money/mobile_money','travellers_cheques','prepaid_cards','certified_ch eques','vouchers','cashier_cheques/money_order','precious_metal','cryp to/virtual_assets','interest/dividend','other']],
    'originator_full_name': ['50', 'string'],
    'originator_first_name': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'originator_last_name': ['null', ['string', '']],
    'originator_address': ['50', 'string'],
    'originator_country': ['50', (r"^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$")],
    'originator_account_number': ['50', 'string'],
    'originator_branch_id': ['null', ['string', '']],
    'originator_bic': ['50', 'string'],
    'originator_fi_name': ['50', 'string'],
    'originator_fi_country': ['50', 'string'],
    'incoming_intermediary_fi_bic': ['56', [(r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi"), '']],
    'outgoing_intermediary_fi_bic': ['null', [(r"/([a-z]{4}[a-z]{2}[a-z0-9]{2}([a- z0-9]{3})?;?)+/gmi"), '']],
    'beneficiary_full_name': ['59', 'string'],
    'beneficiary_first_name': ['null', ['string', '']],
    'beneficiary_middle_names_patronymic': ['null', ['string', '']],
    'originator_middle_names_patronymic': ['null', ['string', '']],
    'beneficiary_last_name': ['null', ['string', '']],
    'beneficiary_account_number': ['null', 'string'],
    'beneficiary_address': ['59', 'string'],
    'beneficiary_country': ['59', (r"^(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AT|AZ|BS|BH|BD|BB|B Y|BE|BZ|BJ|BM|BT|BO|BQ|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD |CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CW|CY|CZ|DK|DJ|DM|DO|EC|EG|SV| GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|G T|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO |KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML| MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MN|ME|MS|MA|MZ|MM|NA|NR|NP|NL|NC|NZ|NI|N E|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW |BL|SH|KN|LC|MF|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SX|SK|SI|SB|SO|ZA|GS| SS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|T V|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)$")],
    'beneficiary_branch_id': ['null', ['string', '']],
    'beneficiary_bic': ['59', 'string'],
    'beneficiary_fi_name': ['59', 'string'],
    'beneficiary_fi_country': ['59', 'string'],
}


# TODO:
# For each key in swift_corr, get the value that's in the swift_dic
# For each value in the swift_dic, find the corresponding variable, which is the key in swift_corr. The belonging value is the mt103 tag
# For each value, put it in the df. But before doing so, get the requirements which is the second list item in the swift_corr.

populated_dic = {}

# Function to populate the dataframe from a MT103 dictionary. Function takes swift MT103 message as parameter
# and uses the swift_corr as translator, it gaps the bridge between swift tags and readable variable names.
# This function also checks the input validity which is the second list value item in swift_corr.
def populate_dataframe(mt103_message):
    for item in swift_corr:
        for key in mt103_message:
            if swift_corr[item][0] == key:
                # TODO: Add check for type in swift_corr
                if swift_corr[item][1] == 'string':
                    try:
                        df[item] = [str(mt103_message[key])]
                    except:
                        print('Cannot convert data to string type')
                elif swift_corr[item][1] == 'int':
                    try:
                        df[item] = [int(mt103_message[key])]
                    except:
                        print('Cannot convert data to integer type')
                elif swift_corr[item][1] == 'datetime':
                    try:
                        df[item] = [date(mt103_message[key])]
                    except:
                        print('Cannot convert data to date type')
                elif len(swift_corr[item]) > 1:
                    print('alalal')
                    

            # key = [k for k, v in swift_corr.items() if v == item_parts[1]]
            # result_list.append(item_parts[0])

populate_dataframe(swift_dic)
df

alalal
alalal
210322USD5000,
Cannot convert data to integer type
alalal


Unnamed: 0,transaction_date,transaction_id,transaction_message,transaction_currency,transaction_type,transaction_direction,transaction_status,instrument_type,originator_full_name,originator_first_name,...,originator_fi_country,outgoing_intermediary_fi_bic,beneficiary_full_name,beneficiary_first_name,beneficiary_address,beneficiary_country,beneficiary_branch_id,beneficiary_bic,beneficiary_fi_name,beneficiary_fi_country
0,,MT103 0001,,,CRED,,,,,,...,,,/GB57METR12345678901234\nUnset\nNORDFISCH GMBH...,,/GB57METR12345678901234\nUnset\nNORDFISCH GMBH...,,,/GB57METR12345678901234\nUnset\nNORDFISCH GMBH...,/GB57METR12345678901234\nUnset\nNORDFISCH GMBH...,/GB57METR12345678901234\nUnset\nNORDFISCH GMBH...


In [None]:
# Function to assign dictionary keys to readable variable names and requirements

{}

In [486]:
with open('swift.txt', 'r') as file:
    df, value_requirements = create_dataframe(file)

{'transaction_date:': 'xDateTimeTz\n',
 'transaction_id:': 'notEmpty\n',
 'transaction_message:': 'notEmpty\n',
 'transaction_currency:': 'regex("^(AED|AFN|ALL|AMD|ANG|AOA|ARS|AUD|AWG|AZN|BAM|BBD|BDT|BGN|BHD|B IF|BMD|BND|BOB|BOV|BRL|BSD|BTN|BWP|BYR|BZD|CAD|CDF|CHE|CHF|CHW|CLF|CLP |CNY|COP|COU|CRC|CUC|CUP|CVE|CZK|DJF|DKK|DOP|DZD|EGP|ERN|ETB|EUR|FJD|F KP|GBP|GEL|GHS|GIP|GMD|GNF|GTQ|GYD|HKD|HNL|HRK|HTG|HUF|IDR|ILS|INR|IQD |IRR|ISK|JMD|JOD|JPY|KES|KGS|KHR|KMF|KPW|KRW|KWD|KYD|KZT|LAK|LBP|LKR|L RD|LSL|LTL|LVL|LYD|MAD|MDL|MGA|MKD|MMK|MNT|MOP|MRO|MUR|MVR|MWK|MXN|MXV |MYR|MZN|NAD|NGN|NIO|NOK|NPR|NZD|OMR|PAB|PEN|PGK|PHP|PKR|PLN|PYG|QAR|R ON|RSD|RUB|RWF|SAR|SBD|SCR|SDG|SEK|SGD|SHP|SLL|SOS|SRD|SSP|STD|SVC|SYP |SZL|THB|TJS|TMT|TND|TOP|TRY|TTD|TWD|TZS|UAH|UGX|USD|USN|USS|UYI|UYU|U ZS|VEF|VND|VUV|WST|XAF|XAG|XAU|XBA|XBB|XBC|XBD|XCD|XDR|XFU|XOF|XPD|XPF |XPT|XSU|XTS|XUA|XXX|YER|ZAR|ZMW|ZWL)$")\n',
 'transaction_type:': 'notEmpty\n',
 'transaction_direction:': 'any("i", "o", "io")\n',
 'transaction_stat

In [453]:
with open('mt103.txt', 'r') as file:
    swift_dic = structurize_mt103(file)

<h2>AML Functions</h2>
<h4>Round amounth payments</h4>