In [21]:
import pandas as pd
import re

In [22]:
# Provided data: List of MT103 messages
mt103_messages = [
    """
    {1:F01MYMBGB2L0XXX0000000000}{2:I103HBUKGB4BXXXN}{3:{108:MT103
    0001}}{4:
    :20:MT103 0001
    :23B:CRED
    :32A:210322USD5000,
    :50K:/DE98765432101234567890
    COMMERZBANK AG
    HAMBURG, GERMANY
    /COBADEHHXXX
    :52A:/COBADEHHXXX
    COMMERZBANK AG
    HAMBURG, GERMANY
    :53A:/MYMBGB2LXXX
    METRO BANK PLC
    LONDON, UNITED KINGDOM
    :57A:/HBUKGB4BXXX
    HSBC BANK PLC
    LONDON, UNITED KINGDOM
    :59:/GB57METR12345678901234
    NORDFISCH GMBH
    BODENSEE STR. 226
    22761 HAMBURG
    GERMANY
    :71A:OUR
    :71F:/BIC/HBUKGB4BXXX
    :71G:/INS/THIS IS A PAYMENT FOR TUNA SUPPLY
    -}
    """,
    # Add more MT103 messages here
]

In [23]:
# Define a function to extract values from the MT103 message
def extract_value(tag, message):
    regex_pattern = r'(?<=:' + tag + ':)(.*?)(?=\n|\Z)'
    match = re.search(regex_pattern, message)
    if match:
        return match.group(1).strip()
    else:
        return ''

In [24]:
# Create an empty DataFrame with columns as per the provided schema
columns = [
    "transaction_id",
    "originator_account_number",
    "originator_full_name",
    "originator_address",
    "originator_country",
    "originator_bic",
    "originator_fi_name",
    "beneficiary_account_number",
    "beneficiary_full_name",
    "beneficiary_address",
    "beneficiary_country",
    "beneficiary_bic",
    "beneficiary_fi_name",
    "transaction_type",
    "transaction_amount",
    "transaction_currency",
    "transaction_message"
]

empty_df = pd.DataFrame(columns=columns)

In [25]:
# Process each MT103 message
dfs = []
for mt103_message in mt103_messages:
    # Extract values from the MT103 message
    transaction_id = extract_value('20', mt103_message)
    originator_field = extract_value('50K', mt103_message)
    originator_account_number, *originator_details = originator_field.split('\n')
    originator_full_name = '\n'.join(originator_details[:-2]) if len(originator_details) > 2 else ""
    originator_address = originator_details[-2] if len(originator_details) > 1 else ""
    originator_country = originator_details[-1] if originator_details else ""
    originator_bic = extract_value('52A', mt103_message)
    originator_fi_name = extract_value('52A', mt103_message).split('\n')[1] if len(extract_value('52A', mt103_message).split('\n')) > 1 else ""
    beneficiary_account_number = extract_value('59', mt103_message)
    # beneficiary_full_name = extract_value('59', mt103_message).split('\n')[1]
    # beneficiary_address = extract_value('59', mt103_message).split('\n')[2]
    # beneficiary_country = extract_value('59', mt103_message).split('\n')[-2]
    beneficiary_bic = extract_value('57A', mt103_message)
    # beneficiary_fi_name = extract_value('57A', mt103_message).split('\n')[1]
    transaction_type = extract_value('23B', mt103_message)
    transaction_amount = extract_value('32A', mt103_message).split('USD')[1].replace(',', '')
    transaction_currency = 'USD'  # Hardcoded as USD
    transaction_message = extract_value('71G', mt103_message)

    # Create a dictionary with extracted values
    data = {
        "transaction_id": transaction_id,
        "originator_account_number": originator_account_number,
        "originator_full_name": originator_full_name,
        "originator_address": originator_address,
        "originator_country": originator_country,
        "originator_bic": originator_bic,
        "originator_fi_name": originator_fi_name,
        "beneficiary_account_number": beneficiary_account_number,
        # "beneficiary_full_name": beneficiary_full_name,
        # "beneficiary_address": beneficiary_address,
        # "beneficiary_country": beneficiary_country,
        "beneficiary_bic": beneficiary_bic,
        # "beneficiary_fi_name": beneficiary_fi_name,
        "transaction_type": transaction_type,
        "transaction_amount": transaction_amount,
        "transaction_currency": transaction_currency,
        "transaction_message": transaction_message
    }

    # Create DataFrame from dictionary
    df = pd.DataFrame(data, index=[0])
    dfs.append(df)

# Concatenate all DataFrames
result_df = pd.concat(dfs, ignore_index=True)

In [26]:
result_df.head()

Unnamed: 0,transaction_id,originator_account_number,originator_full_name,originator_address,originator_country,originator_bic,originator_fi_name,beneficiary_account_number,beneficiary_bic,transaction_type,transaction_amount,transaction_currency,transaction_message
0,MT103 0001,/DE98765432101234567890,,,,/COBADEHHXXX,,/GB57METR12345678901234,/HBUKGB4BXXX,CRED,5000,USD,/INS/THIS IS A PAYMENT FOR TUNA SUPPLY
