In [1]:
from interchange.persistence.file import FileStorage
from interchange.visa import transform, extract, clean, calculate, interchange, store
# Short alias for FileStorage.Layer; its members (e.g. STAGING) are used
# below to select the origin and target storage layers.
layer = FileStorage.Layer

In [2]:
# Client and file identifiers passed to every FileStorage read/write below.
client_id = "SBSA"
file_id = "B6781ADDCFE0CD800BFA2968A6ED2816"

In [3]:
# Parameters for the extraction step: where the raw messages are read from
# and where the extracted fields are written to. Both live in the staging
# layer and differ only by sub-directory.
# `client_id` and `file_id` are defined in the previous cell and are reused
# as-is, so they are not re-assigned here.
origin_layer = layer.STAGING
target_layer = layer.STAGING
origin_subdir = "100-SMS_RAW_MESSAGES"
target_subdir = "200-SMS_EXT_MESSAGES"

In [4]:
# Echo the run parameters so the cell output records what was processed.
print(
    origin_layer,
    target_layer,
    client_id,
    file_id,
    origin_subdir,
    target_subdir,
)

staging staging SBSA B6781ADDCFE0CD800BFA2968A6ED2816 100-SMS_RAW_MESSAGES 200-SMS_EXT_MESSAGES


In [5]:
import pandas as pd

from interchange.logs.logger import Logger
from interchange.persistence.database import Database
from interchange.persistence.file import FileStorage


# Logger named after the current module, and the file-storage handle used by
# the cells below to read and write parquet files.
log = Logger(__name__)
fs = FileStorage()


In [13]:
def _load_visa_field_definitions(type_record: str, sort_by: list[str]) -> pd.DataFrame:
    """
    Load the Visa field definitions for one record type, sorted ascending.

    Rows are read from the ``visa_fields`` table where ``type_record`` equals
    the given value. The position/length columns are converted to numeric
    values: entries that cannot be parsed become NaN, and columns are downcast
    to the smallest integer dtype when possible.

    Args:
        type_record: Value matched against the ``type_record`` column.
        sort_by: Column names to order the result by (ascending).

    Returns:
        The matching field definitions as a dataframe sorted by ``sort_by``.
    """
    selected_columns = [
        "type_record",
        "tcsn",
        "position",
        "length",
        "column_name",
        "secondary_identifier_pos",
        "secondary_identifier_len",
        "secondary_identifier",
    ]
    # Columns holding offsets/lengths that later feed string slicing.
    numeric_columns = [
        "position",
        "length",
        "secondary_identifier_pos",
        "secondary_identifier_len",
    ]

    definitions = Database().read_records(
        table_name="visa_fields",
        fields=selected_columns,
        where={"type_record": type_record},
    )

    converted = definitions[numeric_columns].apply(
        pd.to_numeric, errors="coerce", downcast="integer"
    )
    definitions[numeric_columns] = converted

    return definitions.sort_values(by=sort_by, ascending=True)

In [48]:
# Load the "sms" field definitions ordered by record identifier and position,
# dropping the definitions that belong to the "V22000" record.
# NOTE(review): presumably V22000 is excluded because the raw data loaded in
# this notebook has no matching column — confirm.
field_defs = _load_visa_field_definitions(
    "sms",
    sort_by=["secondary_identifier", "position"],
).loc[lambda defs: defs["secondary_identifier"] != "V22000"]

2025-11-20 14:22:41,512 :: PID 33928 :: TID 24072 :: database._create_connection :: Line 33 :: DEBUG :: Connected to SQLite database
2025-11-20 14:22:41,512 :: PID 33928 :: TID 24072 :: database.read_records :: Line 138 :: DEBUG :: Attempting to execute SELECT SQL statement
2025-11-20 14:22:41,512 :: PID 33928 :: TID 24072 :: database._execute :: Line 57 :: DEBUG :: SQL statement executed successfully
2025-11-20 14:22:41,528 :: PID 33928 :: TID 24072 :: database._close_connection :: Line 44 :: DEBUG :: Closed connection to SQLite database


In [49]:
# Inspect the filtered field definitions (the rich repr truncates long frames).
field_defs

Unnamed: 0,type_record,tcsn,position,length,column_name,secondary_identifier_pos,secondary_identifier_len,secondary_identifier
1,sms,0,41,1,issuer_acquirer_indicator,35,6,V22200
10,sms,0,42,10,mvv_code,35,6,V22200
24,sms,0,52,1,remote_terminal_indicator,35,6,V22200
26,sms,0,53,1,charge_indicator,35,6,V22200
29,sms,0,54,2,product_id_sms,35,6,V22200
...,...,...,...,...,...,...,...,...
66,sms,0,77,12,cryptogram_amount,35,6,V22281
78,sms,0,89,3,cryptogram_currency_code,35,6,V22281
81,sms,0,92,12,cryptogram_cashback_amount,35,6,V22281
9,sms,0,41,30,issuer_discretionary_data,35,6,V22282


In [16]:
# Read the raw messages for this client/file from the origin sub-directory.
data = fs.read_parquet(origin_layer, client_id, file_id, subdir=origin_subdir)

2025-11-20 13:30:47,496 :: PID 33928 :: TID 24072 :: database._create_connection :: Line 33 :: DEBUG :: Connected to SQLite database
2025-11-20 13:30:47,497 :: PID 33928 :: TID 24072 :: database.read_records :: Line 138 :: DEBUG :: Attempting to execute SELECT SQL statement
2025-11-20 13:30:47,497 :: PID 33928 :: TID 24072 :: database._execute :: Line 57 :: DEBUG :: SQL statement executed successfully
2025-11-20 13:30:47,504 :: PID 33928 :: TID 24072 :: database._close_connection :: Line 44 :: DEBUG :: Closed connection to SQLite database


In [26]:
# Inspect the raw messages: one column per record identifier ("22200", ...),
# one row per `record` index value.
data

key,22200,22210,22220,22225,22226,22230,22250,22260,22261,22280,22281,22282
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,3300454858400082SMSRAWDATA00000003V22200A ...,3300454858400082SMSRAWDATA00000004V22210111714...,3300454858400082SMSRAWDATA00000005V22220020510...,3300454858400082SMSRAWDATA00000006V22225020044...,,,3300454858400082SMSRAWDATA00000007V22250111825...,3300454858400082SMSRAWDATA00000008V22260532100...,,3300454858400082SMSRAWDATA00000009V222800 ...,3300454858400082SMSRAWDATA00000010V22281 ...,3300454858400082SMSRAWDATA00000011V22282 ...
1,3300454858400082SMSRAWDATA00000012V22200A ...,3300454858400082SMSRAWDATA00000013V22210111714...,3300454858400082SMSRAWDATA00000014V22220020510...,3300454858400082SMSRAWDATA00000015V22225020045...,,,3300454858400082SMSRAWDATA00000016V22250111825...,3300454858400082SMSRAWDATA00000017V22260532100...,,3300454858400082SMSRAWDATA00000018V222800 ...,3300454858400082SMSRAWDATA00000019V22281 ...,3300454858400082SMSRAWDATA00000020V22282 ...
2,3300454858400082SMSRAWDATA00000021V22200A ...,3300454858400082SMSRAWDATA00000022V22210111714...,3300454858400082SMSRAWDATA00000023V22220020510...,3300454858400082SMSRAWDATA00000024V22225020047...,,,3300454858400082SMSRAWDATA00000025V22250111825...,3300454858400082SMSRAWDATA00000026V22260532100...,,3300454858400082SMSRAWDATA00000027V222800 ...,3300454858400082SMSRAWDATA00000028V22281 ...,3300454858400082SMSRAWDATA00000029V22282 ...
3,3300454858400082SMSRAWDATA00000030V22200A ...,3300454858400082SMSRAWDATA00000031V22210111714...,3300454858400082SMSRAWDATA00000032V22220020510...,3300454858400082SMSRAWDATA00000033V22225020097...,,,3300454858400082SMSRAWDATA00000034V22250111825...,3300454858400082SMSRAWDATA00000035V22260532100...,,3300454858400082SMSRAWDATA00000036V222800 ...,3300454858400082SMSRAWDATA00000037V22281 ...,3300454858400082SMSRAWDATA00000038V22282 ...
4,3300454858400082SMSRAWDATA00000039V22200A ...,3300454858400082SMSRAWDATA00000040V22210111714...,3300454858400082SMSRAWDATA00000041V22220020510...,3300454858400082SMSRAWDATA00000042V22225020022...,,,,3300454858400082SMSRAWDATA00000043V22260532100...,,3300454858400082SMSRAWDATA00000044V222800 ...,3300454858400082SMSRAWDATA00000045V22281 ...,3300454858400082SMSRAWDATA00000046V22282 ...
...,...,...,...,...,...,...,...,...,...,...,...,...
35969,3300454858400082SMSRAWDATA00251762V22200A ...,3300454858400082SMSRAWDATA00251763V22210111811...,3300454858400082SMSRAWDATA00251764V22220020510...,,,,3300454858400082SMSRAWDATA00251765V22250111825...,3300454858400082SMSRAWDATA00251766V22260532200...,,3300454858400082SMSRAWDATA00251767V22280060402...,3300454858400082SMSRAWDATA00251768V222810C0185...,3300454858400082SMSRAWDATA00251769V22282 ...
35970,3300454858400082SMSRAWDATA00251770V22200A ...,3300454858400082SMSRAWDATA00251771V22210111811...,3300454858400082SMSRAWDATA00251772V22220020510...,,,,3300454858400082SMSRAWDATA00251773V22250111825...,3300454858400082SMSRAWDATA00251774V22260532200...,,3300454858400082SMSRAWDATA00251775V22280060402...,3300454858400082SMSRAWDATA00251776V22281151142...,3300454858400082SMSRAWDATA00251777V22282 ...
35971,3300454858400082SMSRAWDATA00251778V22200A ...,3300454858400082SMSRAWDATA00251779V22210111811...,3300454858400082SMSRAWDATA00251780V22220020510...,,,,3300454858400082SMSRAWDATA00251781V22250111825...,3300454858400082SMSRAWDATA00251782V22260532200...,,3300454858400082SMSRAWDATA00251783V22280060402...,3300454858400082SMSRAWDATA00251784V22281FADB2F...,3300454858400082SMSRAWDATA00251785V22282 ...
35972,3300454858400082SMSRAWDATA00251786V22200A ...,3300454858400082SMSRAWDATA00251787V22210111812...,3300454858400082SMSRAWDATA00251788V22220029010...,,,,3300454858400082SMSRAWDATA00251789V22250111825...,3300454858400082SMSRAWDATA00251790V22260532200...,,,,


In [55]:
# Slice every defined field out of the raw message column it belongs to.
# `data` has one column per record identifier, keyed without the leading "V"
# (e.g. "22200"), so the prefix is stripped to find the matching column.
fields = []
for _, fd in field_defs.iterrows():
    # Keep the derived key in a local: rows yielded by iterrows() are copies
    # and should not be written to.
    message_key = fd["secondary_identifier"][1:]

    # Definitions use 1-based positions; str.slice is 0-based with an
    # exclusive stop.
    start = fd["position"] - 1
    stop = start + fd["length"]

    field = pd.Series(
        data[message_key].str.slice(start=start, stop=stop),
        name=fd["column_name"],
    )
    fields.append(field)

# One column per field; records lacking a given message get an empty string.
extract_df = pd.concat(fields, axis=1).fillna("").astype(str)

# Persist the extracted fields to the target layer/sub-directory as parquet.
fs.write_parquet(extract_df, target_layer, client_id, file_id, subdir=target_subdir)

2025-11-20 14:24:21,111 :: PID 33928 :: TID 24072 :: file.write_parquet :: Line 128 :: DEBUG :: Writing SBSA file B6781ADDCFE0CD800BFA2968A6ED2816 to parquet
2025-11-20 14:24:21,113 :: PID 33928 :: TID 24072 :: database._create_connection :: Line 33 :: DEBUG :: Connected to SQLite database
2025-11-20 14:24:21,113 :: PID 33928 :: TID 24072 :: database.read_records :: Line 138 :: DEBUG :: Attempting to execute SELECT SQL statement
2025-11-20 14:24:21,113 :: PID 33928 :: TID 24072 :: database._execute :: Line 57 :: DEBUG :: SQL statement executed successfully
2025-11-20 14:24:21,113 :: PID 33928 :: TID 24072 :: database._close_connection :: Line 44 :: DEBUG :: Closed connection to SQLite database


In [54]:
# Inspect the extracted fields: one column per field definition.
extract_df

Unnamed: 0_level_0,issuer_acquirer_indicator,mvv_code,remote_terminal_indicator,charge_indicator,product_id_sms,business_application_identifier,account_funding_source,affiliate_id,settlement_date_sms,draft_identifier_sms,...,application_draft_counter,application_interchange_profile_sms,cryptogram_draft_type,terminal_country_code,terminal_draft_date,cryptogram_amount,cryptogram_currency_code,cryptogram_cashback_amount,issuer_discretionary_data,issuer_script_results_sms
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,A,,,,F,,D,4424630002,111825,385321443675101,...,00000,,00,000,000000,000000000000,000,000000000000,,
1,A,,,,F,,D,4424630002,111825,385321445950920,...,00000,,00,000,000000,000000000000,000,000000000000,,
2,A,,,,F,,D,7374180002,111825,385321456835123,...,00000,,00,000,000000,000000000000,000,000000000000,,
3,A,,,,F,,D,4028240002,111825,585321463117068,...,00233,,00,000,000000,000000000000,000,000000000000,,
4,A,,,,N,,D,4840020002,111825,385321464460423,...,00293,,00,000,000000,000000000000,000,000000000000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35969,A,,,,E,,D,4529010004,111825,345322343424710,...,00060,1800,01,710,251118,000000505000,710,000000000000,,
35970,A,,,,E,,D,4519700004,111825,355322351475509,...,00014,1800,01,710,251118,000000250000,710,000000000000,,
35971,A,,,,E,,D,4519700004,111825,585322351546438,...,00003,1800,01,710,251118,000000155000,710,000000000000,,
35972,A,,,,E,,D,4786890004,111825,385322370600256,...,,,,,,,,,,


In [None]:
# Variant of the extraction loop that selects rows by matching the secondary
# identifier inside the message text instead of by column key.
# NOTE(review): this cell has no execution count. It resets `fields` without
# rebuilding `extract_df`, and it indexes `data` by fd["tcsn"], whereas the
# `data` frame displayed earlier in this notebook has columns keyed by record
# identifier ("22200", ...). Confirm which data layout this variant expects
# before running it.
fields = []
for _, fd in field_defs.iterrows():
    # NOTE(review): a NaN secondary_identifier is truthy, so it would take the
    # `else` branch rather than this one — confirm how missing values arrive.
    if not fd["secondary_identifier"]:
        # Use entire dataframe.
        data_view = data
    else:
        # Filter for rows that match secondary condition.
        # The identifier position is 1-based, hence the "- 1" on start/stop.
        data_view = data[
            data[fd["tcsn"]].str.slice(
                start=fd["secondary_identifier_pos"] - 1,
                stop=fd["secondary_identifier_pos"]
                + fd["secondary_identifier_len"]
                - 1,
            )
            == fd["secondary_identifier"]
        ]
    # Get field values from data view.
    field = pd.Series(
        data_view[fd["tcsn"]].str.slice(
            start=fd["position"] - 1, stop=fd["position"] + fd["length"] - 1
        ),
        name=fd["column_name"],
    )
    fields.append(field)