In [1]:
from interchange.persistence.file import FileStorage
from interchange.visa import transform, extract, clean, calculate, interchange, store
# Alias for `FileStorage.Layer`; its members (`LANDING`, `STAGING`) select the
# origin and target storage layers in the parameter cell below.
layer = FileStorage.Layer

In [3]:
# Client and file identifiers of the interchange file processed by this notebook;
# both are passed to the storage read/write calls further down.
client_id = "SBSA"
file_id = "B6781ADDCFE0CD800BFA2968A6ED2816"

In [5]:
# Run parameters: where the source file is read from and where the result is written.
# `client_id` and `file_id` come from the configuration cell above and are reused
# unchanged, so they are not re-assigned here.
origin_layer = layer.LANDING
target_layer = layer.STAGING
origin_subdir = ""
target_subdir = "100-SMS_RAW_MESSAGES"

In [6]:
# Echo the run parameters so they are recorded in the notebook output.
print(
    origin_layer,
    target_layer,
    client_id,
    file_id,
    origin_subdir,
    target_subdir,
)

landing staging SBSA B6781ADDCFE0CD800BFA2968A6ED2816  100-SMS_RAW_DRAFTS


In [7]:
import pandas as pd

from interchange.logs.logger import Logger
from interchange.persistence.file import FileStorage


# Module-level logger and file-storage handle shared by the helper functions
# and cells below (`log.logger.*`, `fs.read_plaintext`, `fs.write_parquet`).
log = Logger(__name__)
fs = FileStorage()


In [70]:
def _load_as_ctf(
    layer: FileStorage.Layer, client_id: str, file_id: str, subdir=""
) -> pd.Series:
    """
    Load a Visa interchange file into memory forcing the CTF line format.

    The line format is detected from the length of the first record:
    168-character lines are returned unchanged, while 170-character lines
    have the two characters at positions 2-3 removed so that they end up
    168 characters long as well.

    Args:
        layer: Storage layer to read the file from.
        client_id: Identifier of the client the file belongs to.
        file_id: Identifier of the file to load.
        subdir: Optional sub-directory passed through to the storage reader.

    Returns:
        The file's lines as a Series named "lines". An empty Series (object
        dtype, so the ``.str`` accessor still works) is returned when the
        file has no records or its first record has an unknown length; an
        error is logged in both cases.
    """
    # Read from the layer the caller asked for rather than assuming LANDING.
    records = fs.read_plaintext(layer, client_id, file_id, subdir=subdir)
    if records.empty:
        # Without this guard `iloc[0, 0]` below raises IndexError on an empty file.
        log.logger.error("The Visa interchange file is empty")
        return pd.Series([], name="lines", dtype="object")
    header_record = str(records.iloc[0, 0])
    if len(header_record) == 168:
        return records["lines"]
    if len(header_record) == 170:
        # Drop characters 2-3 to bring the 170-character layout down to 168.
        return records["lines"].str.slice(stop=2) + records["lines"].str.slice(start=4)
    log.logger.error("The Visa interchange file has an unknown line length")
    return pd.Series([], name="lines", dtype="object")

In [71]:
# Accepted values for the fixed-width fields used by the filter cell below to
# pick the raw SMS lines out of the file (positions are 0-based, end-inclusive).
VALID_TC = ["33"]  # characters 0-1 of each line
VALID_TCSN = ["0"]  # character 3
VALID_SMS_TYPES = ["SMSRAWDATA"]  # characters 16-25
VALID_RAW_DATA_VERSION = ["V22"]  # characters 34-36
# Record-type keys (characters 35-39) that become the output columns, in this order.
# NOTE(review): "22000" is commented out — confirm it is meant to stay excluded.
VALID_RECORD_TYPES = [
    # "22000",
    "22200",
    "22210",
    "22220",
    "22225",
    "22226",
    "22230",
    "22250",
    "22260",
    "22261",
    "22280",
    "22281",
    "22282",
]
# Load the source file's lines, normalised to the CTF line layout.
ctf_records = _load_as_ctf(origin_layer, client_id, file_id, subdir=origin_subdir)

2025-11-20 10:55:47,296 :: PID 19468 :: TID 32312 :: file.read_plaintext :: Line 89 :: DEBUG :: Searching for SBSA file B6781ADDCFE0CD800BFA2968A6ED2816
2025-11-20 10:55:47,300 :: PID 19468 :: TID 32312 :: database._create_connection :: Line 33 :: DEBUG :: Connected to SQLite database
2025-11-20 10:55:47,300 :: PID 19468 :: TID 32312 :: database.read_records :: Line 138 :: DEBUG :: Attempting to execute SELECT SQL statement
2025-11-20 10:55:47,300 :: PID 19468 :: TID 32312 :: database._execute :: Line 57 :: DEBUG :: SQL statement executed successfully
2025-11-20 10:55:47,300 :: PID 19468 :: TID 32312 :: database._close_connection :: Line 44 :: DEBUG :: Closed connection to SQLite database
2025-11-20 10:55:47,300 :: PID 19468 :: TID 32312 :: file.read_plaintext :: Line 92 :: DEBUG :: Opening SBSA file B6781ADDCFE0CD800BFA2968A6ED2816


In [67]:
# Keep only the lines whose fixed-width header fields all carry an accepted value:
# transaction code, sequence number, SMS type and raw-data version.
drafts = ctf_records.loc[
    ctf_records.str[:2].isin(VALID_TC)
    & ctf_records.str[3:4].isin(VALID_TCSN)
    & ctf_records.str[16:26].isin(VALID_SMS_TYPES)
    & ctf_records.str[34:37].isin(VALID_RAW_DATA_VERSION)
]

In [61]:
# Inspect the filtered lines.
# NOTE(review): the saved output below shows 6-character values rather than full
# lines, and this cell's execution count (61) is lower than the filter cell's (67),
# so the output looks stale — re-run after the filter cell to confirm.
drafts

1566825    251118
1566826    000845
1566827    A     
1566828    111714
1566829    020510
            ...  
1818874    532200
1818875    060402
1818876    620AE9
1818877          
1818878    432563
Name: lines, Length: 251799, dtype: object

In [49]:
def _pivot_values_on_key(values: pd.Series, start: int, stop: int, cols: list[str]):
    """
    Pivot a series of values into records by a sorted numerical key in values.
    """
    values_df = values.to_frame(name="value")
    values_df["key"] = values_df["value"].str.slice(start=start, stop=stop).astype(int)
    values_df["record"] = (values_df["key"] < values_df["key"].shift(1)).cumsum()
    values_df["key"] = values_df["key"].astype(str)
    return (
        values_df.pivot(index="record", columns="key", values="value")
        .reindex(columns=cols)
        .fillna("")
        .astype(str)
    )

In [68]:
# Group the filtered lines into one row per record and one column per record type;
# characters 35-39 of each line hold the record-type key used for the pivot.
drafts_df = _pivot_values_on_key(drafts, start=35, stop=40, cols=VALID_RECORD_TYPES)
log.logger.info(f"Saving Raw SMS Transactions from {client_id} file {file_id}")
# Persist the pivoted records as parquet in the target layer/sub-directory.
fs.write_parquet(drafts_df, target_layer, client_id, file_id, subdir=target_subdir)

2025-11-20 10:53:32,807 :: PID 19468 :: TID 32312 :: 723367796.<module> :: Line 2 :: INFO :: Saving Raw SMS Transactions from SBSA file B6781ADDCFE0CD800BFA2968A6ED2816
2025-11-20 10:53:32,808 :: PID 19468 :: TID 32312 :: file.write_parquet :: Line 128 :: DEBUG :: Writing SBSA file B6781ADDCFE0CD800BFA2968A6ED2816 to parquet
2025-11-20 10:53:32,810 :: PID 19468 :: TID 32312 :: database._create_connection :: Line 33 :: DEBUG :: Connected to SQLite database
2025-11-20 10:53:32,811 :: PID 19468 :: TID 32312 :: database.read_records :: Line 138 :: DEBUG :: Attempting to execute SELECT SQL statement
2025-11-20 10:53:32,814 :: PID 19468 :: TID 32312 :: database._execute :: Line 57 :: DEBUG :: SQL statement executed successfully
2025-11-20 10:53:32,816 :: PID 19468 :: TID 32312 :: database._close_connection :: Line 44 :: DEBUG :: Closed connection to SQLite database


In [69]:
# Display the pivoted records that were written above (pandas truncates the view).
drafts_df   

key,22200,22210,22220,22225,22226,22230,22250,22260,22261,22280,22281,22282
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,3300454858400082SMSRAWDATA00000003V22200A ...,3300454858400082SMSRAWDATA00000004V22210111714...,3300454858400082SMSRAWDATA00000005V22220020510...,3300454858400082SMSRAWDATA00000006V22225020044...,,,3300454858400082SMSRAWDATA00000007V22250111825...,3300454858400082SMSRAWDATA00000008V22260532100...,,3300454858400082SMSRAWDATA00000009V222800 ...,3300454858400082SMSRAWDATA00000010V22281 ...,3300454858400082SMSRAWDATA00000011V22282 ...
1,3300454858400082SMSRAWDATA00000012V22200A ...,3300454858400082SMSRAWDATA00000013V22210111714...,3300454858400082SMSRAWDATA00000014V22220020510...,3300454858400082SMSRAWDATA00000015V22225020045...,,,3300454858400082SMSRAWDATA00000016V22250111825...,3300454858400082SMSRAWDATA00000017V22260532100...,,3300454858400082SMSRAWDATA00000018V222800 ...,3300454858400082SMSRAWDATA00000019V22281 ...,3300454858400082SMSRAWDATA00000020V22282 ...
2,3300454858400082SMSRAWDATA00000021V22200A ...,3300454858400082SMSRAWDATA00000022V22210111714...,3300454858400082SMSRAWDATA00000023V22220020510...,3300454858400082SMSRAWDATA00000024V22225020047...,,,3300454858400082SMSRAWDATA00000025V22250111825...,3300454858400082SMSRAWDATA00000026V22260532100...,,3300454858400082SMSRAWDATA00000027V222800 ...,3300454858400082SMSRAWDATA00000028V22281 ...,3300454858400082SMSRAWDATA00000029V22282 ...
3,3300454858400082SMSRAWDATA00000030V22200A ...,3300454858400082SMSRAWDATA00000031V22210111714...,3300454858400082SMSRAWDATA00000032V22220020510...,3300454858400082SMSRAWDATA00000033V22225020097...,,,3300454858400082SMSRAWDATA00000034V22250111825...,3300454858400082SMSRAWDATA00000035V22260532100...,,3300454858400082SMSRAWDATA00000036V222800 ...,3300454858400082SMSRAWDATA00000037V22281 ...,3300454858400082SMSRAWDATA00000038V22282 ...
4,3300454858400082SMSRAWDATA00000039V22200A ...,3300454858400082SMSRAWDATA00000040V22210111714...,3300454858400082SMSRAWDATA00000041V22220020510...,3300454858400082SMSRAWDATA00000042V22225020022...,,,,3300454858400082SMSRAWDATA00000043V22260532100...,,3300454858400082SMSRAWDATA00000044V222800 ...,3300454858400082SMSRAWDATA00000045V22281 ...,3300454858400082SMSRAWDATA00000046V22282 ...
...,...,...,...,...,...,...,...,...,...,...,...,...
35969,3300454858400082SMSRAWDATA00251762V22200A ...,3300454858400082SMSRAWDATA00251763V22210111811...,3300454858400082SMSRAWDATA00251764V22220020510...,,,,3300454858400082SMSRAWDATA00251765V22250111825...,3300454858400082SMSRAWDATA00251766V22260532200...,,3300454858400082SMSRAWDATA00251767V22280060402...,3300454858400082SMSRAWDATA00251768V222810C0185...,3300454858400082SMSRAWDATA00251769V22282 ...
35970,3300454858400082SMSRAWDATA00251770V22200A ...,3300454858400082SMSRAWDATA00251771V22210111811...,3300454858400082SMSRAWDATA00251772V22220020510...,,,,3300454858400082SMSRAWDATA00251773V22250111825...,3300454858400082SMSRAWDATA00251774V22260532200...,,3300454858400082SMSRAWDATA00251775V22280060402...,3300454858400082SMSRAWDATA00251776V22281151142...,3300454858400082SMSRAWDATA00251777V22282 ...
35971,3300454858400082SMSRAWDATA00251778V22200A ...,3300454858400082SMSRAWDATA00251779V22210111811...,3300454858400082SMSRAWDATA00251780V22220020510...,,,,3300454858400082SMSRAWDATA00251781V22250111825...,3300454858400082SMSRAWDATA00251782V22260532200...,,3300454858400082SMSRAWDATA00251783V22280060402...,3300454858400082SMSRAWDATA00251784V22281FADB2F...,3300454858400082SMSRAWDATA00251785V22282 ...
35972,3300454858400082SMSRAWDATA00251786V22200A ...,3300454858400082SMSRAWDATA00251787V22210111812...,3300454858400082SMSRAWDATA00251788V22220029010...,,,,3300454858400082SMSRAWDATA00251789V22250111825...,3300454858400082SMSRAWDATA00251790V22260532200...,,,,
