In [1]:
import polars as pl

In [81]:
# Load the four source CSV files; the order of the paths matches the order of
# the names on the left-hand side.
acc_holder_df, acc_info_df, tra_detail_df, tra_path_df = map(
    pl.read_csv,
    (
        'data/Account Holders.csv',
        'data/Account Information.csv',
        'data/Transaction Detail.csv',
        'data/Transaction Path.csv',
    ),
)

In [82]:
# Quick structural overview (row/column counts, dtypes, first values) of each
# input frame.
#
# `DataFrame.glimpse()` prints its summary to stdout and returns None, so the
# calls are made directly for their printed side effect. Wrapping them in
# `display(...)` only rendered the four None return values as extra output.
acc_holder_df.glimpse()
acc_info_df.glimpse()
tra_detail_df.glimpse()
tra_path_df.glimpse()

Rows: 3072
Columns: 5
$ Account Holder ID     <i64> 70390615, 20123998, 54374080, 97027297, 89920386, 97325900, 89460366, 53258828, 41402857, 8882143
$ Name                  <str> 'Mahmoud Hehnke', 'Maynord Surgeoner', 'Giraldo Kimbley', 'Blake Dudson', 'Ajay Douce', 'Evonne Lindell', 'Kayla Emtage', 'Effie Bigglestone', 'Federico Cathro', 'Kizzee Puddephatt'
$ Date of Birth         <str> '28/08/1995', '21/08/1997', '22/03/1995', '30/06/1955', '19/12/1930', '07/07/1946', '12/10/1987', '04/03/1992', '13/12/1985', '18/10/2003'
$ Contact Number        <i64> 7479286250, 7716107305, 7489940612, 7253587445, 7395580534, 7631255412, 7867994451, 7623832140, 7283906757, 7598835878
$ First Line of Address <str> '18535 Loftsgordon Park', '6422 Buena Vista Plaza', '93005 Summer Ridge Avenue', '2 Huxley Hill', '90176 Miller Alley', '90176 Miller Alley', '8 Green Ridge Alley', '231 Spenser Hill', '12 Green Hill', '72404 Maywood Avenue'

Rows: 3000
Columns: 5
$ Account Number    <i64> 10005367, 100119

None

None

None

None

In [83]:
# Reshape the account table to one row per (account, holder) pair:
#   1. discard accounts that have no 'Account Holder ID' at all;
#   2. split the ', '-separated ID string into a list;
#   3. explode that list so each ID gets its own row.
# The IDs remain strings after this step.
acc_info_df = (
    acc_info_df
    .drop_nulls(subset='Account Holder ID')
    .with_columns(pl.col('Account Holder ID').str.split(', '))
    .explode('Account Holder ID')
)
acc_info_df.head()

Account Number,Account Type,Account Holder ID,Balance Date,Balance
i64,str,str,str,f64
10005367,"""Platinum""","""70390615""","""2023-01-31""",728.25
10011977,"""Basic""","""20123998""","""2023-01-31""",676.54
10024680,"""Platinum""","""54374080""","""2023-01-31""",567.46
10031238,"""Basic""","""97027297""","""2023-01-31""",576.52
10034341,"""Joint""","""89920386""","""2023-01-31""",390.39


In [89]:
# Inspect which distinct flags occur in the 'Cancelled?' column.
tra_detail_df.select(pl.col('Cancelled?').unique())

Cancelled?
str
"""Y"""
"""N"""


In [90]:
# Combine transaction details with tra_path_df on 'Transaction ID', then keep
# only rows that are not cancelled ('N') and whose 'Value' is strictly above 1000.
not_cancelled = pl.col('Cancelled?') == 'N'
value_over_1000 = pl.col('Value') > 1000

transactions_df = (
    tra_detail_df
    .join(tra_path_df, on='Transaction ID', how='inner')
    .filter(not_cancelled & value_over_1000)
)
transactions_df.head()

Transaction ID,Transaction Date,Value,Cancelled?,Account_To,Account_From
i64,str,f64,str,i64,i64
45024251,"""2023-02-11""",1137.9,"""N""",15826579,53727603
46779876,"""2023-02-02""",1019.5,"""N""",21694967,82023979
57491925,"""2023-02-12""",1761.1,"""N""",19094015,65589565
108536548,"""2023-02-06""",1085.4,"""N""",86207903,10553619
176916326,"""2023-02-12""",1955.6,"""N""",33632099,18185362


In [91]:
# 'Contact Number' is rendered as text with a '0' prepended (presumably the
# leading zero was lost when the CSV column was read as an integer -- confirm).
contact_number_text = (
    pl.lit('0') + pl.col('Contact Number').cast(pl.Utf8)
).alias('Contact Number')

# The exploded holder IDs are strings; cast them to Int64 so the join key has
# the same dtype as acc_holder_df's 'Account Holder ID'.
acc_info_int_ids = acc_info_df.with_columns(
    pl.col('Account Holder ID').cast(pl.Int64)
)

# One row per holder/account pair, excluding 'Platinum' accounts.
accounts_df = (
    acc_holder_df
    .with_columns(contact_number_text)
    .join(acc_info_int_ids, on='Account Holder ID', how='inner')
    .filter(pl.col('Account Type').ne('Platinum'))
)
accounts_df

Account Holder ID,Name,Date of Birth,Contact Number,First Line of Address,Account Number,Account Type,Balance Date,Balance
i64,str,str,str,str,i64,str,str,f64
20123998,"""Maynord Surgeo…","""21/08/1997""","""07716107305""","""6422 Buena Vis…",10011977,"""Basic""","""2023-01-31""",676.54
97027297,"""Blake Dudson""","""30/06/1955""","""07253587445""","""2 Huxley Hill""",10031238,"""Basic""","""2023-01-31""",576.52
89920386,"""Ajay Douce""","""19/12/1930""","""07395580534""","""90176 Miller A…",10034341,"""Joint""","""2023-01-31""",390.39
97325900,"""Evonne Lindell…","""07/07/1946""","""07631255412""","""90176 Miller A…",10034341,"""Joint""","""2023-01-31""",390.39
89460366,"""Kayla Emtage""","""12/10/1987""","""07867994451""","""8 Green Ridge …",10079965,"""Basic""","""2023-01-31""",799.27
41402857,"""Federico Cathr…","""13/12/1985""","""07283906757""","""12 Green Hill""",10231977,"""Basic""","""2023-01-31""",61.63
8882143,"""Kizzee Puddeph…","""18/10/2003""","""07598835878""","""72404 Maywood …",10232603,"""Gold""","""2023-01-31""",732.55
84620072,"""Kalvin Duetsch…","""27/03/1965""","""07727619510""","""19032 Duke Par…",10243581,"""Gold""","""2023-01-31""",104.27
90848186,"""Mead Yuryshev""","""25/01/1981""","""07380168367""","""7739 Amoth Cir…",10273387,"""Gold""","""2023-01-31""",581.83
20689914,"""Susy Haggus""","""28/03/1948""","""07518352243""","""507 Vahlen Par…",10288821,"""Gold""","""2023-01-31""",642.41


In [102]:
# Final result: every filtered transaction whose sending account
# ('Account_From') matches an account in accounts_df, together with the
# account and holder details. 'Account_To' is exposed as 'Account To', columns
# are put in the output order, and exact duplicate rows are removed.
(
    accounts_df
    .join(
        transactions_df,
        left_on='Account Number',
        right_on='Account_From',
        how='inner',
    )
    .select(
        'Transaction ID',
        pl.col('Account_To').alias('Account To'),
        'Transaction Date',
        'Value',
        'Account Number',
        'Account Type',
        'Balance Date',
        'Balance',
        'Name',
        'Date of Birth',
        'Contact Number',
        'First Line of Address',
    )
    .unique()
)

Transaction ID,Account To,Transaction Date,Value,Account Number,Account Type,Balance Date,Balance,Name,Date of Birth,Contact Number,First Line of Address
i64,i64,str,f64,i64,str,str,f64,str,str,str,str
1019117125,43623061,"""2023-02-02""",1502.2,10769769,"""Basic""","""2023-01-31""",906.91,"""Babbette Bromb…","""24/05/1936""","""07355990194""","""447 Dwight Cir…"
6732430556,60114464,"""2023-02-10""",1640.4,11846043,"""Basic""","""2023-01-31""",915.59,"""Harrietta McIl…","""12/05/1971""","""07319685775""","""88637 Rutledge…"
3203866781,86339899,"""2023-02-06""",1792.7,14337606,"""Basic""","""2023-01-31""",561.8,"""Em Willicott""","""25/11/1943""","""07460992733""","""7776 Parkside …"
7466187412,37503622,"""2023-02-09""",1300.7,14448009,"""Joint""","""2023-01-31""",958.46,"""Brinn Attkins""","""13/10/1993""","""07538141937""","""70105 Hayes Wa…"
2926917953,96121732,"""2023-02-11""",1669.7,18348734,"""Basic""","""2023-01-31""",579.93,"""Colene Rastric…","""24/03/1946""","""07955897701""","""8 Macpherson T…"
4000808907,92145159,"""2023-02-13""",1370.8,18447548,"""Basic""","""2023-01-31""",574.24,"""Amalle Townsen…","""06/01/1959""","""07564656009""","""3526 Sutherlan…"
4504569876,64228624,"""2023-02-12""",1153.9,22830156,"""Basic""","""2023-01-31""",596.06,"""Elli Cater""","""04/11/1942""","""07563267406""","""401 Summerview…"
6282069041,63298193,"""2023-02-08""",1360.9,24184530,"""Joint""","""2023-01-31""",37.87,"""Gates Slevin""","""10/12/1972""","""07334338009""","""9 Holmberg All…"
8680634573,35633136,"""2023-02-09""",1855.5,24462859,"""Basic""","""2023-01-31""",646.0,"""Kittie Heakins…","""10/07/1957""","""07641727085""","""9 Village Gree…"
9859809917,25678043,"""2023-02-08""",2283.2,24529850,"""Basic""","""2023-01-31""",800.79,"""Coralie Knelle…","""28/09/1968""","""07134906360""","""682 Morning Pa…"
