# In [162]:
import polars as pl
from pathlib import Path


# -------------------------------------------------------------------------------
# loading all input files
# -------------------------------------------------------------------------------

# define the paths for all input files
# All inputs live in the 'input' folder below the current working directory.
# The paths are joined with the pathlib '/' operator instead of interpolating
# the cwd into a string, so a cwd of '/' cannot produce a '//input/...' path.
input_dir = Path.cwd() / 'input'
account_info = input_dir / 'Account Information.csv'
transaction_detail = input_dir / 'Transaction Detail.csv'
transaction_path = input_dir / 'Transaction Path.csv'

# load the transaction detail file and transform the data as required:
#   1. read the CSV (first row is the header, date-like columns are parsed)
#   2. keep only the rows whose "Cancelled?" flag equals 'N'
#   3. drop the flag column, which is constant after the filter
df_detail = pl.read_csv(transaction_detail, has_header=True, parse_dates=True)
df_detail = df_detail.filter(pl.col("Cancelled?") == 'N')
df_detail = df_detail.drop("Cancelled?")


# -------------------------------------------------------------------------------
# loading the remaining input files and transforming the data
# -------------------------------------------------------------------------------

# Transaction path file: the columns are renamed on read. new_columns is
# applied by position, so this presumably relies on the CSV listing the
# transaction id first, then the receiving and the sending account - verify
# against the input file.
df_path = pl.read_csv(
    transaction_path,
    new_columns=['transaction_id', 'account_to', 'account_from'],
)

# Account information file: only the account number, balance date and balance
# are read, and they are renamed to the names used by the transaction table
# ('account', 'Transaction Date', 'Value').
df_account_info = pl.read_csv(
    account_info,
    columns=['Account Number', 'Balance Date', 'Balance'],
    parse_dates=True,
    new_columns=['account', 'Transaction Date', 'Value'],
)
# Reorder the columns to the layout the transaction table is stacked with.
df_account_info = df_account_info.select(['Transaction Date', 'Value', 'account'])

# combine all 3 data input files while transforming those
# Result: one long table with the columns 'Transaction Date', 'Value' and
# 'account', holding one row per account per transaction plus the rows of
# df_account_info.
df_comb = (
    df_detail.join(df_path,
                   left_on='Transaction ID',
                   right_on='transaction_id',
                   how='inner')
             .drop('Transaction ID')
             # unpivot the data so we have both accounts in the same column;
             # 'account_type' records which of the two columns a row came from
             .melt(['Transaction Date', 'Value'],
                   variable_name='account_type',
                   value_name='account')
             # account_from means money is being withdrawn, therefore * (-1).
             # with_columns (plural) is used here, matching the call further
             # down the script; the singular with_column is deprecated.
             .with_columns([
                 pl.when(pl.col('account_type') == 'account_from')
                   .then(pl.col('Value') * (-1))
                   .otherwise(pl.col('Value'))
                   .alias('Value')
             ])
             .drop('account_type')
             # union df_account_info to the joined table (it was selected in the
             # same column order: 'Transaction Date', 'Value', 'account')
             .vstack(df_account_info)
    # sort every column by Transaction Date ASC, account ASC and Value DESC
    # (the second list holds the per-key "reverse" flags)
).select(pl.col('*').sort_by(['Transaction Date', 'account', 'Value'], [False, False, True]))



# Per account (window over 'account', using the row order established by the
# sort above) derive two columns from 'Value', then drop 'Value' itself:
#   value_lag   - 'Value' shifted down by one row within the account, so the
#                 first row of every account is null
#   running_sum - cumulative sum of 'Value' within the account
# Both expressions read the same untouched 'Value' column, so they are computed
# in a single with_columns call (the singular with_column is deprecated).
# NOTE(review): value_lag carries the PREVIOUS row's Value, while running_sum
# already includes the CURRENT row's Value. After the rename below,
# 'transaction_value' on a row is therefore not the amount that produced that
# row's 'balance' - confirm this offset is intended.
df_comb = (
    df_comb.with_columns([
                pl.col('Value').shift(periods=1).over('account').alias('value_lag'),
                pl.col('Value').cumsum().over('account').alias('running_sum'),
            ])
           .drop('Value')
)

# sense check
#df_comb.filter(pl.col('account').is_in([10005367]))


# renaming all the fields as required, in two passes:
#   1. normalise every existing column name to lower case with underscores
#   2. map the working names onto the final output names
# and finally fix the column order of the output.
snake_case_names = {name: name.lower().replace(' ', '_') for name in df_comb.columns}
final_names = {
    'account': 'account_number',
    'value_lag': 'transaction_value',
    'running_sum': 'balance',
}
output_columns = ['account_number', 'transaction_date', 'transaction_value', 'balance']

df_comb = df_comb.rename(snake_case_names)
df_comb = df_comb.rename(final_names)
df_comb = df_comb.select(output_columns)



# -------------------------------------------------------------------------------
# writing the output file
# -------------------------------------------------------------------------------

# Absolute form of the output folder (kept for reference; the write below uses
# the relative form).
output_dir = Path.cwd() / 'output'
# Relative form of the same folder: it is resolved against the current working
# directory, so both point to the same place as long as the script is run from
# the directory that should contain 'output'.
output_dir_test = Path('output')

# Create the folder if it is missing. exist_ok=True makes this a no-op when the
# folder already exists, so no separate exists() check (and no duplicated write
# in two branches) is needed.
output_dir_test.mkdir(parents=True, exist_ok=True)
df_comb.write_csv(output_dir_test / 'output-sol-py.csv')





