In [126]:
'''
Challenge link
https://preppindata.blogspot.com/2023/03/2023-week-10-whats-my-balance-on-this.html
'''

import pandas as pd
import numpy as np
from datetime import datetime

In [114]:
acc_stat = pd.read_csv("Account Statements.csv")
acc_stat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20378 entries, 0 to 20377
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Account Number     20378 non-null  int64  
 1   Balance Date       20378 non-null  object 
 2   Transaction Value  17378 non-null  float64
 3   Balance            20378 non-null  float64
dtypes: float64(2), int64(1), object(1)
memory usage: 636.9+ KB


In [115]:
# acc_stat['Balance Date'] = acc_stat['Balance Date'].astype('datetime64[ns]', format="%d/%m/%y")
acc_stat['Balance Date'] = pd.to_datetime(acc_stat['Balance Date'], dayfirst=True)
acc_stat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20378 entries, 0 to 20377
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Account Number     20378 non-null  int64         
 1   Balance Date       20378 non-null  datetime64[ns]
 2   Transaction Value  17378 non-null  float64       
 3   Balance            20378 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 636.9 KB


In [116]:
acc_stat.head(5)

Unnamed: 0,Account Number,Balance Date,Transaction Value,Balance
0,44873253,2023-02-07,-113.43,522.41
1,17938672,2023-02-01,102.95,1007.94
2,29358670,2023-02-13,-56.85,-39.49
3,32803222,2023-02-13,96.06,1059.24
4,83975923,2023-01-31,,837.45


### Aggregate the data so we have a single balance for each day already in the dataset, for each account

In [117]:
temp = acc_stat.groupby(['Account Number' , 'Balance Date'])[['Transaction Value', 'Balance']].sum().reset_index()
temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17221 entries, 0 to 17220
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Account Number     17221 non-null  int64         
 1   Balance Date       17221 non-null  datetime64[ns]
 2   Transaction Value  17221 non-null  float64       
 3   Balance            17221 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 538.3 KB


In [118]:
temp.head(5)

Unnamed: 0,Account Number,Balance Date,Transaction Value,Balance
0,10005367,2023-01-31,0.0,728.25
1,10005367,2023-02-02,-1097.6,-369.35
2,10005367,2023-02-04,77.74,-516.21
3,10005367,2023-02-05,-108.26,-399.87
4,10005367,2023-02-08,875.51,1016.77


### Scaffold the data so each account has a row between 31st Jan and 14th Feb
### Make sure new rows have a null in the Transaction Value field

In [119]:
date_df = pd.DataFrame(columns=['Balance Date'])
date_df['Balance Date'] = pd.date_range(start='31/01/2023', end='14/02/2023')
date_df.head(5)

  date_df['Balance Date'] = pd.date_range(start='31/01/2023', end='14/02/2023')


Unnamed: 0,Balance Date
0,2023-01-31
1,2023-02-01
2,2023-02-02
3,2023-02-03
4,2023-02-04


In [120]:
acc_num = pd.DataFrame(temp['Account Number'])
acc_num.drop_duplicates(inplace=True)
acc_num.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3000 entries, 0 to 17211
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Account Number  3000 non-null   int64
dtypes: int64(1)
memory usage: 46.9 KB


In [121]:
date_df = acc_num.merge(date_df,how='cross')
date_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 0 to 44999
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Account Number  45000 non-null  int64         
 1   Balance Date    45000 non-null  datetime64[ns]
dtypes: datetime64[ns](1), int64(1)
memory usage: 1.0 MB


In [122]:
acc_df = temp.merge(date_df,how='right', on=['Account Number','Balance Date'])
acc_df.head()

Unnamed: 0,Account Number,Balance Date,Transaction Value,Balance
0,10005367,2023-01-31,0.0,728.25
1,10005367,2023-02-01,,
2,10005367,2023-02-02,-1097.6,-369.35
3,10005367,2023-02-03,,
4,10005367,2023-02-04,77.74,-516.21


In [123]:
id = 0
bal = 0
for i in range(len(acc_df)):
    # Same Account Number as previous i
    if id == acc_df.loc[i,'Account Number']:
        if np.isnan(acc_df.loc[i,'Balance']) == False:
            bal = acc_df.loc[i,'Balance']
        elif np.isnan(acc_df.loc[i,'Balance']) == True:
            acc_df.loc[i,'Balance'] = bal
        if np.isnan(acc_df.loc[i,'Transaction Value']) == True:
            acc_df.loc[i,'Transaction Value'] = 0
    else:
        id = acc_df.loc[i,'Account Number']
        bal = acc_df.loc[i,'Balance']

In [124]:
acc_df.head(10)

Unnamed: 0,Account Number,Balance Date,Transaction Value,Balance
0,10005367,2023-01-31,0.0,728.25
1,10005367,2023-02-01,0.0,728.25
2,10005367,2023-02-02,-1097.6,-369.35
3,10005367,2023-02-03,0.0,-369.35
4,10005367,2023-02-04,77.74,-516.21
5,10005367,2023-02-05,-108.26,-399.87
6,10005367,2023-02-06,0.0,-399.87
7,10005367,2023-02-07,0.0,-399.87
8,10005367,2023-02-08,875.51,1016.77
9,10005367,2023-02-09,0.0,1016.77


### Create a parameter so a particular date can be selected

In [131]:
# Date same as given as example in challenge
d = 1
m = 2
y = 23
date = datetime.strptime(f'{d}-{m}-{y}', f'%d-%m-%y')
print(date)


2023-02-01 00:00:00


In [132]:
temp = acc_df.loc[acc_df['Balance Date'] == date]
temp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3000 entries, 1 to 44986
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Account Number     3000 non-null   int64         
 1   Balance Date       3000 non-null   datetime64[ns]
 2   Transaction Value  3000 non-null   float64       
 3   Balance            3000 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 117.2 KB
