# FrenchRoastPy aka Transaction Parsing

## Create dataframe to store balances

In [1]:
import pandas as pd
import csv
import json

In [2]:
df = pd.read_csv('resources/transactions.csv',
                 index_col='transactionId',
                 usecols=[0, 1, 2])

In [3]:
# Drop any row where there is a duplicate combination of customer and account
df = df.drop_duplicates(subset=['customerId', 'accountId'])

In [4]:
# .set_index creates a multi index
# .sort_index for readability
df = df.set_index(['customerId', 'accountId']).sort_index()

In [5]:
# add a balances column
df['balance'] = 0.0

In [6]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,balance
customerId,accountId,Unnamed: 2_level_1
100,10000,0.0
100,10001,0.0
100,10002,0.0
101,10000,0.0
101,10001,0.0
...,...,...
119,10001,0.0
119,10002,0.0
120,10000,0.0
120,10001,0.0


## Calculate balances per account

In [7]:
# My initial idea of using dfs as source and target may be a bad idea. Iterating over rows in pandas is slow and not recommended.
# Next idea, read the csv directly and store results in a df. If that works I can clean up the df creation process.

In [8]:
# for each line in csv
# determine transaction type
# increment or decrement balance in appropriate account 

In [9]:
with open('resources/transactions.csv', 'r') as csv_file:
    file = csv.DictReader(csv_file)

    for row in file:
        customer_id = int(row['customerId'])
        account_id = int(row['accountId'])
        transaction_type = row['transactionType']
        amount = float(row['amount'])

        target_cell = (customer_id, account_id), ('balance')
        current_balance = df.loc[target_cell]
        
        if transaction_type == 'deposit':
            df.loc[target_cell] += amount
        if transaction_type == 'withdrawal':
            df.loc[target_cell] =- amount

In [14]:
df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,balance
customerId,accountId,Unnamed: 2_level_1
100,10000,-153.89
100,10001,-292.33
100,10002,-359.48
101,10000,1554.84
101,10001,-242.81
101,10002,-546.97
102,10000,-953.82
102,10001,780.46
102,10002,361.44
103,10000,-967.17


## Convert to properly formatted JSON

In [11]:
customers = {}

In [12]:
for row in df.itertuples():
    customer_id = row.Index[0]
    account_id = row.Index[1]
    balance = row.balance
    
    if customer_id not in customers:
        customers[customer_id] = {'id': customer_id, 'accounts': []} 
    
    account_data = {'account_id': account_id, 'balance': balance}  
    customers[customer_id]['accounts'].append(account_data)
        
    
customers

{100: {'id': 100,
  'accounts': [{'account_id': 10000, 'balance': -153.89},
   {'account_id': 10001, 'balance': -292.33},
   {'account_id': 10002, 'balance': -359.48}]},
 101: {'id': 101,
  'accounts': [{'account_id': 10000, 'balance': 1554.84},
   {'account_id': 10001, 'balance': -242.81},
   {'account_id': 10002, 'balance': -546.97}]},
 102: {'id': 102,
  'accounts': [{'account_id': 10000, 'balance': -953.82},
   {'account_id': 10001, 'balance': 780.46},
   {'account_id': 10002, 'balance': 361.44000000000005}]},
 103: {'id': 103,
  'accounts': [{'account_id': 10000, 'balance': -967.17},
   {'account_id': 10001, 'balance': -292.21},
   {'account_id': 10002, 'balance': -886.91}]},
 104: {'id': 104,
  'accounts': [{'account_id': 10000, 'balance': 745.77},
   {'account_id': 10001, 'balance': 112.75},
   {'account_id': 10002, 'balance': 535.95}]},
 105: {'id': 105,
  'accounts': [{'account_id': 10000, 'balance': -268.39},
   {'account_id': 10001, 'balance': 1680.17},
   {'account_id': 100

In [13]:
customer_list = list(customers.values())


# Convert the list to JSON
json_output = json.dumps(customer_list, indent=4)
 
# Writing to sample.json
with open('resources/output1.json', 'w') as output:
    output.write(json_output)