# FrenchRoastPy aka Transaction Parsing

## Create dataframe to store balances

In [1]:
import pandas as pd
import csv
import json
from decimal import *

In [2]:
# Load only the first three CSV columns and index the frame by transactionId.
# (Presumably those columns are transactionId, customerId, accountId -- confirm
# against the CSV header.)
df = pd.read_csv(
    'resources/secondset.csv',
    usecols=[0, 1, 2],
    index_col='transactionId',
)

In [3]:
# Keep only the first row seen for each (customerId, accountId) pair.
# What remains is one row per unique customer/account combination,
# which is what the balance table is keyed on.
repeated_pair = df.duplicated(subset=['customerId', 'accountId'])
df = df[~repeated_pair]

In [4]:
# Re-key the frame on (customerId, accountId): this builds a MultiIndex and
# discards the previous transactionId index.
df = df.set_index(['customerId', 'accountId'])
# Order the rows by the new index values.
df = df.sort_index()

In [5]:
# Give every customer/account row a starting balance of zero (float column).
df = df.assign(balance=0.00)

In [6]:
# Display the table: indexed by (customerId, accountId) with a zeroed balance column.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,balance
customerId,accountId,Unnamed: 2_level_1
10,20,0.0
10,22,0.0
10,25,0.0
11,21,0.0
11,24,0.0
12,20,0.0
12,21,0.0
12,22,0.0
12,23,0.0
12,24,0.0


## Calculate balances per account

In [9]:
# For each line in csv determine transaction type
# Increment or decrement the balance in the appropriate account 
# This sometimes causes a warning that a negative number "has a dtype incompatible with float64"

In [8]:
# Tally every transaction into a per-(customerId, accountId) running balance.
#
# The totals are accumulated as Decimal in a plain dict rather than inside the
# DataFrame. Writing Decimal objects into the float64 'balance' column is what
# triggers the "dtype incompatible with float64" warning, and reading a float
# back into Decimal on every row reintroduces binary rounding noise.
balances = {key: Decimal(0) for key in df.index}

# newline='' is the csv module's recommended way to open files for csv readers.
with open('resources/secondset.csv', 'r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        key = (int(row['customerId']), int(row['accountId']))
        transaction_type = row['transactionType']
        amount = Decimal(row['amount'])

        # A row is either a deposit or a withdrawal; any other transaction
        # type leaves the balance untouched, as before.
        if transaction_type == 'deposit':
            balances[key] += amount
        elif transaction_type == 'withdrawal':
            balances[key] -= amount

# Write the exact totals back once, as floats, so the column stays float64.
df['balance'] = [float(balances[key]) for key in df.index]


In [10]:
# Show up to the first 20 rows of the balance table.
df.iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,balance
customerId,accountId,Unnamed: 2_level_1
10,20,-238.24
10,22,432.93
10,25,583.73
11,21,812.45
11,24,-542.67
12,20,-907.89
12,21,-976.01
12,22,-1695.83
12,23,-374.9
12,24,-697.57


## Convert to properly formatted JSON

In [20]:
# The above df could be converted to json, but it's not very readable
# My resulting json matches the lab's target json in structure and data, but not in spacing

In [21]:
# Mapping of customer id -> that customer's JSON-ready record; starts empty.
customers = dict()

In [22]:
# Group the per-account balances under their owning customer.
for record in df.itertuples():
    # The MultiIndex entry is a (customerId, accountId) pair.
    cust_id, acct_id = record.Index

    # Fetch this customer's record, creating it on first sight.
    entry = customers.setdefault(cust_id, {'id': cust_id, 'accounts': []})

    entry['accounts'].append(
        {'account_id': acct_id, 'balance': float(record.balance)}
    )

In [23]:
# Flatten the id -> record mapping into a plain list of customer records.
customer_list = [*customers.values()]

In [24]:
# Serialize the customer records as JSON text with 4-space indentation
json_output = json.dumps(customer_list, indent=4)

# Save the JSON text to resources/output1.json
with open('resources/output1.json', 'w') as out_file:
    out_file.write(json_output)