### Raw data processing

In [46]:
import csv

def read_transactions(path='transactions.csv'):
    """Read the transactions CSV and return its rows grouped by type.

    The first row is treated as a header and skipped. Every cell is stripped
    of surrounding whitespace, and columns are read by position:
    date, type, product type, product name, product id, units, total.

    Args:
        path: Location of the CSV file. Defaults to 'transactions.csv'.

    Returns:
        The value returned by process_transactions() for the parsed rows.

    Raises:
        IndexError: If a non-blank row has fewer than seven columns.
        ValueError: If a units or total cell is not numeric.
    """
    transactions = []

    with open(path, mode='r', newline='') as file:
        csv_reader = csv.reader(file)

        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)

        for row in csv_reader:
            row = [cell.strip() for cell in row]

            # csv.reader yields [] for blank lines; skip those (and rows made
            # only of empty cells) instead of failing on row[0].
            if not any(row):
                continue

            transactions.append({
                "date": row[0],
                "type": row[1].lower(),
                "product_type": row[2],
                "product_name": row[3],
                "product_id": row[4].upper(),
                # Empty cells become None rather than failing the conversion.
                "units": int(row[5]) if row[5] else None,
                # The sign is dropped, so totals are never negative.
                "total": abs(float(row[6])) if row[6] else None,
            })

    return process_transactions(transactions)

def process_transactions(transactions):
    """Group transactions by type, adding a per-unit price to buys and sells.

    Args:
        transactions: Iterable of transaction dicts. Each needs a "type" key;
            "buy" and "sell" entries also need "total" and "units".

    Returns:
        A dict mapping each transaction type to the list of its transactions,
        in input order. Every entry is a shallow copy of the input dict, so
        the caller's dicts are never modified or shared. "buy" and "sell"
        entries gain a "price_per_unit" key, which is None when the total is
        missing or the units are missing or zero.
    """
    processed_transactions = {}

    for transaction in transactions:
        # Always work on a copy so the input is left untouched for every type.
        pt = transaction.copy()

        if pt["type"] in ("buy", "sell"):
            units = pt["units"]
            total = pt["total"]
            # Missing values or zero units have no meaningful unit price;
            # record None instead of raising TypeError / ZeroDivisionError.
            if units and total is not None:
                pt["price_per_unit"] = total / units
            else:
                pt["price_per_unit"] = None

        # Start a new list the first time a type is seen, then append.
        processed_transactions.setdefault(pt["type"], []).append(pt)

    return processed_transactions

# Read transactions: load the CSV and keep the result, which
# read_transactions() returns grouped by transaction type
# (a dict mapping each type to its list of transactions).
transactions = read_transactions()

### Buy and Sell transactions

In [47]:
def get_buy_and_sell_summary(transactions):
    """Total units and amounts per product for each transaction type.

    The summary is printed and also returned, so the caller can keep using it.

    Args:
        transactions: Dict mapping a transaction type to a list of transaction
            dicts, each with "product_id", "units" and "total" keys. A units
            or total value of None is counted as 0.

    Returns:
        A dict of the form
        {transaction_type: {product_id: {"units", "total", "price_per_unit"}}}.
        "price_per_unit" is total / units, or None when units is zero.
    """
    summary = {}

    for transaction_type, transaction_list in transactions.items():
        # Running totals for this transaction type, keyed by product id.
        product_summary = {}

        for transaction in transaction_list:
            entry = product_summary.setdefault(
                transaction["product_id"], {"units": 0, "total": 0}
            )
            # "or 0" keeps a missing (None) value from breaking the sum.
            entry["units"] += transaction["units"] or 0
            entry["total"] += transaction["total"] or 0

        summary[transaction_type] = product_summary

    # Print the summary
    for transaction_type, product_summary in summary.items():
        print(f"{transaction_type.capitalize()} transactions:")
        for product_id, product in product_summary.items():
            units = product["units"]
            # No average price can be computed without any units.
            product["price_per_unit"] = product["total"] / units if units else None
            print(f"\t{product_id}:")
            print(f"\t\tUnits: {product['units']}")
            print(f"\t\tTotal: ${product['total']:.2f}")
            if product["price_per_unit"] is None:
                print("\t\tPrice per unit (AVG): n/a")
            else:
                # Single quotes inside the f-string keep this valid before
                # Python 3.12 as well.
                print(f"\t\tPrice per unit (AVG): ${product['price_per_unit']:.2f}")
        print()

    return summary

# Keep only the "buy" and "sell" groups, then summarise them
buy_and_sell_transactions = {
    kind: entries
    for kind, entries in transactions.items()
    if kind in ["buy", "sell"]
}

buy_and_sell_summary = get_buy_and_sell_summary(buy_and_sell_transactions)

Sell transactions:
	VGS:
		Units: 7
		Total: $876.19
		Price per unit (AVG): $125.17
	VAS:
		Units: 9
		Total: $886.05
		Price per unit (AVG): $98.45
	VDHG:
		Units: 1
		Total: $64.70
		Price per unit (AVG): $64.70

Buy transactions:
	VDHG:
		Units: 27
		Total: $1739.40
		Price per unit (AVG): $64.42
	VGS:
		Units: 223
		Total: $25057.87
		Price per unit (AVG): $112.37
	VAS:
		Units: 250
		Total: $23523.91
		Price per unit (AVG): $94.10



### Display in nice table format

In [48]:
import pandas as pd
from IPython.display import display

# Show the sell transactions as a formatted pandas table.
# .get() falls back to an empty list so that data without any sell rows
# displays an empty table instead of raising KeyError.
df = pd.DataFrame(buy_and_sell_transactions.get("sell", []))
display(df)

Unnamed: 0,date,type,product_type,product_name,product_id,units,total,price_per_unit
0,2024-07-22,sell,ETF,Vanguard Diversified High Growth Index ETF,VGS,7,876.19,125.17
1,2024-07-22,sell,ETF,Vanguard Diversified High Growth Index ETF,VAS,9,886.05,98.45
2,2024-07-22,sell,ETF,Vanguard Diversified High Growth Index ETF,VDHG,1,64.7,64.7
