In [1]:
import streamlit as st

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
from datetime import timedelta, date
import random

In [2]:
CATEGORIES_DATA = {
    "Food & Dining": {
        "Sub_Categories": {
            "Groceries": {
                "keywords": ["supermarket", "grocery shopping", "vegetables", "meat", "rice", "kitchen supplies"],
                "merchants": ["Market", "Supermarket", "Mart"],
                "amounts": {"Student": (5, 20), "Employee": (15, 50), "Family": (30, 150)},
                "frequency": {"Student": "weekly", "Employee": "bi-weekly", "Family": "daily/weekly"}
            },
            "Restaurant Meals": {
                "keywords": ["restaurant", "dinner out", "lunch", "cafe meal", "Khmer food", "Western food"],
                "merchants": ["Canteen", "Restaurant", "Street Food Stall", "Eatary"],
                "amounts": {"Student": (2, 10), "Employee": (5, 30), "Family": (10, 80)},
                "frequency": {"Student": "daily", "Employee": "daily/weekly", "Family": "weekly/bi-weekly"}
            },
            "Snacks & Drinks": {
                "keywords": ["coffee", "tea", "snack", "biscuit", "water", "juice", "beverage"],
                "merchants": ["Mart", "Coffee Shop", "Local Convenience Store"],
                "amounts": {"Student": (0.5, 5), "Employee": (1, 8), "Family": (2, 15)},
                "frequency": {"Student": "daily", "Employee": "daily", "Family": "daily"}
            },
        }
    },
    "Transportation": {
        "Sub_Categories": {
            "Ride-Hailing": {
                "keywords": ["grab ride", "passapp", "taxi app", "Rickshaw", "moto-taxi", "van", "bus"],
                "merchants": ["Express Company" "Local Driver"],
                "amounts": {"Student": (2, 7), "Employee": (3, 10), "Family": (5, 15)},
                "frequency": {"Student": "daily/weekly", "Employee": "daily/weekly", "Family": "weekly/monthly"}
            },
            "Fuel": {
                "keywords": ["gas", "petrol", "fuel station"],
                "merchants": ["Filling station"],
                "amounts": {"Student": (3, 10), "Employee": (5, 20), "Family": (10, 40)},
                "frequency": {"Student": "weekly", "Employee": "weekly", "Family": "weekly/bi-weekly"}
            },
            "Public Transport": {
                "keywords": ["bus ticket", "public bus"],
                "merchants": ["Phnom Penh City Bus"],
                "amounts": {"Student": (0.5, 1), "Employee": (0.5, 1), "Family": (1, 3)},
                "frequency": {"Student": "daily", "Employee": "daily/weekly", "Family": "monthly"}
            },
            "Bicycle/Motorbike/Vehical Maintenance": {
                "keywords": ["bike repair", "motorbike service", "tire change", "oil change", "car service"],
                "merchants": ["Mechanic Station"],
                "amounts": {"Student": (1, 15), "Employee": (5, 30), "Family": (10, 150)},
                "frequency": {"Student": "infrequent", "Employee": "infrequent", "Family": "infrequent"}
            }
        }
    },
    "Education": {
        "Sub_Categories": {
            "Tuition Fees": {
                "keywords": ["school fee", "university tuition", "course payment"],
                "merchants": ["Private Education Sector", "Public Education Sector", "Online Course Platform",],
                "amounts": {"Student": (50, 800), "Employee": (100, 300), "Family": (300, 5000)}, # Employee might pay for own courses, Family for kids
                "frequency": {"Student": "monthly/termly/annualy", "Employee": "infrequent", "Family": "monthly/termly"}
            },
            "Books & Supplies": {
                "keywords": ["textbook", "notebook", "stationery", "school bag", "photocopy"],
                "merchants": ["Bookstore", "Stationery Shop"],
                "amounts": {"Student": (2, 20), "Employee": (0, 0), "Family": (5, 50)},
                "frequency": {"Student": "monthly/termly", "Employee": "infrequent", "Family": "monthly"}
            }
        }
    },
    "Housing & Utilities": { # More prominent for Employees & Family
        "Sub_Categories": {
            "Rent": {
                "keywords": ["monthly rent", "apartment rent", "house rent"],
                "merchants": ["Landlord", "Property Agent"],
                "amounts": {"Student": (100, 300), "Employee": (250, 700), "Family": (400, 1500)},
                "frequency": "monthly"
            },
            "Electricity Bill": {
                "keywords": ["electricity", "EDC bill"],
                "merchants": ["EDC Cambodia"],
                "amounts": {"Student": (10, 40), "Employee": (30, 100), "Family": (50, 200)},
                "frequency": "monthly"
            },
            "Water Bill": {
                "keywords": ["water bill"],
                "merchants": ["Water Supply Authority"],
                "amounts": {"Student": (2, 8), "Employee": (5, 15), "Family": (15, 50)},
                "frequency": "monthly"
            },
            "Internet Bill": {
                "keywords": ["internet bill", "wifi", "internet service"],
                "merchants": ["Ezecom", "Smart", "Metfone", "Cellcard"],
                "amounts": {"Student": (5, 20), "Employee": (15, 30), "Family": (20, 40)},
                "frequency": "monthly"
            }
        }
    },

    "Healthcare": {
        "Sub_Categories": {
            "Pharmacy": {
                "keywords": ["medicine", "pharmacy", "flu medicine", "pain killer"],
                "merchants": ["Pharmacy"],
                "amounts": {"Student": (2, 10), "Employee": (5, 20), "Family": (10, 50)},
                "frequency": "monthly/infrequent"
            },
            "Doctor Visit": {
                "keywords": ["doctor visit", "clinic", "check-up"],
                "merchants": ["Clinic", "Hospital"],
                "amounts": {"Student": (10, 30), "Employee": (20, 50), "Family": (30, 100)},
                "frequency": "infrequent"
            }
        }
    },
    "Entertainment & Leisure": {
        "Sub_Categories": {
            "Cinema/Movies": {
                "keywords": ["cinema ticket", "movie night", "Legend Cinema", "Major Cineplex"],
                "merchants": ["Cinema", "Movie Theater"],
                "amounts": {"Student": (4, 8), "Employee": (5, 10), "Family": (10, 25)},
                "frequency": {"Student": "monthly", "Employee": "monthly", "Family": "monthly/bi-monthly"}
            },
            "Social Outings": {
                "keywords": ["karaoke", "bar", "cafe meeting", "hangout", "party", "travel"],
                "merchants": ["KTV", "Bar", "Cafe"],
                "amounts": {"Student": (5, 20), "Employee": (10, 50), "Family": (15, 70)},
                "frequency": {"Student": "weekly/bi-weekly", "Employee": "weekly/bi-weekly", "Family": "monthly"}
            },
            "Hobbies/Sports": {
                "keywords": ["gym membership", "sports gear", "hobby supplies", "yoga class"],
                "merchants": ["Fitness Center", "Sports Shop", "Sports Club"],
                "amounts": {"Student": (10, 30), "Employee": (20, 60), "Family": (30, 100)},
                "frequency": {"Student": "monthly", "Employee": "monthly", "Family": "monthly"}
            }
        }
    },
    "Shopping": {
        "Sub_Categories": {
            "Clothing & Accessories": {
                "keywords": ["new shirt", "dress", "shoes", "bag", "jewelry"],
                "merchants": ["Mall", "Boutique", "Market Stall"],
                "amounts": {"Student": (10, 40), "Employee": (20, 80), "Family": (30, 150)},
                "frequency": "monthly/bi-monthly"
            },
            "Electronics": {
                "keywords": ["new phone", "headphones", "charger", "laptop", "gadget"],
                "merchants": ["Electrotics Shop"],
                "amounts": {"Student": (20, 200), "Employee": (50, 500), "Family": (100, 1000)},
                "frequency": "infrequent"
            },
            "Household Items": { # Mostly for Family, some for Employees
                "keywords": ["detergent", "kitchenware", "cleaning supplies", "furniture", "appliances"],
                "merchants": ["Supermarket", "Mall", "Hardware Store"],
                "amounts": {"Student": (0, 0), "Employee": (5, 30), "Family": (20, 100)},
                "frequency": {"Student": "none", "Employee": "monthly", "Family": "weekly/monthly"}
            }
        }
    },
    "Personal Care": {
        "Sub_Categories": {
            "Hair/Beauty": {
                "keywords": ["haircut", "salon", "barber", "manicure", "pedicure"],
                "merchants": ["Local Salon", "Barber Shop"],
                "amounts": {"Student": (5, 15), "Employee": (10, 30), "Family": (15, 40)},
                "frequency": "monthly/bi-monthly"
            },
            "Toiletries": {
                "keywords": ["shampoo", "soap", "toothpaste", "skincare"],
                "merchants": ["Supermarket", "Market", "Local Shop"],
                "amounts": {"Student": (2, 15), "Employee": (5, 20), "Family": (30, 100)},
                "frequency": "monthly"
            }
        }
    },
    "Miscellaneous": {
        "Sub_Categories": {
            "Other": {
                "keywords": ["miscellaneous", "random expense", "unknown", "donation"],
                "merchants": ["Various"],
                "amounts": {"Student": (1, 10), "Employee": (2, 20), "Family": (5, 50)},
                "frequency": "daily/weekly"
            }
        }
    }
}

PAYMENT_METHODS_COMMON = ["Cash", "Bank App", "Credit Card", "Debit Card"]
CURRENCY = "USD" # Use USD as the primary recorded currency, with the understanding KHR is common for small transactions

# You can define "user profiles" with their group, gender, and income/spending tendencies
USER_PROFILES = [
    {"user_id": "USR_S_M_001", "group": "Student", "gender": "Male", "income_level": "low", "freq_multiplier": 1.0},
    {"user_id": "USR_S_M_002", "group": "Student", "gender": "Male", "income_level": "low", "freq_multiplier": 0.9},
    {"user_id": "USR_S_F_003", "group": "Student", "gender": "Female", "income_level": "medium", "freq_multiplier": 1.1},
    {"user_id": "USR_S_F_004", "group": "Student", "gender": "Female", "income_level": "low", "freq_multiplier": 0.8},
    {"user_id": "USR_E_M_005", "group": "Employee", "gender": "Male", "income_level": "medium", "freq_multiplier": 1.2},
    {"user_id": "USR_E_M_006", "group": "Employee", "gender": "Male", "income_level": "high", "freq_multiplier": 1.0},
    {"user_id": "USR_E_F_007", "group": "Employee", "gender": "Female", "income_level": "medium", "freq_multiplier": 1.1},
    {"user_id": "USR_E_F_008", "group": "Employee", "gender": "Female", "income_level": "high", "freq_multiplier": 0.9},
    {"user_id": "USR_F_M_009", "group": "Family", "gender": "Male", "income_level": "high", "freq_multiplier": 1.5},
    {"user_id": "USR_F_M_010", "group": "Family", "gender": "Male", "income_level": "medium", "freq_multiplier": 1.1},
    {"user_id": "USR_F_F_011", "group": "Family", "gender": "Female", "income_level": "medium", "freq_multiplier": 1.2},
    {"user_id": "USR_F_F_012", "group": "Family", "gender": "Female", "income_level": "high", "freq_multiplier": 1.3},
]

In [4]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import timedelta, date

fake = Faker('en_US')

all_transactions = []
start_date_simulation = date(2024, 7, 1) # Start from July 1, 2024
end_date_simulation = date(2025, 6, 30)   # End on June 30, 2025 (1 full year)

current_item_id = 1

for user_profile in USER_PROFILES: # USER_PROFILES needs to be defined from previous step
    user_id = user_profile['user_id']
    group = user_profile['group']
    freq_multiplier = user_profile['freq_multiplier']

    # Iterate through each day in the simulation period
    current_day = start_date_simulation
    while current_day <= end_date_simulation:
        # Determine average transactions per day for this user/group, allowing for some days with no transactions
        if group == "Student":
            avg_daily_transactions = 2 + random.uniform(-0.5, 0.5) * freq_multiplier
        elif group == "Employee":
            avg_daily_transactions = 1.5 + random.uniform(-0.5, 0.5) * freq_multiplier
        else: # Family
            avg_daily_transactions = 3 + random.uniform(-1, 1) * freq_multiplier

        # Use Poisson distribution for more realistic random counts of events
        num_transactions_for_this_day = np.random.poisson(max(0, avg_daily_transactions))

        for _ in range(num_transactions_for_this_day):
            # The 'entry_date' for this specific transaction is simply 'current_day'
            transaction_date = current_day # Assign the date for this transaction

            # Select a random main category based on group likelihood (as defined in previous response)
            main_category_name = random.choices(
                list(CATEGORIES_DATA.keys()),
                weights=[
                    0.40 if cat == "Food & Dining" else # Always high
                    0.20 if cat == "Transportation" else
                    0.15 if cat == "Communication" else
                    0.10 if cat == "Education" and group == "Student" else # Students more education
                    0.10 if cat == "Housing & Utilities" and (group == "Employee" or group == "Family") else # Emp/Fam more housing
                    0.05 if cat == "Entertainment & Leisure" else
                    0.03 if cat == "Shopping" else
                    0.02 if cat == "Healthcare" else
                    0.02 if cat == "Personal Care" else
                    0.01 # Miscellaneous
                    for cat in CATEGORIES_DATA.keys()
                ]
            )[0]
            
            main_category_data = CATEGORIES_DATA[main_category_name]
            
            # Select a sub-category within the chosen main category
            sub_category_name = random.choice(list(main_category_data["Sub_Categories"].keys()))
            sub_category_data = main_category_data["Sub_Categories"][sub_category_name]

            # Get amount range specific to the group
            amount_range = sub_category_data["amounts"].get(group, (1, 10))
            amount = round(random.uniform(*amount_range), 2)

            item_description_raw = random.choice(sub_category_data["keywords"])
            merchant_name = random.choice(sub_category_data["merchants"])

            # Add some variability to the description for model generalization
            if random.random() < 0.2:
                item_description_raw = fake.text(max_nb_chars=20) + " " + item_description_raw
            if random.random() < 0.1:
                item_description_raw = item_description_raw.replace('a', 'o', 1) if 'a' in item_description_raw else item_description_raw

            all_transactions.append({
                "Item_id": f"ITEM_{current_item_id:06d}",
                "user_id": user_id,
                "entry_date": transaction_date.isoformat(), # Use the 'transaction_date' for this item
                "amount": amount,
                "currency": CURRENCY,
                "merchant_name": merchant_name,
                "transaction_type": "Expense",
                "category_label": main_category_name,
                "sub_category": sub_category_name,
                "payment_method": random.choice(PAYMENT_METHODS_COMMON),
                "item_description_raw": item_description_raw
            })
            current_item_id += 1
            
        current_day += timedelta(days=1) # Move to the next day for the simulation

# Optional: Add infrequent, large expenses (like annual tuition)
# This logic still holds, but adjust the date generation to match the new loop's 'current_day'
# Example: For family users, add a "School Fees" transaction in August/September.
for user_profile in USER_PROFILES:
    user_id = user_profile['user_id']
    group = user_profile['group']

    if group in ["Student", "Family"]:
        # Simulate school fees once a year (e.g., end of August 2024 and Jan 2025)
        if user_id in ["USR_S_M_001", "USR_S_F_003", "USR_F_M_009", "USR_F_F_011"]: # Specific users
            # For 2024-2025 academic year
            fee_amount_1 = random.uniform(200, 1000) if group == "Student" else random.uniform(500, 2000) # USD
            all_transactions.append({
                "Item_id": f"ITEM_{current_item_id:06d}",
                "user_id": user_id,
                "entry_date": date(2024, 8, random.randint(25, 30)).isoformat(), # Late August 2024
                "amount": round(fee_amount_1, 2),
                "currency": CURRENCY,
                "merchant_name": random.choice(["University of Cambodia", "RPP", "International School XYZ", "Local High School"]),
                "transaction_type": "Expense",
                "category_label": "Education",
                "sub_category": "Tuition Fees",
                "payment_method": random.choice(["ABA Pay", "Bank Transfer", "Cash"]),
                "item_description_raw": random.choice(["University tuition fee", "School fees for semester 1", "Annual school payment"])
            })
            current_item_id += 1
            
            fee_amount_2 = random.uniform(200, 1000) if group == "Student" else random.uniform(500, 2000) # USD
            all_transactions.append({
                "Item_id": f"ITEM_{current_item_id:06d}",
                "user_id": user_id,
                "entry_date": date(2025, 1, random.randint(20, 28)).isoformat(), # Late Jan 2025
                "amount": round(fee_amount_2 * random.uniform(0.9, 1.1), 2),
                "currency": CURRENCY,
                "merchant_name": random.choice(["University of Cambodia", "RPP", "International School XYZ", "Local High School"]),
                "transaction_type": "Expense",
                "category_label": "Education",
                "sub_category": "Tuition Fees",
                "payment_method": random.choice(["ABA Pay", "Bank Transfer", "Cash"]),
                "item_description_raw": random.choice(["University tuition fee", "School fees for semester 2", "Semester 2 payment"])
            })
            current_item_id += 1


df_final = pd.DataFrame(all_transactions)
df_final['entry_date'] = pd.to_datetime(df_final['entry_date']) # Convert to datetime objects
df_final = df_final.sort_values(by=['user_id', 'entry_date']).reset_index(drop=True)


output_path = r'D:\AMS_club\Projects\Personal_Finance_Coach\data\raw\synthetic_expense_transactions.csv'
df_final.to_csv(output_path, index=False)
print(f"Generated {len(df_final)} synthetic multi-group expense transactions and saved to {output_path}")

print("\nSample Data:")
print(df_final.head())
print("\nSummary Statistics:")
print(df_final.groupby('user_id').size().reset_index(name='transaction_count'))
print(df_final.groupby(['category_label', 'sub_category']).size().reset_index(name='count').sort_values(by='count', ascending=False).head(10))

Generated 9293 synthetic multi-group expense transactions and saved to D:\AMS_club\Projects\Personal_Finance_Coach\data\raw\synthetic_expense_transactions.csv

Sample Data:
       Item_id      user_id entry_date  amount currency     merchant_name  \
0  ITEM_003894  USR_E_F_007 2024-07-01   27.93      USD           Canteen   
1  ITEM_003895  USR_E_F_007 2024-07-01   49.21      USD              Mart   
2  ITEM_003896  USR_E_F_007 2024-07-01   20.45      USD        Restaurant   
3  ITEM_003897  USR_E_F_007 2024-07-02    8.71      USD  Mechanic Station   
4  ITEM_003898  USR_E_F_007 2024-07-02    2.94      USD       Coffee Shop   

  transaction_type  category_label                           sub_category  \
0          Expense   Food & Dining                       Restaurant Meals   
1          Expense   Food & Dining                              Groceries   
2          Expense   Food & Dining                       Restaurant Meals   
3          Expense  Transportation  Bicycle/Motorbike/Ve

In [None]:
# Initialize Faker for realistic data
fake = Faker('en_US') # Or 'km_KH' if Faker has good support for Khmer names/phrases (less critical for descriptions)

# --- Configuration for synthetic data ---
NUM_USERS = 5 # Number of synthetic students
TRANSACTIONS_PER_USER_PER_MONTH = 60 # Avg ~2 items per day, adjust as needed for data volume
SIMULATION_MONTHS = 12 # Simulate 1 year of data

# Define student-specific categories (simplified for example, use your full list)
CATEGORIES = {
    "Food: School Canteen": {"keywords": ["canteen", "school food", "lunch at school", "student lunch"], "merchants": ["School Canteen"], "amounts": (1.0, 3.0)},
    "Food: Street Food": {"keywords": ["noodle soup", "fried rice", "street food", "local stall"], "merchants": ["Street Food Stall", "Local Eatery"], "amounts": (1.5, 4.0)},
    "Food: Cafe": {"keywords": ["coffee", "iced tea", "bubble tea", "cafe", "brown coffee"], "merchants": ["Brown Coffee", "Starbucks", "Tube Cafe"], "amounts": (1.5, 5.0)},
    "Transportation: Tuk-Tuk/Moto": {"keywords": ["tuk-tuk", "moto", "taxi", "ride home"], "merchants": ["Local Tuk-Tuk", "Moto-Dop Driver"], "amounts": (1.0, 5.0)},
    "Transportation: Ride-Hailing": {"keywords": ["grab", "passapp", "grab ride", "passapp car"], "merchants": ["Grab", "PassApp"], "amounts": (2.0, 8.0)},
    "Communication: Phone Credit": {"keywords": ["top-up", "data plan", "phone credit", "smart card"], "merchants": ["Smart Axiata", "Cellcard", "Metfone"], "amounts": (1.0, 5.0)},
    "Education: Supplies": {"keywords": ["pen", "notebook", "textbook", "photocopy", "school supplies"], "merchants": ["Bookstore ABC", "School Shop"], "amounts": (1.0, 30.0)},
    "Entertainment: Cinema": {"keywords": ["cinema ticket", "movie", "major cineplex"], "merchants": ["Major Cineplex", "Legend Cinema"], "amounts": (5.0, 10.0)},
    "Shopping: Apparel": {"keywords": ["t-shirt", "jeans", "shoes", "clothes"], "merchants": ["Local Market Stall", "Aeon Mall Shop"], "amounts": (10.0, 50.0)},
    "Miscellaneous: Other": {"keywords": ["misc", "random", "other item"], "merchants": ["Various Shops"], "amounts": (1.0, 10.0)},
}

PAYMENT_METHODS = ["Cash", "ABA Pay", "Wing", "Card", "ACLEDA Pay"] # Reflect common payment methods in Cambodia

all_transactions = []
start_date = pd.to_datetime('2024-01-01') # Start date for simulation

for i in range(NUM_USERS):
    user_id = f"STU_{i+1:03d}"
    
    # Simulate some basic user preferences/biases
    user_bias = {
        "food_preference": random.choice(["canteen", "street_food", "cafe", "mixed"]),
        "transport_preference": random.choice(["moto", "ride_hailing", "mixed"]),
        "spending_level": random.uniform(0.8, 1.2) # To vary amounts slightly per user
    }

    for month_offset in range(SIMULATION_MONTHS):
        current_month_start = start_date + pd.DateOffset(months=month_offset)
        current_month_end = current_month_start + pd.DateOffset(months=1) - timedelta(days=1)
        
        # Simulate ~TRANSACTIONS_PER_USER_PER_MONTH transactions spread across the month
        for _ in range(TRANSACTIONS_PER_USER_PER_MONTH):
            entry_date = fake.date_between(start_date=current_month_start, end_date=current_month_end)
            entry_time = fake.time()
            
            chosen_category_name = random.choices(
                list(CATEGORIES.keys()), 
                weights=[
                    0.3 if "Food" in c else 0.2 if "Transportation" in c else 0.15 if "Communication" in c else 0.1 if "Education" in c else 0.1 if "Entertainment" in c else 0.05 if "Shopping" in c else 0.05
                    for c in CATEGORIES.keys()
                ], k=1
            )[0]
            chosen_category_data = CATEGORIES[chosen_category_name]
            
            item_description_raw = random.choice(chosen_category_data["keywords"])
            merchant_name = random.choice(chosen_category_data["merchants"])
            amount = round(random.uniform(*chosen_category_data["amounts"]) * user_bias["spending_level"], 2)
            
            payment_method = random.choice(PAYMENT_METHODS)
            notes = "" # Can add more sophisticated note generation

            all_transactions.append({
                "line_item_id": f"LI{len(all_transactions)+1:05d}",
                "user_id": user_id,
                "entry_date": entry_date,
                "entry_time": entry_time,
                "item_description_raw": item_description_raw,
                "amount": amount,
                "transaction_type": "Expense", # Explicitly set to Expense
                "category_label": chosen_category_name,
                "merchant_name": merchant_name, # This is the ground truth for merchant extraction
                "payment_method": payment_method,
                "notes": notes
            })

df = pd.DataFrame(all_transactions)

# --- Save the generated data ---
output_path = r'D:\AMS_club\Projects\Personal_Finance_Coach\data\raw\synthetic_expense_transactions.csv'
df.to_csv(output_path, index=False)
print(f"Generated {len(df)} synthetic expense transactions and saved to {output_path}")

# Display a sample
print(df.head())
print(df.info())

Generated 3600 synthetic expense transactions and saved to D:\AMS_club\Projects\Personal_Finance_Coach\data\raw\synthetic_expense_transactions.csv
  line_item_id  user_id  entry_date entry_time item_description_raw  amount  \
0      LI00001  STU_001  2024-01-23   03:58:08      school supplies   20.05   
1      LI00002  STU_001  2024-01-16   19:18:29              canteen    1.69   
2      LI00003  STU_001  2024-01-01   00:39:43             iced tea    3.58   
3      LI00004  STU_001  2024-01-28   23:28:02           fried rice    3.06   
4      LI00005  STU_001  2024-01-10   21:37:46              t-shirt   12.69   

  transaction_type        category_label      merchant_name payment_method  \
0          Expense   Education: Supplies        School Shop           Wing   
1          Expense  Food: School Canteen     School Canteen           Cash   
2          Expense            Food: Cafe          Tube Cafe     ACLEDA Pay   
3          Expense     Food: Street Food  Street Food Stall       