# Transaction Generator
This notebook generates random financial transactions. The generated data is used to test and develop CloudFinance.

## Set Up Environment

In [29]:
import datetime
import random
import pandas as pd

# navigate to the Features folder
import sys
sys.path.append(sys.path[0]+'/../Features') # .. instead of . because we're in the Jupyter Tools folder

# import features
import helper

## Data Structure

The final dataframe will have 7 features:
- Unique ID
- Description
- Category
- Amount
- Date
- Account
- Third Party

In [30]:
# one empty list per dataframe feature; each transaction appends one value to every list
# (the generator yields a fresh list each time, so the seven names never share storage)
(
    unique_id_record,
    description_record,
    category_record,
    amount_record,
    date_record,
    account_record,
    third_party_record,
) = ([] for _ in range(7))

## Populate the Data

In [31]:
# number of days in each month, keyed by month number 1-12 (February is fixed at 28 days)
months_dict = dict(enumerate([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], start=1))

# accounts a transaction can be charged to
accounts = [
    'Chase Freedom Flex',
    'Capital One Savor',
    'Discover It',
    'Amex Blue Cash Preferred',
    'Schwab Investor Card',
]

# categories a transaction can belong to
categories = [
    'Transportation',
    'Utilities',
    'Book',
    'Business Services',
    'Clothing',
    'Education',
    'Entertainment',
    'Fees and Chargers',
    'Financial',
    'Food and Dining',
    'Gift',
    'Health and Fitness',
    'Home',
    'Kids',
    'Misc Expenses',
    'Personal Care',
    'Pet',
    'Shopping',
    'Reimbursable or Refundable',
    'Taxes',
    'Uncategorized',
]

# third parties that can be attached to a transaction
third_parties = ['Ronald', 'Justine', 'Sophia', 'Cara']

In [32]:
# set up weighted random odds

# random number of transaction a particular day (will randomly choose 1 from this list)
# Weighting is done by repetition: a value that appears more often in the list is
# proportionally more likely to be drawn. 1 dominates; 2 appears 7 times and 3 appears 3 times,
# so most days get a single transaction and only a few get two or three.
transaction_daily_counts = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\
    2, 2, 2, 2, 2, 2, 2, 3, 3, 3]

# random category for each transaction (will randomly choose 1 from this list)
# each category is repeated `weight` times, so a larger weight means a higher chance of being drawn
category_weights = {
    'Transportation': 5,
    'Utilities': 9,
    'Book': 3,
    'Business Services': 3,
    'Clothing': 6,
    'Education': 7,
    'Entertainment': 7,
    'Fees and Chargers': 1,
    'Financial': 1,
    'Food and Dining': 15,
    'Gift': 4,
    'Health and Fitness': 5,
    'Home': 5,
    'Kids': 8,
    'Misc Expenses': 2,
    'Personal Care': 3,
    'Pet': 4,
    'Shopping': 9,
    'Reimbursable or Refundable': 2,
    'Taxes': 1,
    'Uncategorized': 4,
}
categories_weighted = [
    category
    for category, weight in category_weights.items()
    for _ in range(weight)
]

# random account for each transaction (will randomly choose 1 from this list)
# same repetition scheme as the categories above
account_weights = {
    'Chase Freedom Flex': 10,
    'Capital One Savor': 4,
    'Discover It': 7,
    'Amex Blue Cash Preferred': 5,
    'Schwab Investor Card': 2,
}
accounts_weighted = [
    account
    for account, weight in account_weights.items()
    for _ in range(weight)
]

In [33]:
# loop through every single day from 2015-01-01 to 2023-01-31 (both ends inclusive)
# Real calendar dates are walked with timedelta instead of a fixed days-per-month table,
# so Feb 29 of leap years is included and the loop stops exactly on the documented end date.
start_date = datetime.date(2015, 1, 1)
end_date = datetime.date(2023, 1, 31)

# empty the record lists first so that re-running this cell regenerates the data
# instead of appending a second copy of every transaction
for record in (unique_id_record, description_record, category_record, amount_record,
               date_record, account_record, third_party_record):
    record.clear()

for day_offset in range((end_date - start_date).days + 1):

    # get the date as a datetime.date object
    this_day = start_date + datetime.timedelta(days=day_offset)

    # number of transactions on this day
    transaction_count = random.choice(transaction_daily_counts)

    for _ in range(transaction_count):

        # generate a unique id
        unique_id_record.append(helper.get_unique_id())

        # set the category (kept in a local so the description below can reuse it)
        category = random.choice(categories_weighted)
        category_record.append(category)

        # set the amount: a whole number between 1 and 50, both inclusive
        amount_record.append(random.randint(1, 50))

        # set the date
        date_record.append(this_day)

        # set the account
        account_record.append(random.choice(accounts_weighted))

        # 15% chance of third party, otherwise an empty string
        # NOTE(review): the explicit `== True` is kept because helper's return type is not
        # visible from this notebook - simplify once it is confirmed to return a bool
        if helper.random_from_percentage(15) == True:
            third_party_record.append(random.choice(third_parties))
        else:
            third_party_record.append('')

        # generate a description
        description_record.append("Randomly generated expenses for " + category)

In [34]:
# map each column name to the list holding its values (dict order = column order)
expense_columns = {
    'Unique ID': unique_id_record,
    'Description': description_record,
    'Category': category_record,
    'Amount': amount_record,
    'Date': date_record,
    'Account': account_record,
    'Third Party': third_party_record,
}

# create a dataframe from the lists
expenses_df = pd.DataFrame(expense_columns)

# preview the dataframe
expenses_df.head()

Unnamed: 0,Unique ID,Description,Category,Amount,Date,Account,Third Party
0,1676311868042907,Randomly generated expenses for Home,Home,50,2015-01-01,Amex Blue Cash Preferred,
1,1676311868043323,Randomly generated expenses for Utilities,Utilities,25,2015-01-02,Amex Blue Cash Preferred,Sophia
2,1676311868045289,Randomly generated expenses for Food and Dining,Food and Dining,24,2015-01-03,Amex Blue Cash Preferred,
3,1676311868046338,Randomly generated expenses for Kids,Kids,1,2015-01-04,Chase Freedom Flex,
4,1676311868046569,Randomly generated expenses for Entertainment,Entertainment,10,2015-01-04,Amex Blue Cash Preferred,


In [35]:
# export the data
# The export is gated behind a flag that is off by default, so re-running the whole notebook
# cannot accidentally overwrite an existing ../Data/Expenses.csv. Set the flag to True
# when a fresh export is really wanted.
EXPORT_EXPENSES = False

if EXPORT_EXPENSES:
    expenses_df.to_csv('../Data/Expenses.csv', index = False)