In [45]:
from datetime import timedelta, datetime
import pandas as pd
import random
from uuid import uuid4

In [32]:
def generate_id(prefix):
    """Build a unique identifier made of `prefix` followed by a new UUID4.

    Args:
        prefix: String placed in front of the UUID text.

    Returns:
        `prefix` concatenated with the string form of a freshly generated
        `uuid4()`.
    """
    unique_part = str(uuid4())
    return prefix + unique_part

In [33]:
# Clients Data Generators

def get_client_id():
    """Return a new unique client identifier (a UUID prefixed with 'C')."""
    client_prefix = 'C'
    return generate_id(client_prefix)

def get_age():
    """Return a random client age as an integer from 21 to 90, inclusive."""
    youngest, oldest = 21, 90
    return random.randint(youngest, oldest)

def get_location():
    """Pick one client location at random from a fixed set of cities.

    Returns:
        One of the location strings listed below, e.g. 'New York, NY'.
    """
    candidate_locations = (
        'New York, NY',
        'San Diego, CA',
        'Washington, D.C.',
        'Nairobi, Kenya',
        'Toronto, Canada',
        'Dallas, TX',
        'Dubai, UAE',
        'Shanghai, China',
        'Paris, France',
        'Seattle, WA',
        'Boston, MA',
        'Little Rock, AR',
        'Nashville, TN',
    )
    return random.choice(candidate_locations)

def get_risk_tolerance():
    """Return a random client risk tolerance: an integer from 1 to 5, inclusive."""
    lowest, highest = 1, 5
    return random.randint(lowest, highest)

In [38]:
# Accounts Data Generators

def get_account_id():
    """Return a new unique account identifier (a UUID prefixed with 'A')."""
    account_prefix = 'A'
    return generate_id(account_prefix)

def get_account_type():
    """Pick an account type at random: 'IRA', 'Taxable Account' or 'Trust'."""
    return random.choice(('IRA', 'Taxable Account', 'Trust'))

def get_account_tolerance():
    """Return a random account risk tolerance: an integer from 1 to 5, inclusive."""
    lowest, highest = 1, 5
    return random.randint(lowest, highest)

In [46]:
# Daily Accounts Data Generators

def get_daily_account_id():
    """Return a new unique daily-record identifier (a UUID prefixed with 'D')."""
    daily_prefix = 'D'
    return generate_id(daily_prefix)

def get_balance_change():
    """Return a random integer from -5 to 5, inclusive.

    The daily-data cell divides this value by 100 and multiplies it by the
    current balance to get one day's change.
    """
    largest_move = 5
    return random.randint(-largest_move, largest_move)

def get_start_date():
    """Pick an account start date at random from a fixed set of dates.

    Returns:
        A `datetime.date` chosen from the six candidates below.
    """
    # (year, month, day) of every candidate, in selection order.
    year_month_day = (
        (2010, 5, 1),
        (2011, 4, 3),
        (2012, 8, 1),
        (2016, 9, 1),
        (2018, 1, 2),
        (2017, 3, 1),
    )
    candidates = [datetime(y, m, d).date() for y, m, d in year_month_day]
    return random.choice(candidates)

def get_starting_balance():
    """Pick an account's opening balance at random from a fixed set of amounts."""
    opening_amounts = (1000000, 15000000, 2000000, 250000, 800000, 750000, 5000000)
    return random.choice(opening_amounts)

In [63]:
# Parameters

# Size of the generated data set.
num_of_clients = 200                  # number of client rows to generate
max_num_of_accounts_per_client = 3    # each client gets between 1 and this many accounts

# Seed the `random` module so that every value drawn from it in the cells below
# (ages, locations, risk tolerances, account counts/types, start dates,
# balances) is reproducible after Restart Kernel -> Run All.
# Identifiers still differ between runs because they come from uuid4(), and the
# daily series still end at the current date.
random_seed = 42
random.seed(random_seed)

In [64]:
# Generate clients

# Generate the ids first, then one value per id for every other column, so all
# four lists have the same length.
client_ids = [get_client_id() for _ in range(num_of_clients)]
ages = [get_age() for _ in client_ids]
locations = [get_location() for _ in client_ids]
client_risk_tolerances = [get_risk_tolerance() for _ in client_ids]

# Column name -> values; the insertion order determines the column order.
client_columns = {
    'client_id': client_ids,
    'age': ages,
    'location': locations,
    'client_risk_tolerance': client_risk_tolerances,
}
client_data = pd.DataFrame(client_columns)

# Persist the table; with the default arguments the DataFrame index is written
# as the first (unnamed) CSV column.
client_data.to_csv('clients.csv')

# Preview the first rows as the cell output.
client_data.head()

Unnamed: 0,client_id,age,location,client_risk_tolerance
0,C7294bdca-4701-4013-b7d3-82ebae09db51,32,"Nashville, TN",3
1,C56f6c73b-7f46-451d-ad47-a4ea06a5951d,35,"New York, NY",3
2,C69e34d77-e3b6-4c4b-b43c-5330ff8e74db,66,"New York, NY",2
3,C9ce5840c-c172-44d5-9ece-c816ff0f687c,77,"Toronto, Canada",2
4,C583bae37-1c6e-42c6-bd34-65dca3a6227b,31,"Boston, MA",2


In [65]:
# Generate accounts

# One (account_id, account_type, risk_tolerance, owner client_id) tuple per account.
account_rows = []

for client in client_ids:
    # Every client owns between 1 and max_num_of_accounts_per_client accounts.
    accounts_for_client = random.randint(1, max_num_of_accounts_per_client)
    for a in range(accounts_for_client):
        account_rows.append(
            (get_account_id(), get_account_type(), get_account_tolerance(), client)
        )

# Split the rows into the per-column lists (account_ids is reused by the
# daily-data cell further down).
account_ids = [row[0] for row in account_rows]
account_types = [row[1] for row in account_rows]
account_tolerances = [row[2] for row in account_rows]
account_client_ids = [row[3] for row in account_rows]

# Column name -> values; the insertion order determines the column order.
account_columns = {
    'account_id': account_ids,
    'account_type': account_types,
    'account_risk_tolerance': account_tolerances,
    'client_id': account_client_ids,
}
account_data = pd.DataFrame(account_columns)

# Persist the table; with the default arguments the DataFrame index is written
# as the first (unnamed) CSV column.
account_data.to_csv('accounts.csv')

# Preview the first rows as the cell output.
account_data.head()

Unnamed: 0,account_id,account_type,account_risk_tolerance,client_id
0,A0b56afc8-750c-4305-9d2b-b7b2cf341647,IRA,2,C7294bdca-4701-4013-b7d3-82ebae09db51
1,Adaf004ca-fbf7-42ce-80ce-b2de20a98b10,IRA,2,C7294bdca-4701-4013-b7d3-82ebae09db51
2,A75493342-6d20-4eac-92f3-5b603830d46b,Trust,1,C56f6c73b-7f46-451d-ad47-a4ea06a5951d
3,A600c47c3-e207-46f9-ba70-9b8c62ce617b,Taxable Account,1,C56f6c73b-7f46-451d-ad47-a4ea06a5951d
4,Ae57ec858-6f67-4e23-8e0a-59900dd258fc,IRA,2,C69e34d77-e3b6-4c4b-b43c-5330ff8e74db


In [66]:
# Generate Daily Account Data

# Parallel column lists: entry i of each list describes the same daily record.
daily_account_data_ids = []
daily_account_ids = []
dates = []
balances = []

# Resolve "today" once. Every account's series then stops on the same day even
# if the run crosses midnight, and datetime.now() is not re-evaluated for each
# generated row.
end_date = datetime.now().date()

for account_id in account_ids:
    # Each account gets its own random start date and opening balance.
    date = get_start_date()
    balance = get_starting_balance()

    # Emit one record per calendar day from the start date up to, but not
    # including, end_date.
    while date < end_date:
        daily_account_data_ids.append(get_daily_account_id())
        dates.append(date.isoformat())
        balances.append(balance)
        daily_account_ids.append(account_id)

        # The next day's balance moves by a random whole percentage of the
        # current balance (get_balance_change() / 100), rounded to 2 decimals.
        change = (get_balance_change() / 100) * balance
        balance = round(balance + change, 2)
        date += timedelta(days = 1)

daily_account_data = pd.DataFrame({
    'data_id': daily_account_data_ids,
    'account_id': daily_account_ids,
    'date': dates,
    'balance': balances
})

# Persist the table; with the default arguments the DataFrame index is written
# as the first (unnamed) CSV column.
daily_account_data.to_csv('daily_account_data.csv')

# Preview the first rows as the cell output.
daily_account_data.head()

Unnamed: 0,data_id,account_id,date,balance
0,Db2de920c-8091-422e-b06a-b642cceeb839,A0b56afc8-750c-4305-9d2b-b7b2cf341647,2012-08-01,2000000.0
1,D3d8fe944-d9e7-462d-9f49-6d8c859e16f4,A0b56afc8-750c-4305-9d2b-b7b2cf341647,2012-08-02,2040000.0
2,Da3959ccb-b7b5-4b04-941d-35f5648cab56,A0b56afc8-750c-4305-9d2b-b7b2cf341647,2012-08-03,2060400.0
3,D30122bf2-7dcb-42dc-9d6a-a209518cd86d,A0b56afc8-750c-4305-9d2b-b7b2cf341647,2012-08-04,2101608.0
4,D427a6b63-4cb2-4543-a52e-cd5311fba937,A0b56afc8-750c-4305-9d2b-b7b2cf341647,2012-08-05,2143640.16


In [67]:
# Total number of daily records generated across all accounts.
daily_account_data.shape[0]

721884

In [68]:
# Number of accounts generated across all clients (account_ids holds one entry per account)
len(account_ids)

396