In [13]:
import pandas as pd
import random

# --- Size of the synthetic network ---
num_links = 1_000      # how many links to generate
num_ind_accts = 100    # how many individual accounts
num_biz_accts = 10     # how many business accounts

# --- Link types and their sampling weights ---
# The two lists are index-aligned: weight i applies to txn_types[i].
txn_types = ["retail_purchase", "supplier", "salary"]
txn_proportions_dict = [80, 15, 5]  # a plain list despite the name

def create_accounts(num_ind_accts, num_biz_accts):
    """Generate account id strings for individuals and businesses.

    Ids have the form ``"<prefix>_<index>_curr"``. A single running index
    covers both groups: the first ``num_ind_accts`` indices get the ``I``
    prefix and the remaining ones get ``B``, so business indices continue
    where the individual ones stop.

    Args:
        num_ind_accts: number of individual account ids to create.
        num_biz_accts: number of business account ids to create.

    Returns:
        list[str]: individual ids followed by business ids.
    """
    total = num_ind_accts + num_biz_accts
    return [
        f"{'I' if idx < num_ind_accts else 'B'}_{idx}_curr"
        for idx in range(total)
    ]
                
# Build every account id once, then split the list by its one-letter prefix.
acct_ids = create_accounts(num_ind_accts, num_biz_accts)
ind_acct_ids = [acct_id for acct_id in acct_ids if acct_id.startswith('I')]
biz_acct_ids = [acct_id for acct_id in acct_ids if acct_id.startswith('B')]

# Draw one link type per link to generate, weighted by txn_proportions_dict.
link_types_list = random.choices(txn_types, weights=txn_proportions_dict, k=num_links)


In [10]:
# Display the business account ids.
biz_acct_ids

['B_100_curr',
 'B_101_curr',
 'B_102_curr',
 'B_103_curr',
 'B_104_curr',
 'B_105_curr',
 'B_106_curr',
 'B_107_curr',
 'B_108_curr',
 'B_109_curr']

In [28]:
def create_retail_link(ind_acct_ids, biz_acct_ids):
    """Build the feature dict for one retail-purchase link.

    The link goes from a randomly chosen individual account to a randomly
    chosen business account.

    Args:
        ind_acct_ids: list of individual account ids to pick the payer from.
        biz_acct_ids: list of business account ids to pick the payee from.

    Returns:
        dict with keys ``type``, ``account_from``, ``account_to``,
        ``amount_range``, ``txn_prob`` and ``MoP``.
    """
    # Draw order (payer, payee, amount) matters for seeded reproducibility.
    (payer,) = random.choices(ind_acct_ids)
    (payee,) = random.choices(biz_acct_ids)
    # Despite the key name this is a single integer, truncated from a
    # uniform draw between 30 and 300.
    amount = int(random.uniform(30, 300))

    return {
        'type': 'retail_purchase',
        'account_from': payer,
        'account_to': payee,
        'amount_range': amount,
        'txn_prob': 0.8,
        'MoP': 'FPS',  # Faster payments
    }

def create_supplier_link(ind_acct_ids, biz_acct_ids):
    """Build the feature dict for one business-to-business supplier link.

    Both endpoints are business accounts. When at least two business
    accounts are available the endpoints are drawn without replacement, so
    a business is never linked to itself as its own supplier.

    Args:
        ind_acct_ids: unused; accepted so that all link creators share the
            same signature.
        biz_acct_ids: list of business account ids to pick both endpoints
            from.

    Returns:
        dict with keys ``type``, ``account_from``, ``account_to``,
        ``amount_range``, ``txn_prob`` and ``MoP``.

    Raises:
        IndexError: if ``biz_acct_ids`` is empty.
    """
    if len(biz_acct_ids) >= 2:
        # Two distinct list positions, so from != to for unique ids.
        account_from, account_to = random.sample(biz_acct_ids, 2)
    else:
        # Fewer than two businesses: keep the previous behaviour (a self
        # link for one account, IndexError for none).
        account_from = random.choices(biz_acct_ids)[0]
        account_to = random.choices(biz_acct_ids)[0]

    return {
        'type': 'supplier',
        'account_from': account_from,
        'account_to': account_to,
        # Despite the key name this is a single integer, truncated from a
        # uniform draw between 1000 and 10000.
        'amount_range': int(random.uniform(1000, 10000)),
        'txn_prob': 0.8,
        'MoP': 'BACS',
    }

def create_salary_link(ind_acct_ids, biz_acct_ids):
    """Build the feature dict for one salary link.

    The link goes from a randomly chosen business account to a randomly
    chosen individual account.

    Args:
        ind_acct_ids: list of individual account ids to pick the payee from.
        biz_acct_ids: list of business account ids to pick the payer from.

    Returns:
        dict with keys ``type``, ``account_from``, ``account_to``,
        ``amount_range``, ``txn_prob`` and ``MoP``.
    """
    # Draw order (employer, employee, amount) matters for seeded runs.
    employer = random.choices(biz_acct_ids, k=1)[0]
    employee = random.choices(ind_acct_ids, k=1)[0]
    salary = int(random.uniform(1000, 9000))  # truncated to a whole number

    link = dict(type='salary', account_from=employer, account_to=employee)
    link['amount_range'] = salary
    # NOTE(review): 0.033 is presumably about 1/30, i.e. roughly monthly -
    # confirm against whatever consumes txn_prob.
    link['txn_prob'] = 0.033
    link['MoP'] = 'BACS'
    return link

# Dispatch table: link type name -> function that builds one link of that type.
link_creator_map = dict(
    retail_purchase=create_retail_link,
    supplier=create_supplier_link,
    salary=create_salary_link,
)

def create_links(link_types_list, ind_acct_ids, biz_acct_ids, creator_map=None):
    """Create one link per requested link type and return them as a table.

    Args:
        link_types_list: iterable of link type names, one per link to create.
        ind_acct_ids: list of individual account ids, passed to each creator.
        biz_acct_ids: list of business account ids, passed to each creator.
        creator_map: optional mapping from link type name to a callable
            ``f(ind_acct_ids, biz_acct_ids) -> dict``. Defaults to the
            module-level ``link_creator_map``.

    Returns:
        pandas.DataFrame with one row per entry of ``link_types_list``, in
        the same order, whose columns are the keys of the creator dicts.

    Raises:
        ValueError: if a link type has no entry in the creator map.
    """
    if creator_map is None:
        creator_map = link_creator_map

    link_features_list = []
    for link_type in link_types_list:
        link_creator_func = creator_map.get(link_type)
        if link_creator_func is None:
            # Fail with the offending name instead of the opaque
            # "'NoneType' object is not callable".
            raise ValueError(
                f"Unknown link type {link_type!r}; "
                f"expected one of {sorted(creator_map)}"
            )
        link_features_list.append(link_creator_func(ind_acct_ids, biz_acct_ids))

    # Build the frame once from the list of dicts rather than row by row.
    return pd.DataFrame(link_features_list)

# Build one link per sampled link type and collect them in a DataFrame.
links_df = create_links(link_types_list, ind_acct_ids, biz_acct_ids)

Unnamed: 0,type,account_from,account_to,amount_range,txn_prob,MoP
0,retail_purchase,I_96_curr,B_103_curr,147,0.8,FPS
1,retail_purchase,I_17_curr,B_101_curr,91,0.8,FPS
2,supplier,B_102_curr,B_100_curr,7546,0.8,BACS
3,retail_purchase,I_91_curr,B_102_curr,36,0.8,FPS
4,retail_purchase,I_98_curr,B_107_curr,36,0.8,FPS
...,...,...,...,...,...,...
995,retail_purchase,I_24_curr,B_101_curr,91,0.8,FPS
996,retail_purchase,I_64_curr,B_107_curr,210,0.8,FPS
997,retail_purchase,I_9_curr,B_101_curr,222,0.8,FPS
998,retail_purchase,I_29_curr,B_107_curr,105,0.8,FPS


In [21]:
# Display the sampled link types.
# NOTE(review): this shows the whole list (num_links entries); a slice or a
# per-type count would keep the output short.
link_types_list

['retail_purchase',
 'retail_purchase',
 'supplier',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'salary',
 'retail_purchase',
 'supplier',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'supplier',
 'retail_purchase',
 'supplier',
 'supplier',
 'retail_purchase',
 'supplier',
 'supplier',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'salary',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'supplier',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'retail_purchase',
 'supplier',
 'retail_purchase',
 