In [2]:
from joinboost_disk import *

In [3]:
def create_jg(con, unique_id = 0, sample=True):
    """Assemble the favorita join graph on the connection ``con``.

    Args:
        con: database connection handed straight to ``joinGraph``.
        unique_id: appended to the graph name ("favorita<unique_id>") and
            used as ``sample_seed``; the name has to differ between views.
        sample: when truthy, ``create_sample_fact`` is called after the
            tables are registered and before the joins are declared.

    Returns:
        The configured ``joinGraph`` instance.
    """
    # The learning rate is meant to be 1 / (number of trees).
    graph = joinGraph(
        "favorita" + str(unique_id),
        con,
        log=False,
        max_leaves=8,
        learning_rate=0.01,
        target_variable="Y",
    )

    # `sales` is registered as the fact table and contributes no columns here.
    graph.add_table("sales", [], [], fact=True)

    # (table, column names, per-table list of 2s) in registration order.
    # NOTE(review): for "transactions" and "stores" the third list has one
    # more entry than the column list; kept exactly as-is -- confirm whether
    # add_table expects the two lists to have equal length.
    dimensions = (
        ("holidays", ["htype", "locale", "locale_name", "transferred", "f2"], [2, 2, 2, 2, 2]),
        ("oil", ["dcoilwtico", "f3"], [2, 2]),
        ("transactions", ["transactions", "f5"], [2, 2, 2]),
        ("stores", ["city", "state", "stype", "cluster", "f4"], [2, 2, 2, 2, 2, 2]),
        ("items", ["family", "class", "perishable", "f1"], [2, 2, 2, 2]),
    )
    for table, columns, codes in dimensions:
        graph.add_table(table, columns, codes)

    if sample:
        graph.create_sample_fact(sample_percent=1, sample_seed=unique_id, view=False)

    # Joins that hang off the fact table; `graph.fact` is read at each call,
    # i.e. after any sampling step above has run.
    for child, key in (("items", "item_nbr"), ("transactions", "tid")):
        graph.join(graph.fact, child, [key], [key])

    # Remaining joins between the non-fact tables.
    for parent, child, key in (
        ("transactions", "stores", "store_nbr"),
        ("transactions", "holidays", "date"),
        ("holidays", "oil", "date"),
    ):
        graph.join(parent, child, [key], [key])

    return graph

In [4]:
# One-off setup: load every CSV into the on-disk DuckDB file, then build the
# un-sampled join graph and call create_dummy_model on it.
con = duckdb.connect(database='fav_2.duckdb',check_same_thread=False)
try:
    # (table name, source CSV). Note that `sales` is loaded from train.csv.
    csv_sources = (
        ("holidays", "data/holidays.csv"),
        ("oil", "data/oil.csv"),
        ("transactions", "data/transactions.csv"),
        ("stores", "data/stores.csv"),
        ("items", "data/items.csv"),
        ("sales", "data/train.csv"),
        ("test", "data/test.csv"),
    )
    for table_name, csv_path in csv_sources:
        con.execute("CREATE OR REPLACE TABLE " + table_name + " AS SELECT * FROM '" + csv_path + "';")
    jg = create_jg(con, sample=False)
    jg.create_dummy_model(replace=False)
finally:
    # Always release the connection, even when a load or the model step fails,
    # so a failed run does not leave an open handle on fav_2.duckdb in the kernel.
    con.close()

In [5]:
# Module-level state shared by the worker threads.
# worker id -> DuckDB connection (filled in by init)
cons = {}
# first tree query of every tree that finished building (appended by build_tree)
tree_queries = list()
# specify the number of trees; workers pop ids off the end of this list
trees = [tree_id for tree_id in range(100)]

def init(unique_id):
    """Open a connection to 'fav_2.duckdb' and store it in ``cons[unique_id]``.

    Any exception raised while connecting or storing is printed and
    otherwise ignored, so ``cons`` simply lacks the key on failure.
    """
    try:
        connection = duckdb.connect(database='fav_2.duckdb', check_same_thread=False)
        cons[unique_id] = connection
    except Exception as err:
        print(err)

def train_tree(worker_id):
    """Worker loop: keep training trees until the shared ``trees`` list is empty.

    Each iteration sets ``PRAGMA threads=4`` on this worker's connection,
    claims a tree id with ``trees.pop()`` and passes it to ``build_tree``.

    Args:
        worker_id: key into ``cons`` selecting this worker's connection.

    Raises:
        KeyError: if ``cons`` has no connection for ``worker_id``.

    An empty ``trees`` list is the normal way out and ends the loop quietly.
    Any other exception is reported together with the worker and tree id and
    stops this worker; the tree that was being built is not retried.
    """
    con = cons[worker_id]
    while True:
        tree_id = None  # stays None if the failure happens before a tree is claimed
        try:
            con.execute("PRAGMA threads=4;")
            try:
                tree_id = trees.pop()
            except IndexError:
                # Every tree has been claimed: this is completion, not an error.
                return
            print(str(worker_id) + " trains tree " + str(tree_id))
            build_tree(con, tree_id)
        except Exception as e:
            # Keep the original best-effort behaviour (report and stop this
            # worker), but say which worker/tree failed and with what error.
            print("worker " + str(worker_id) + " stopped on tree " + str(tree_id) + ": " + repr(e))
            return

def build_tree(con, tree_id):
    """Build one tree on ``con`` and record its first query in ``tree_queries``.

    A fresh join graph is created via ``create_jg(con, tree_id)``, the
    build/cleanup steps run in a fixed order, and a timing line is printed.
    The elapsed time is measured against the module-level ``initial_time``,
    which must already be defined when this function runs.
    """
    graph = create_jg(con, tree_id)
    # Per the original note, these two values are obtained from create_dummy_model.
    graph.set_ts_tc(0.0, 80318105)

    # Run the zero-argument pipeline steps in order; each method is looked up
    # only when its turn comes.
    for step_name in ("create_base_node", "build_gradient_tree", "clean_leaves", "clean_table"):
        getattr(graph, step_name)()

    first_query = graph.tree_queries[0]
    tree_queries.append(first_query)

    elapsed = time.time() - initial_time
    print("Tree " + str(tree_id)  + "finishes:" + str(elapsed))

In [12]:
def function_threading(func, num_threads):
    """Run ``func(i)`` for ``i`` in ``range(num_threads)``, one thread per call.

    All threads are created first, then all are started, and finally the
    caller blocks until every one of them has finished.
    """
    workers = [
        threading.Thread(target=func, args=(index,))
        for index in range(num_threads)
    ]

    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()

In [13]:
%%time
# Run init on 16 threads (ids 0-15); each call stores one DuckDB connection
# in `cons` under its id, or prints the error if connecting fails.
function_threading(init, 16)

CPU times: user 840 ms, sys: 232 ms, total: 1.07 s
Wall time: 1.02 s


In [14]:
%%time
# Reference timestamp: build_tree subtracts this global from time.time() when
# it prints each tree's finish line, so it must be set before training starts.
initial_time = time.time()
# Number of worker threads. Each worker looks up cons[worker_id], so ids
# 0-15 must already have a connection stored by the init cell above.
function_threading(train_tree, 16)

1 trains tree 99
5 trains tree 98
9 trains tree 97
13 trains tree 96
2 trains tree 95
6 trains tree 94
10 trains tree 93
3 trains tree 92
14 trains tree 91
7 trains tree 90
0 trains tree 89
11 trains tree 88
4 trains tree 87
15 trains tree 86
8 trains tree 85
12 trains tree 84



KeyboardInterrupt



Tree 96finishes:24.504839181900024
13 trains tree 83
Tree 90finishes:25.552266359329224
7 trains tree 82
Tree 94finishes:26.048116207122803
6 trains tree 81
Tree 91finishes:26.172285795211792
14 trains tree 80
Tree 87finishes:26.282246828079224
4 trains tree 79
Tree 92finishes:26.767805576324463
3 trains tree 78
Tree 93finishes:26.892659664154053
10 trains tree 77
Tree 84finishes:27.635936737060547
12 trains tree 76
Tree 86finishes:27.683531761169434
15 trains tree 75
Tree 98finishes:28.378198623657227
5 trains tree 74
Tree 97finishes:28.562456607818604
9 trains tree 73
Tree 85finishes:29.15358805656433
8 trains tree 72
Tree 99finishes:29.255725622177124
1 trains tree 71
Tree 88finishes:29.597915410995483
11 trains tree 70
Tree 95finishes:29.801080226898193
2 trains tree 69
Tree 89finishes:30.527759313583374
0 trains tree 68
Tree 82finishes:47.23465538024902
7 trains tree 67
Tree 79finishes:47.93283772468567
4 trains tree 66
Tree 83finishes:49.527594566345215
13 trains tree 65
Tree 77f