In [1]:
from joinboost_disk import *

In [2]:
def create_jg(con, unique_id = 0, sample=True):
    """Assemble the "favorita" join graph on the connection ``con``.

    Args:
        con: database connection handed straight to ``joinGraph``.
        unique_id: suffix appended to the graph name (the name has to differ
            between views); also used as ``sample_seed`` when sampling.
        sample: when true, ``create_sample_fact(sample_percent=1,
            sample_seed=unique_id, view=True)`` is called after the tables are
            registered and before the joins are declared.

    Returns:
        The configured ``joinGraph`` instance.
    """
    # The learning rate should be 1 / (number of trees).
    graph = joinGraph(
        "favorita" + str(unique_id),
        con,
        log=False,
        max_leaves=8,
        learning_rate=0.01,
        target_variable="Y",
    )

    # The fact table is registered first and carries no features of its own.
    graph.add_table("sales", [], [], fact=True)

    # Remaining tables, in registration order, with their feature columns.
    # Every feature is registered with the same code, 2.
    feature_tables = (
        ("holidays", ["htype", "locale", "locale_name", "transferred", "f2"]),
        ("oil", ["dcoilwtico", "f3"]),
        ("transactions", ["transactions", "f5"]),
        ("stores", ["city", "state", "stype", "cluster", "f4"]),
        ("items", ["family", "class", "perishable", "f1"]),
    )
    for table_name, features in feature_tables:
        graph.add_table(table_name, features, [2] * len(features))

    if sample:
        graph.create_sample_fact(sample_percent=1, sample_seed=unique_id, view=True)

    # Edges leaving the fact table; graph.fact is read at each call, i.e.
    # after any sampling step above has run.
    for right_table, key in (("items", "item_nbr"), ("transactions", "tid")):
        graph.join(graph.fact, right_table, [key], [key])

    # Edges between the remaining tables; both sides join on the same column.
    for left_table, right_table, key in (
        ("transactions", "stores", "store_nbr"),
        ("transactions", "holidays", "date"),
        ("holidays", "oil", "date"),
    ):
        graph.join(left_table, right_table, [key], [key])

    return graph

In [3]:
# Build the on-disk DuckDB database that the worker connections open later.
con = duckdb.connect(database='fav_2.duckdb',check_same_thread=False)
try:
    # (table name, CSV file stem) pairs; note that `sales` is loaded from train.csv.
    for table_name, csv_stem in (
        ("holidays", "holidays"),
        ("oil", "oil"),
        ("transactions", "transactions"),
        ("stores", "stores"),
        ("items", "items"),
        ("sales", "train"),
        ("test", "test"),
    ):
        con.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM 'data/{csv_stem}.csv';")

    # Full (unsampled) join graph, used only to create the dummy model.
    # build_tree's hard-coded set_ts_tc(...) arguments are, per its own comment,
    # taken from this step -- revisit them if the input data changes.
    jg = create_jg(con, sample=False)
    jg.create_dummy_model(replace=False)
finally:
    # Always release the connection, even when a load or model step fails,
    # so a failed run does not leave fav_2.duckdb held open by this cell.
    con.close()

In [5]:
# Shared state used by the worker threads below.
# Connections keyed by worker id; init() stores one entry per worker.
cons = {}
# build_tree() appends jg.tree_queries[0] here for every finished tree.
tree_queries = []
# Work queue of tree ids (train_tree() pops from the end); the range size
# is the number of trees to train.
trees = [*range(100)]

def init(unique_id):
    """Open a connection to ``fav_2.duckdb`` and store it in ``cons[unique_id]``.

    Best effort: any exception is printed and swallowed, in which case
    ``cons`` gets no entry for ``unique_id``.
    """
    try:
        connection = duckdb.connect(database='fav_2.duckdb', check_same_thread=False)
        cons[unique_id] = connection
    except Exception as err:
        print(err)

def train_tree(worker_id):
    """Worker loop: pop tree ids from ``trees`` and build each on this worker's connection.

    Args:
        worker_id: key into ``cons`` selecting the connection to use.

    Raises:
        KeyError: if ``cons`` has no connection for ``worker_id`` (for example
            because ``init`` failed for that worker).

    The loop ends quietly once ``trees`` is empty. A failure while building a
    tree is reported with the worker and tree id, and the worker then stops
    (the failed tree is not retried).
    """
    con = cons[worker_id]

    try:
        # Loop-invariant: DuckDB keeps the `threads` setting once it is set,
        # so issuing it once per worker is enough.
        con.execute("PRAGMA threads=4;")
    except Exception as e:
        print(e)
        return

    while True:
        try:
            tree_id = trees.pop()
        except IndexError:
            # Queue drained: normal termination, not an error worth printing.
            return

        print(str(worker_id) + " trains tree " + str(tree_id))
        try:
            build_tree(con, tree_id)
        except Exception as e:
            # Best effort as before (report and stop this worker), but say
            # which tree failed so it is not mistaken for an empty queue.
            print(str(worker_id) + " failed on tree " + str(tree_id) + ": " + str(e))
            return

def build_tree(con, tree_id):
    """Train one gradient tree on ``con`` and record its query.

    Creates a join graph via ``create_jg(con, tree_id)``, runs the build and
    clean-up steps in order, appends the graph's first tree query to the
    global ``tree_queries`` list, and prints the time elapsed since the
    global ``initial_time``.
    """
    graph = create_jg(con, tree_id)

    # These two values are taken from create_dummy_model.
    graph.set_ts_tc(0.0, 80318105)

    # Build the tree, then run the graph's clean-up steps.
    graph.create_base_node()
    graph.build_gradient_tree()
    graph.clean_leaves()
    graph.clean_table()

    tree_queries.append(graph.tree_queries[0])

    # Log format is kept exactly as before (no space before "finishes:").
    elapsed = time.time() - initial_time
    print(f"Tree {tree_id}finishes:{elapsed}")

In [6]:
def function_threading(func, num_threads):
    """Run ``func(i)`` for every ``i`` in ``range(num_threads)``, one thread each.

    All threads are created first, then all are started, and finally the
    call blocks until every one of them has finished.
    """
    workers = [
        threading.Thread(target=func, args=(index,))
        for index in range(num_threads)
    ]

    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()

In [7]:
%%time
# Open the worker connections concurrently: thread i runs init(i), which
# stores its connection in cons[i] (16 workers, ids 0..15).
function_threading(init, 16)

CPU times: user 10.9 s, sys: 3.3 s, total: 14.2 s
Wall time: 13.7 s


In [8]:
%%time
# Reference timestamp: build_tree prints each tree's finish time relative to it,
# so it must be set before any worker starts.
initial_time = time.time()
# how many threads: worker i reads cons[i], so this must not exceed the number
# of connections opened by the init step.
function_threading(train_tree, 16)

4 trains tree 99
5 trains tree 98
11 trains tree 97
6 trains tree 96
12 trains tree 95
7 trains tree 94
1 trains tree 93
2 trains tree 92
0 trains tree 91
8 trains tree 90
13 trains tree 89
9 trains tree 88
14 trains tree 87
15 trains tree 86
10 trains tree 85
3 trains tree 84
Tree 92finishes:13.029026985168457
2 trains tree 83
Tree 99finishes:13.067725896835327
4 trains tree 82
Tree 93finishes:13.235997438430786
1 trains tree 81
Tree 97finishes:13.338273525238037
11 trains tree 80
Tree 87finishes:13.340811252593994
14 trains tree 79
Tree 98finishes:13.37799859046936
5 trains tree 78
Tree 84finishes:13.410249710083008
3 trains tree 77
Tree 85finishes:13.443031311035156
10 trains tree 76
Tree 89finishes:13.673810482025146
13 trains tree 75
Tree 91finishes:13.755179643630981
0 trains tree 74
Tree 88finishes:13.80913257598877
9 trains tree 73
Tree 90finishes:13.831016540527344
8 trains tree 72
Tree 94finishes:14.053758144378662
7 trains tree 71
Tree 96finishes:14.129434823989868
6 trains 