In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import scipy
import numba as nb
import time
from numba import guvectorize, vectorize, float64, int64, void

In [2]:
NUM_CHLD = 1000000
WISH_SIZE = 100
PREF_SIZE = 1000
NUM_GIFT = 1000
NUM_TRIP = 1667
NUM_TWIN = 20000
NUM_SING = NUM_CHLD - 2*NUM_TWIN - 3*NUM_TRIP
NUM_UNIT = NUM_SING + NUM_TWIN + NUM_TRIP
HIDDEN = 1024
print(NUM_SING)
print(NUM_UNIT)

954999
976666


In [3]:
def load_data(data_path):
    return pd.read_csv(data_path, header=None).as_matrix()[:, 1:]
%time chld_wish_list = load_data('../data/child_wishlist_v2.csv')
%time gift_pref_list = load_data('../data/gift_goodkids_v2.csv')
print(chld_wish_list.shape)
print(gift_pref_list.shape)
%time print(chld_wish_list.max())
%time print(gift_pref_list.max())

CPU times: user 21.1 s, sys: 2.2 s, total: 23.3 s
Wall time: 24.8 s
CPU times: user 358 ms, sys: 32.6 ms, total: 390 ms
Wall time: 429 ms
(1000000, 100)
(1000, 1000)
999
CPU times: user 184 ms, sys: 3.39 ms, total: 188 ms
Wall time: 221 ms
999999
CPU times: user 1.24 ms, sys: 1 µs, total: 1.24 ms
Wall time: 1.25 ms


In [4]:
## Creating C, We need to treat singles, twins, triplets separately
def get_wish_score(wish):
    assert(wish.max() < NUM_GIFT)
    assert(wish.shape[0] == NUM_CHLD)
    ans = np.zeros((NUM_CHLD, NUM_GIFT)) - 1.
    for i in range(wish.shape[0]):
        for j in range(wish.shape[1]):
            ans[i][wish[i][j]] = WISH_SIZE - j
    ans /= WISH_SIZE
    return ans
%time C = get_wish_score(chld_wish_list)
for i in range(NUM_TRIP):
    C[i] = C[3*i] + C[3*i+1] + C[3*i+2]
for i in range(NUM_TWIN):
    C[3*NUM_TRIP + i] = C[3*NUM_TRIP + 2*i] + C[3*NUM_TRIP + 2*i + 1]
C = np.concatenate([C[:NUM_TRIP], C[3*NUM_TRIP: 3*NUM_TRIP + NUM_TWIN], C[3*NUM_TRIP + 2*NUM_TWIN:]])
print(C.shape)

CPU times: user 1min 48s, sys: 25.6 s, total: 2min 14s
Wall time: 2min 28s
(976666, 1000)


In [5]:
## Getting pref_score
def get_pref_score(pref):
    assert(pref.max() < NUM_CHLD)
    assert(pref.shape[0] == NUM_GIFT)
    ans = np.zeros((NUM_CHLD, NUM_GIFT)) - 1.
    for i in range(pref.shape[0]):
        for j in range(pref.shape[1]):
            ans[pref[i][j]][i] = PREF_SIZE - j
    ans /= PREF_SIZE
    return ans
%time S = get_pref_score(gift_pref_list)
for i in range(NUM_TRIP):
    S[i] = S[3*i] + S[3*i+1] + S[3*i+2]
for i in range(NUM_TWIN):
    S[3*NUM_TRIP + i] = S[3*NUM_TRIP + 2*i] + S[3*NUM_TRIP + 2*i + 1]
S = np.concatenate([S[:NUM_TRIP], S[3*NUM_TRIP: 3*NUM_TRIP + NUM_TWIN], S[3*NUM_TRIP + 2*NUM_TWIN:]])
print(S.shape)

CPU times: user 9.38 s, sys: 18 s, total: 27.3 s
Wall time: 37 s
(976666, 1000)


In [6]:
x_c = tf.placeholder(tf.float32, [None, NUM_GIFT])
x_s = tf.placeholder(tf.float32, [None, NUM_GIFT])

In [7]:
wc = tf.Variable(tf.random_normal((NUM_GIFT, HIDDEN)))
bc = tf.Variable(tf.random_normal([HIDDEN]))
ws = tf.Variable(tf.random_normal((NUM_GIFT, HIDDEN)))
bs = tf.Variable(tf.random_normal([HIDDEN]))
layer_c = tf.nn.relu(tf.matmul(x_c, wc) + bc)
layer_s = tf.nn.relu(tf.matmul(x_c, wc) + bs)

In [8]:
hidden = tf.concat([layer_c, layer_s], 1)
wh = tf.Variable(tf.random_normal((2 * HIDDEN, NUM_GIFT)))
bh = tf.Variable(tf.random_normal([NUM_GIFT]))
y = tf.nn.softmax(tf.matmul(hidden, wh) + bh)

In [9]:
obj_c = tf.reduce_mean(tf.matmul(x_c, y))
obj_s = tf.reduce_mean(tf.matmul(x_s, y))
obj = -obj_c**3 - obj_s**3

In [10]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.005).minimize(obj)

In [11]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [12]:
def next_batch(num, c, s):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 ,len(c))
    np.random.shuffle(idx)
    idx = idx[:num]
    c_shuffle = [c[i] for i in idx]
    s_shuffle = [s[ i] for i in idx]

    return np.asarray(c_shuffle), np.asarray(s_shuffle)

In [13]:
total_iteration = 0
def optimize(C_input, S_input, num_iterations = 500, batch_size = 256):
    start_time = time.time()
    for i in range(num_iterations):

        # Get training set
        c_batch, s_batch = next_batch(batch_size, C_input, S_input)
        train_set = {
            x_c: c_batch,
            x_s: s_batch,
        }
        sess.run(optimizer, feed_dict = train_set)
        if self.total_iteration%50 == 0:
            objc = sess.run(obj, feed_dict = train_set)

            # Message for printing
            msg = "Optimization Iteration: {0:>6}, objective: {1:>6.1%}"
            print(msg.format(total_iteration, objc))
            total_iteration += 1
    print("One set of training: DONE!")

    end_time = time.time()
    time_dif = end_time - start_time
    # Print the time-usage.
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
    return None

In [14]:
optimize(C, S)

InvalidArgumentError: Matrix size-incompatible: In[0]: [256,1000], In[1]: [256,1000]
	 [[Node: MatMul_4 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_1_0_1, Softmax)]]

Caused by op 'MatMul_4', defined at:
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 405, in start
    ioloop.IOLoop.instance().start()
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 260, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 212, in dispatch_shell
    handler(stream, idents, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 370, in execute_request
    user_expressions, allow_stdin)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2902, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3006, in run_ast_nodes
    if self.run_code(code, result):
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3066, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-ed7168af6114>", line 2, in <module>
    obj_s = tf.reduce_mean(tf.matmul(x_s, y))
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 1844, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1289, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [256,1000], In[1]: [256,1000]
	 [[Node: MatMul_4 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_1_0_1, Softmax)]]
