In [1]:
from functools import wraps
import collections
import os
import time

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, Binarizer, OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

def lazy_property(func):
    """Turn a zero-argument method into a read-only property computed once.

    The first access calls ``func(self)`` and stores the result on the
    instance under ``'_lazy_' + func.__name__``; every later access returns
    that stored value without calling ``func`` again.
    """
    cache_attr = '_lazy_' + func.__name__

    def compute_once(self):
        if hasattr(self, cache_attr):
            return getattr(self, cache_attr)
        setattr(self, cache_attr, func(self))
        return getattr(self, cache_attr)

    # wraps() carries func's name and docstring over to the property getter.
    return property(wraps(func)(compute_once))

  return f(*args, **kwds)
Using TensorFlow backend.


In [2]:
class Param:
    """Plain attribute bag used to carry the notebook's hyper-parameters."""

    def __setattr__(self, attr, value):
        # Write the value straight into the instance dictionary.
        vars(self)[attr] = value

param = Param()
vars(param).update(
    # --- Data ---
    # 2768 brands / 1127 categories occur at least 5 times; the two extra
    # slots are presumably reserved ids (e.g. unknown / padding) - TODO confirm.
    brand_num=2768 + 1 + 1,
    category_num=1127 + 1 + 1,
    condition_num=4,
    vocabulary_size=150000,
    seq_desc_len=100,
    seq_name_len=50,
    seq_cate_len=50,
    # --- Model structure ---
    seq_embed_dim=60,
    brand_embed_dim=40,
    category_embed_dim=20,
    # --- Training ---
    epochs=5,
    batch_size=10000,
    # NOTE(review): the recorded run of this notebook diverged to NaN after
    # the first step; this learning rate may be too high - verify.
    lr=0.1,
    keep_prob=0.9,
)

def list_read(x):
    """Parse the text form of an integer list back into a list of ints.

    Intended as a ``pandas.read_csv`` converter for columns that were written
    as ``str(list_of_ints)``, e.g. ``"[1, 2, 3]"``.

    Args:
        x: string such as ``"[1, 2, 3]"``, ``"[1,2,3]"``, ``"[]"`` or ``""``.
            Surrounding whitespace and any spacing around commas are tolerated.

    Returns:
        A list of ints; empty when the string holds no items.

    Raises:
        ValueError: if an item is not a valid integer literal.
    """
    # Trim whitespace on both sides of the brackets so " [1, 2] " and "[ ]" work.
    body = x.strip().strip("[]").strip()
    if not body:
        return []
    # int() ignores whitespace around each token, so any comma spacing is accepted.
    return [int(token) for token in body.split(',')]

# Load the merged table; the three token-sequence columns are stored as
# stringified integer lists and are parsed back into Python lists on read.
# NOTE(review): absolute, machine-specific path - consider a configurable data dir.
merge = pd.read_csv(
    '/Users/zhouzhirui/data/Mercari_Price_Forcast/merge.csv',
    converters=dict.fromkeys(('seq_name', 'seq_description', 'seq_category'), list_read),
)

In [3]:
def gen_batch_data(dataset, batch_size):
    """Yield one shuffled pass over ``dataset`` as model-ready batches.

    The rows are shuffled once, then cut into consecutive slices of exactly
    ``batch_size`` rows; trailing rows that do not fill a whole batch are
    dropped. Each slice is converted with ``gen_tf_data`` before being yielded.
    """
    shuffled = dataset.sample(frac=1.0).reset_index(drop=True)
    full_batches = shuffled.shape[0] // batch_size
    for start in range(0, full_batches * batch_size, batch_size):
        yield gen_tf_data(shuffled.iloc[start:start + batch_size, :])

def gen_tf_data(subdata):
    """Convert a slice of the merged table into the arrays fed to the model.

    Sequence columns are post-padded to the lengths configured on ``param``;
    scalar columns become ``(rows, 1)`` arrays; ``price`` is transformed with
    ``log1p``.

    Returns:
        dict with keys seq_desc, seq_cate, seq_name, condition, shipping,
        brand, category and price.
    """
    pad = tf.keras.preprocessing.sequence.pad_sequences

    def as_column(series):
        # One value per row, shaped (rows, 1).
        return series.values.reshape(-1, 1)

    return {
        'seq_desc': pad(subdata['seq_description'], maxlen=param.seq_desc_len, padding='post'),
        'seq_cate': pad(subdata['seq_category'], maxlen=param.seq_cate_len, padding='post'),
        'seq_name': pad(subdata['seq_name'], maxlen=param.seq_name_len, padding='post'),
        'condition': as_column(subdata.condition),
        'shipping': as_column(subdata.shipping),
        'brand': as_column(subdata.brand),
        'category': as_column(subdata.category),
        'price': as_column(np.log1p(subdata['price'])),
    }

# Keep only rows whose test_id is null (presumably the labelled training rows
# - TODO confirm). The last `batch_size` of them are held out for validation,
# the rest are used for training.
train = merge.loc[merge['test_id'].isna()].iloc[:-param.batch_size]
val = merge.loc[merge['test_id'].isna()].iloc[-param.batch_size:]
# The validation arrays are built once and reused at every evaluation.
val_tfdata = gen_tf_data(val)

In [4]:
class Inputs(object):
    """Holds the TensorFlow placeholders through which batches are fed.

    Every placeholder has an unspecified first (batch) dimension.
    """

    def __init__(self, param):
        # Fixed-width integer token sequences; widths come from `param`.
        for field, width in (
            ('seq_desc', param.seq_desc_len),
            ('seq_cate', param.seq_cate_len),
            ('seq_name', param.seq_name_len),
        ):
            setattr(self, field, tf.placeholder(dtype=tf.int32, shape=[None, width], name=field))

        # Single-column features; created without an explicit op name.
        for field, dtype in (
            ('brand', tf.int32),
            ('category', tf.int32),
            ('shipping', tf.float32),
            ('condition', tf.int32),
        ):
            setattr(self, field, tf.placeholder(dtype=dtype, shape=[None, 1]))

        # Regression target.
        self.price = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='price')


def add_fc_layer(name, inputs, units, keep_prob, is_train, activation=None):
    """Add a fully connected layer ``activation(inputs @ W + b)`` to the graph.

    Args:
        name: used for both the name scope and the variable scope, and as the
            prefix of the histogram summary tags.
        inputs: 2-D tensor; its second dimension sets the number of rows of W.
        units: number of output units.
        keep_prob: dropout keep probability, or None to disable dropout.
        is_train: Python truth value; dropout is added only when it is true.
        activation: optional callable applied to the affine output.

    Returns:
        The layer output tensor (after activation and, if enabled, dropout).
    """
    # NOTE(review): random_normal() uses its default scale here and the
    # recorded run of this notebook diverged to NaN; a scaled initializer may
    # be worth trying - verify.
    init = tf.initializers.random_normal()
    with tf.name_scope(name):
        with tf.variable_scope(name):
            w = tf.get_variable('W', shape=[inputs.get_shape()[1], units], initializer=init)
            b = tf.get_variable('b', shape=[units])
            wx_plus_b = tf.nn.bias_add(tf.matmul(inputs, w, name='wx'), b, name='wx_plus_b')
            if activation:
                wx_plus_b = activation(wx_plus_b)
            if (keep_prob is not None) and is_train:
                wx_plus_b = tf.nn.dropout(wx_plus_b, keep_prob=keep_prob)
            # Tags need a %s placeholder for the layer name; weights and bias
            # get distinct tags so both histograms are recorded.
            tf.summary.histogram('%s/W' % name, w)
            tf.summary.histogram('%s/b' % name, b)
            return wx_plus_b

def add_embed_layer(name, inputs, input_dim=None, output_dim=None, reuse=False):
    """Look up ``inputs`` in an embedding matrix created or reused under ``name``.

    Args:
        name: used for both the name scope and the variable scope, and as the
            prefix of the histogram summary tag.
        inputs: integer id tensor passed to ``tf.nn.embedding_lookup``.
        input_dim: number of rows of the matrix; required unless ``reuse``.
        output_dim: embedding width; required unless ``reuse``.
        reuse: when true, fetch the existing ``matrix`` variable of this scope
            instead of creating a new one (no new histogram is added).

    Returns:
        The embedded tensor.

    Raises:
        ValueError: if a new matrix is requested without both dimensions.
    """
    if not reuse and (input_dim is None or output_dim is None):
        raise ValueError('input_dim and output_dim are required when reuse is False')
    init = tf.initializers.random_normal()
    with tf.name_scope(name):
        with tf.variable_scope(name) as vs:
            if reuse:
                vs.reuse_variables()
                matrix = tf.get_variable('matrix')
            else:
                matrix = tf.get_variable('matrix', shape=[input_dim, output_dim], initializer=init)
                # The tag needs a %s placeholder for the layer name.
                tf.summary.histogram('%s/matrix' % name, matrix)
        embed = tf.nn.embedding_lookup(matrix, inputs)
        return embed

def add_rnn_layer(name, inputs, cell, units, keep_prob, batch_size, is_train):
    """Run a recurrent cell over ``inputs`` and return the last time step's output.

    Args:
        name: used for both the name scope and the variable scope.
        inputs: float tensor laid out as [batch, time, features].
        cell: ``'lstm'`` selects ``BasicLSTMCell``; any other value selects
            ``BasicRNNCell``.
        units: size of the cell and of the returned feature vector.
        keep_prob: keep probability for input/output dropout, or None.
        batch_size: kept for backward compatibility and ignored. The zero
            initial state is now sized from the batch actually fed, so the
            graph accepts any batch size (partial batches, single rows, a
            validation set of a different size).
        is_train: Python truth value; dropout is added only when it is true.

    Returns:
        Tensor of shape [batch, units] taken from the final time step.
    """
    with tf.name_scope(name):
        with tf.variable_scope(name):
            if cell == 'lstm':
                rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(units)
            else:
                rnn_cell = tf.nn.rnn_cell.BasicRNNCell(units)
            if is_train and (keep_prob is not None):
                rnn_cell = tf.nn.rnn_cell.DropoutWrapper(
                    rnn_cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
            # Passing dtype instead of a pre-built initial_state makes
            # dynamic_rnn create a zero state matching the runtime batch size.
            outputs, _ = tf.nn.dynamic_rnn(rnn_cell, inputs, dtype=tf.float32)
            # NOTE(review): the feeding code pads sequences at the end, so the
            # last step is reached after the padding tokens - confirm intended.
            return tf.reshape(outputs[:, -1, :], [-1, units])

def add_concat_layer(name, tensors):
    """Join ``tensors`` along axis 1 inside a name scope called ``name``."""
    with tf.name_scope(name):
        return tf.concat(tensors, axis=1)

In [5]:
class RNN:
    """Price-regression graph: embeddings, three LSTM encoders and a small MLP.

    Constructing an instance immediately adds the prediction, loss and
    training ops to the default TensorFlow graph.

    Args:
        is_train: Python bool; when true, dropout is enabled in the LSTM
            encoders and the two hidden dense layers.
        param: hyper-parameter object providing vocabulary_size, seq_embed_dim,
            brand_num, brand_embed_dim, category_num, category_embed_dim,
            condition_num, keep_prob, batch_size and lr.
        inputs: object exposing the placeholders seq_desc, seq_cate, seq_name,
            brand, category, shipping, condition and price.
    """

    def __init__(self, is_train, param, inputs):
        # config
        # Must be a plain bool: a one-element tuple would always be truthy and
        # dropout could then never be switched off.
        self.is_train = is_train
        self.param = param
        # op
        self.inputs = inputs
        # Touch each lazy property once so the ops are created now, in order.
        self.predict
        self.loss
        self.train

    @lazy_property
    def predict(self):
        """Build the forward pass and return the [batch, 1] prediction tensor."""
        # The three text sequences share one token-embedding matrix: the first
        # call creates it, the other two reuse it.
        seq_desc_embed = add_embed_layer(
            name='seq_embed',
            inputs=self.inputs.seq_desc,
            input_dim=self.param.vocabulary_size,
            output_dim=self.param.seq_embed_dim
        )
        seq_cate_embed = add_embed_layer(
            name='seq_embed',
            inputs=self.inputs.seq_cate,
            reuse=True
        )
        seq_name_embed = add_embed_layer(
            name='seq_embed',
            inputs=self.inputs.seq_name,
            reuse=True
        )
        brand_embed = add_embed_layer(
            name='brand_embed',
            inputs=self.inputs.brand,
            input_dim=self.param.brand_num,
            output_dim=self.param.brand_embed_dim
        )
        # The id placeholders are [batch, 1], so the lookup yields
        # [batch, 1, dim]; flatten to [batch, dim].
        brand_embed = tf.reshape(brand_embed, shape=[-1, self.param.brand_embed_dim])

        category_embed = add_embed_layer(
            name='category_embed',
            inputs=self.inputs.category,
            input_dim=self.param.category_num,
            output_dim=self.param.category_embed_dim
        )
        category_embed = tf.reshape(category_embed, shape=[-1, self.param.category_embed_dim])

        desc_rnn = add_rnn_layer('desc_lstm', seq_desc_embed, 'lstm', 16, self.param.keep_prob, self.param.batch_size, self.is_train)
        name_rnn = add_rnn_layer('name_lstm', seq_name_embed, 'lstm', 8, self.param.keep_prob, self.param.batch_size, self.is_train)
        cate_rnn = add_rnn_layer('cate_lstm', seq_cate_embed, 'lstm', 8, self.param.keep_prob, self.param.batch_size, self.is_train)

        # One-hot the condition id and flatten it to [batch, condition_num].
        condition = tf.reshape(tf.one_hot(self.inputs.condition, depth=self.param.condition_num), [-1, self.param.condition_num])
        shipping = self.inputs.shipping
        main = add_concat_layer('concat', [condition, shipping, desc_rnn, name_rnn, cate_rnn, brand_embed, category_embed])

        fc1 = add_fc_layer('fc1', main, 64, self.param.keep_prob, self.is_train, tf.nn.relu)
        fc2 = add_fc_layer('fc2', fc1, 32, self.param.keep_prob, self.is_train, tf.nn.relu)
        # Output layer: linear, never uses dropout (is_train is forced to False).
        output = add_fc_layer('predict', fc2, 1, 1, False, None)

        return output

    @lazy_property
    def loss(self):
        """Mean squared error between the price placeholder and the prediction."""
        mse = tf.losses.mean_squared_error(labels=self.inputs.price, predictions=self.predict)
        tf.summary.scalar('mse', mse)
        return mse

    @lazy_property
    def train(self):
        """Plain gradient-descent step on ``loss`` with learning rate ``param.lr``."""
        opt = tf.train.GradientDescentOptimizer(self.param.lr).minimize(self.loss)
        return opt

In [6]:
# Build the graph: placeholders first, then the training-mode model that
# consumes them. (No separate inference-mode model sharing these variables
# is built.)
inputs = Inputs(param=param)
model = RNN(is_train=True, param=param, inputs=inputs)

# Merge every summary op registered while the model was built and open a
# writer that also records the graph.
# NOTE(review): absolute, machine-specific log directory.
merge_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(logdir='/Users/zhouzhirui/Desktop/log', graph=tf.get_default_graph())

# Variables must be initialised before any training step runs.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(fetches=init)

In [7]:
def _make_feed_dict(model, tfdata):
    """Map each input placeholder of `model` to the same-named array in `tfdata`."""
    fields = ('price', 'seq_cate', 'seq_desc', 'seq_name',
              'brand', 'category', 'shipping', 'condition')
    return {getattr(model.inputs, field): tfdata[field] for field in fields}


# Summary step counter: it keeps increasing across epochs so that points
# written in different epochs never share the same step in TensorBoard.
global_step = 0
for i in range(param.epochs):
    # k is the step index within the current epoch (used for logging cadence).
    for k, train_tfdata in enumerate(gen_batch_data(train, param.batch_size)):
        _, loss, summary = sess.run(
            [model.train, model.loss, merge_summary],
            _make_feed_dict(model, train_tfdata))
        writer.add_summary(summary, global_step)
        global_step += 1
        if k % 5 == 0:
            # NOTE(review): validation runs through the training-mode model,
            # so dropout is still active here - confirm that is intended.
            loss_val = sess.run(model.loss, _make_feed_dict(model, val_tfdata))
            print('epoch:%d  step%d : train_loss:%.4f ,val_loss:%.4f'%(i, k, loss, loss_val))

epoch:0  step0 : train_loss:86293.0000 ,val_loss:10520232332547273808412672.0000


InvalidArgumentError: Nan in summary histogram for: predict/predict/predict/W
	 [[Node: predict/predict/predict/W = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](predict/predict/predict/W/tag, predict/W/read)]]

Caused by op 'predict/predict/predict/W', defined at:
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-9a0a93652894>", line 3, in <module>
    model = RNN(True, param, inputs)
  File "<ipython-input-5-d5d9205cadef>", line 8, in __init__
    self.predict
  File "<ipython-input-1-3d4a91bc1e27>", line 24, in wrapper
    setattr(self, attr, func(self))
  File "<ipython-input-5-d5d9205cadef>", line 56, in predict
    output = add_fc_layer('predict', fc2, 1, 1, False, None)
  File "<ipython-input-4-5188741a5965>", line 25, in add_fc_layer
    tf.summary.histogram('%s/W'%name, w)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tensorflow/python/summary/summary.py", line 192, in histogram
    tag=tag, values=values, name=scope)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 188, in _histogram_summary
    "HistogramSummary", tag=tag, values=values, name=name)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/zhouzhirui/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Nan in summary histogram for: predict/predict/predict/W
	 [[Node: predict/predict/predict/W = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](predict/predict/predict/W/tag, predict/W/read)]]
