Skip to content

Commit a2535e1

Browse files
committed
feed placeholder
1 parent c060ec5 commit a2535e1

15 files changed

+494
-76
lines changed

Diff for: demo/deep_model/freeze_graph.sh

+17-6
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
11
#!/usr/bin/env bash
22

3+
# Official approach. Not sure why this does not work.
34
#MODEL_DIR=`pwd`
45
#echo $MODEL_DIR
5-
#TensorFlow_HOME=/Users/liujinpeng02/github/tensorflow
6+
#TensorFlow_HOME=/Users/formath/github/tensorflow
67
#cd ${TensorFlow_HOME}
78
#python tensorflow/python/tools/freeze_graph.py \
89
# --input_graph=${MODEL_DIR}/model/graph.pb \
9-
# --input_checkpoint=${MODEL_DIR}/model/model.ckeckpoint \
10-
# --output_graph=${MODEL_DIR}/model/freeze_graph.pb \
11-
# --output_node_names=cross_entropy
10+
# --input_checkpoint=${MODEL_DIR}/checkpoint/model.ckeckpoint \
11+
# --output_graph=${MODEL_DIR}/model \
12+
# --output_node_names='predict/add'
1213
#cd -
1314

15+
# Official approach. Not sure why this does not work.
16+
#python ../../python/freeze.py \
17+
# --checkpoint_dir='./checkpoint' \
18+
# --graph_pb='./model/predict_graph.pb' \
19+
# --output_node_names='predict/add' \
20+
# --output_pb='./model/freeze.pb'
21+
22+
# Hack. This works
1423
python ../../python/freeze_graph.py \
15-
--model_dir=./saved_model \
16-
--output_node_names=Softmax
24+
--checkpoint_dir='./checkpoint' \
25+
--graph_pb='./model/predict_graph.pb' \
26+
--output_node_names='predict/add' \
27+
--output_dir='./model'

Diff for: demo/deep_model/predict.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
#!/usr/bin/env bash
22

3-
# TODO
3+
../../bin/deep_model.bin \
4+
"9,6,116" \
5+
"152,179" \
6+
"" \
7+
"./model/freeze_graph.pb"

Diff for: demo/deep_model/train.sh

+13
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
11
#!/usr/bin/env bash
22

3+
# train
4+
# to save model and checkpoint
35
python ../../python/train.py \
46
--dict "./data/dict.data" \
57
--continuous_fields "" \
68
--sparse_fields "9,6,116" \
79
--linear_fields "152,179" \
810
--train_file "./data/libfm.tfrecord" \
911
--valid_file "./data/libfm.tfrecord"
12+
13+
# also save a model identical to the training one,
14+
# except that the tf.Example input is replaced by placeholders
15+
# so that Tensors can be fed in at prediction time
16+
python ../../python/predict_model.py \
17+
--dict "./data/dict.data" \
18+
--continuous_fields "" \
19+
--sparse_fields "9,6,116" \
20+
--linear_fields "152,179" \
21+
--train_file "./data/libfm.tfrecord" \
22+
--valid_file "./data/libfm.tfrecord"

Diff for: demo/deep_model/trans_data_to_tfrecord.sh

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ python ../../python/dict.py \
88
./data/libfm.data \
99
./data/dict.data
1010

11+
if [[ $? != 0 ]]; then
12+
echo "generate dict error" && exit 1
13+
fi
14+
1115
# transform libfm data into tfrecord
1216
python ../../python/data.py \
1317
./data/dict.data \
@@ -16,3 +20,7 @@ python ../../python/data.py \
1620
'152,179' \
1721
./data/libfm.data \
1822
./data/libfm.tfrecord
23+
24+
if [[ $? != 0 ]]; then
25+
echo "generate tfrecord error" && exit 1
26+
fi

Diff for: demo/simple_model/predict.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/usr/bin/env bash
22

3-
../../bin/simple_model.bin "./saved_model/graph.pb"
3+
../../bin/simple_model.bin "./model/graph.pb"

Diff for: python/data.py

+58-12
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import sys
44
import os
5-
import pickle
5+
from dict_pb2 import Dict as DictPB
66
import tensorflow as tf
77

88
class Data:
@@ -11,12 +11,16 @@ def __init__(self, dict_file, continuous_fields, sparse_fields, linear_fields):
1111
self.ParseFields(continuous_fields, sparse_fields, linear_fields)
1212

1313
# load fieldid and its featureid dict
14-
# field : {featureid : sortid, featureid : sortid, 'miss' : sortid, 'num': feature_num}
1514
def LoadDict(self, dict_file):
16-
self.field_feature_dict = pickle.load(open(dict_file, 'rb'))
17-
print('load field num: ' + str(len(self.field_feature_dict)))
18-
for fieldid in self.field_feature_dict:
19-
print('field: ' + str(fieldid) + ' feature num: ' + str(self.field_feature_dict[fieldid]['num']))
15+
f = open(dict_file, 'rb')
16+
dict = DictPB()
17+
dict.ParseFromString(f.read())
18+
self.field_feature_dict = dict
19+
f.close()
20+
print('load field num: ' + str(len(self.field_feature_dict.field2missid)))
21+
for fieldid, feanum in self.field_feature_dict.field2feanum.items():
22+
print('field: ' + str(fieldid) + ' feature num: ' + str(feanum))
23+
2024

2125
def Dict(self):
2226
assert self.field_feature_dict is not None
@@ -67,10 +71,10 @@ def StringToRecord(self, input_file, output_file):
6771
if fieldid in field2feature:
6872
for featureid in field2feature[fieldid]:
6973
value = field2feature[fieldid][featureid]
70-
feature_id_list.append(self.field_feature_dict[fieldid][featureid])
74+
feature_id_list.append(self.field_feature_dict.featureid2sortid[featureid])
7175
feature_val_list.append(value)
7276
else:
73-
feature_id_list.append(self.field_dict[fieldid]['miss'])
77+
feature_id_list.append(self.field_feature_dict.field2missid[fieldid])
7478
feature_val_list.append(0.0)
7579
feature['sparse_id_in_field_'+str(fieldid)] = tf.train.Feature(int64_list=tf.train.Int64List(value=feature_id_list))
7680
feature['sparse_val_in_field_'+str(fieldid)] = tf.train.Feature(float_list=tf.train.FloatList(value=feature_val_list))
@@ -80,10 +84,10 @@ def StringToRecord(self, input_file, output_file):
8084
if fieldid in field2feature:
8185
for featureid in field2feature[fieldid]:
8286
value = field2feature[fieldid][featureid]
83-
feature_id_list.append(self.field_feature_dict[fieldid][featureid])
87+
feature_id_list.append(self.field_feature_dict.featureid2sortid[featureid])
8488
feature_val_list.append(value)
8589
else:
86-
feature_id_list.append(self.field_dict[fieldid]['miss'])
90+
feature_id_list.append(self.field_feature_dict.field2missid[fieldid])
8791
feature_val_list.append(0.0)
8892
feature['linear_id_in_field_'+str(fieldid)] = tf.train.Feature(int64_list=tf.train.Int64List(value=feature_id_list))
8993
feature['linear_val_in_field_'+str(fieldid)] = tf.train.Feature(float_list=tf.train.FloatList(value=feature_val_list))
@@ -101,14 +105,17 @@ def StringToRecord(self, input_file, output_file):
101105
writer.write(example.SerializeToString())
102106

103107
writer.close()
104-
print('Successfully convert {} to {}'.format(input_file, output_file))
108+
print('Successfully convert {} to {}'.format(input_file, output_file))
105109

106110
def Decode(self, filename_queue):
107111
reader = tf.TFRecordReader()
108112
_, serialized_example = reader.read(filename_queue)
109113
return serialized_example
110114

111115
def ReadBatch(self, file_name, max_epoch, batch_size, thread_num, min_after_dequeue):
116+
'''
117+
Return Tensor and SparseTensor parsed from tfrecord
118+
'''
112119
with tf.name_scope('input'):
113120
filename_queue = tf.train.string_input_producer(
114121
tf.train.match_filenames_once(file_name), num_epochs=max_epoch)
@@ -144,6 +151,45 @@ def ReadBatch(self, file_name, max_epoch, batch_size, thread_num, min_after_dequ
144151
continuous_val = instance['continuous_val']
145152
return instance['label'], sparse_id, sparse_val, linear_id, linear_val, continuous_val
146153

154+
def ReadBatchPlaceholder(self):
155+
'''
156+
Return placeholder
157+
'''
158+
with tf.name_scope('input'):
159+
sparse_id = []
160+
sparse_val = []
161+
for fieldid in self.sparse_field:
162+
with tf.variable_scope('sparse_'+str(fieldid)):
163+
with tf.variable_scope('index'):
164+
self.sparse_index = tf.placeholder(tf.int64)
165+
with tf.variable_scope('id'):
166+
self.sparse_ids = tf.placeholder(tf.int64)
167+
with tf.variable_scope('value'):
168+
self.sparse_vals = tf.placeholder(tf.float32)
169+
with tf.variable_scope('shape'):
170+
self.sparse_shape = tf.placeholder(tf.int64)
171+
sparse_id.append(tf.SparseTensor(self.sparse_index, self.sparse_ids, self.sparse_shape))
172+
sparse_val.append(tf.SparseTensor(self.sparse_index, self.sparse_vals, self.sparse_shape))
173+
linear_id = []
174+
linear_val = []
175+
for fieldid in self.linear_field:
176+
with tf.variable_scope('linear_'+str(fieldid)):
177+
with tf.variable_scope('index'):
178+
self.linear_index = tf.placeholder(tf.int64)
179+
with tf.variable_scope('id'):
180+
self.linear_ids = tf.placeholder(tf.int64)
181+
with tf.variable_scope('value'):
182+
self.linear_vals = tf.placeholder(tf.float32)
183+
with tf.variable_scope('shape'):
184+
self.linear_shape = tf.placeholder(tf.int64)
185+
linear_id.append(tf.SparseTensor(self.linear_index, self.linear_ids, self.linear_shape))
186+
linear_val.append(tf.SparseTensor(self.linear_index, self.linear_vals, self.linear_shape))
187+
with tf.variable_scope('label'):
188+
self.label = tf.placeholder(tf.float32)
189+
with tf.variable_scope('continuous'):
190+
self.continuous_val = tf.placeholder(tf.float32)
191+
return self.label, sparse_id, sparse_val, linear_id, linear_val, self.continuous_val
192+
147193
if __name__ == '__main__':
148194
if len(sys.argv) != 7:
149195
print('''
@@ -157,4 +203,4 @@ def ReadBatch(self, file_name, max_epoch, batch_size, thread_num, min_after_dequ
157203
''')
158204
exit(1)
159205
data = Data(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
160-
data.StringToRecord(sys.argv[5], sys.argv[6])
206+
data.StringToRecord(sys.argv[5], sys.argv[6])

Diff for: python/deep_model.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ def __init__(self, embedding_size, field_feature_dict, sparse_field, continuous_
2828
def concat(self, fields, sparse_id, sparse_val):
2929
emb = []
3030
for i, field_id in enumerate(fields):
31-
input_size = self.field_feature_dict[field_id]['num'] + 1
32-
with tf.variable_scope("emb") as scope:
31+
input_size = self.field_feature_dict.field2feanum[field_id] + 1
32+
with tf.variable_scope("emb_"+str(field_id)):
3333
embedding_variable = tf.Variable(tf.truncated_normal([input_size, self.embedding_size], stddev=0.05), name='emb' + str(field_id))
34-
embedding = tf.nn.embedding_lookup_sparse(embedding_variable, sparse_id[i], sparse_val[i], "mod", combiner="sum")
35-
emb.append(embedding)
34+
embedding = tf.nn.embedding_lookup_sparse(embedding_variable, sparse_id[i], sparse_val[i], "mod", combiner="sum")
35+
emb.append(embedding)
3636
#tf.summary.histogram('emb_' + str(field_id), embedding_variable)
3737
self.embedding.append(embedding_variable)
3838

@@ -56,8 +56,12 @@ def forward(self, sparse_id, sparse_val, linear_id, linear_val, continuous_val):
5656

5757
#hidden layers
5858
for i, hidden_size in enumerate(self.hidden_layer):
59-
dim = net.get_shape().as_list()[1]
60-
with tf.variable_scope("hidden") as scope:
59+
#dim = net.get_shape().as_list()[1]
60+
if i == 0:
61+
dim = self.embedding_size * len(self.sparse_field) + len(self.continuous_field)
62+
else:
63+
dim = self.hidden_layer[i-1]
64+
with tf.variable_scope("hidden"):
6165
weight = tf.Variable(tf.truncated_normal([dim, hidden_size], stddev=0.05), name='fully_weight_'+str(i))
6266
bias = tf.Variable(tf.truncated_normal([hidden_size], stddev=0.05), name='fully_bias_'+str(i))
6367
self.hiddenW.append(weight)
@@ -71,12 +75,16 @@ def forward(self, sparse_id, sparse_val, linear_id, linear_val, continuous_val):
7175
linear_embedding = self.concat(self.linear_field, linear_id, linear_val)
7276
net = tf.concat([net, linear_embedding], 1, name='concat_linear')
7377

74-
dim = net.get_shape().as_list()[1]
78+
#dim = net.get_shape().as_list()[1]
79+
dim = self.hidden_layer[-1]
80+
if len(self.linear_field) > 0:
81+
dim += self.embedding_size * len(self.linear_field)
7582
print("out layer dim:" + str(dim))
76-
with tf.variable_scope("outlayer") as scope:
83+
with tf.variable_scope("outlayer"):
7784
self.weight = tf.Variable(tf.truncated_normal([dim, 1], stddev=0.05), name='weight_out')
7885
self.bias = tf.Variable(tf.truncated_normal([1], stddev=0.05), name='bias_out')
79-
logits = tf.matmul(net, self.weight) + self.bias
86+
with tf.variable_scope("predict"):
87+
logits = tf.matmul(net, self.weight) + self.bias
8088

8189
# add regularization
8290
all_parameter = [self.weight, self.bias] + self.hiddenW + self.hiddenB + self.embedding

Diff for: python/dict.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import sys
44
import os
5-
import pickle
5+
from dict_pb2 import Dict as DictPB
66

77
class Dict:
88
def __init__(self, continuous_fields, sparse_fields, linear_fields):
@@ -49,8 +49,19 @@ def Parse(self, input_file, output_file):
4949
for fieldid in field_feature_dict:
5050
print('field: ' + str(fieldid) + ' feature num: ' + str(field_feature_dict[fieldid]['num']))
5151

52+
dict = DictPB()
53+
for fieldid, feature2sortid_dict in field_feature_dict.items():
54+
for featureid, sortid in feature2sortid_dict.items():
55+
if featureid == 'miss':
56+
dict.field2missid[fieldid] = sortid
57+
continue
58+
if featureid == 'num':
59+
dict.field2feanum[fieldid] = sortid
60+
continue
61+
dict.featureid2sortid[featureid] = sortid
5262
output = open(output_file, 'wb')
53-
pickle.dump(field_feature_dict, output, 2)
63+
output.write(dict.SerializeToString())
64+
output.close()
5465
print('Successfully generate dict from {} to {}'.format(input_file, output_file))
5566

5667
if __name__ == '__main__':

0 commit comments

Comments
 (0)