Skip to content

Commit c060ec5

Browse files
committed
fix
1 parent 0a9094f commit c060ec5

File tree

4 files changed

+196
-34
lines changed

4 files changed

+196
-34
lines changed

Diff for: include/util.h

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef UTIL_H_
2+
#define UTIL_H_
3+
4+
#include <sstream>
#include <string>
#include <vector>
6+
7+
namespace util {
8+
9+
// Returns a copy of `str` with leading and trailing characters drawn from
// `whitespace` removed. Returns "" when `str` is empty or consists entirely
// of such characters.
//
// `inline` is required here: this function is defined in a header, and a
// non-inline definition violates the One Definition Rule (duplicate-symbol
// link errors) as soon as util.h is included from two translation units.
inline std::string trim(const std::string& str, const std::string& whitespace = " \t") {
  const auto strBegin = str.find_first_not_of(whitespace);
  if (strBegin == std::string::npos) return "";  // nothing but whitespace
  const auto strEnd = str.find_last_not_of(whitespace);
  const auto strRange = strEnd - strBegin + 1;
  return str.substr(strBegin, strRange);
}
16+
17+
// Splits `s` on `delim` and stores the pieces in `elems`, replacing any
// previous contents. Consecutive delimiters yield empty tokens; an empty
// input yields an empty vector. Requires <sstream> (see header includes).
//
// `inline` is required here: defined in a header, so a non-inline
// definition breaks the ODR once util.h is included from multiple TUs.
inline void split(const std::string& s, char delim, std::vector<std::string>& elems) {
  elems.clear();
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, delim)) {
    elems.push_back(item);
  }
}
25+
26+
} // namespace util
27+
28+
#endif // UTIL_H_

Diff for: src/CMakeLists.txt

+9-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@ set(DEEP_CTR_LINKER_LIBS "")
1818
list(APPEND DEEP_CTR_LINKER_LIBS libtensorflow-core-x86_64.a libprotobuf.a libnsync.a pthread m z)
1919

2020
# executable
21-
set(TEST "simple_model.bin")
22-
add_executable(${TEST} "simple_model.cc")
21+
set(SIMPLE_MODEL "simple_model.bin")
22+
add_executable(${SIMPLE_MODEL} "simple_model.cc")
2323
set(EXECUTABLE_OUTPUT_PATH "${PROJECT_SOURCE_DIR}/bin")
24-
target_link_libraries(${TEST} ${DEEP_CTR_LINKER_LIBS})
24+
target_link_libraries(${SIMPLE_MODEL} ${DEEP_CTR_LINKER_LIBS})
25+
26+
# executable
27+
set(DEEP_MODEL "deep_model.bin")
28+
add_executable(${DEEP_MODEL} "deep_model.cc")
29+
set(EXECUTABLE_OUTPUT_PATH "${PROJECT_SOURCE_DIR}/bin")
30+
target_link_libraries(${DEEP_MODEL} ${DEEP_CTR_LINKER_LIBS})

Diff for: src/deep_model.cc

+157-29
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
1+
#include <string>
2+
#include <sstream>
3+
#include <vector>
4+
#include <unordered_map>
5+
#include "util.h"
16
#include "tensorflow/core/public/session.h"
27
#include "tensorflow/core/platform/env.h"
38
#include "tensorflow/core/framework/tensor.h"
49
#include "tensorflow/core/util/sparse/sparse_tensor.h"
10+
#include "tensorflow/core/framework/tensor_testutil.h"
11+
#include "tensorflow/core/framework/types.h"
12+
#include "tensorflow/core/framework/tensor.pb.h"
13+
#include "tensorflow/core/framework/variant_encode_decode.h"
14+
#include "tensorflow/core/framework/variant_tensor_data.h"
15+
#include "tensorflow/core/lib/strings/strcat.h"
16+
#include "tensorflow/core/platform/logging.h"
17+
#include "tensorflow/core/platform/test.h"
18+
#include "tensorflow/core/platform/test_benchmark.h"
519

620
using namespace tensorflow;
721

@@ -21,9 +35,9 @@ using namespace tensorflow;
2135
// 7 | 5 | 465 | 1.0 |
2236
// SparseTensor for field id, each SparseTensor construtct of three Tensor
2337
auto dense_int_indices1 =
24-
test::AsTensor<int64>({0, 0, 0, 1, 1, 0, 3, 0, 3, 1, 7, 0}, {6, 2}); // 每个数据对应[i,j],共有6个数字,所以6行,每行最大2个数字,所以2列
38+
test::AsTensor<int64>({0, 0, 0, 1, 1, 0, 3, 0, 3, 1, 7, 0}, {6, 2});
2539
auto dense_int_values1 = test::AsTensor<int64>({1, 8, 0, 2, 0, 5}); // row-major
26-
auto dense_int_shape1 = TensorShape({8, 2}); // [样本量,每个样本最大id数目]
40+
auto dense_int_shape1 = TensorShape({8, 2});
2741
sparse::SparseTensor sparse_tensor1(
2842
dense_int_indices1, dense_int_values1, dense_int_shape1);
2943
// SparseTensor for feature id
@@ -43,79 +57,193 @@ using namespace tensorflow;
4357
*/
4458

4559
int main(int argc, char* argv[]) {
60+
// parse field
61+
std::vector<std::string> tokens;
62+
std::vector<int> sparse_field; // sparse field
63+
util::split(argv[1], ',', tokens);
64+
for (std::string token: tokens) {
65+
sparse_field.push_back(std::stoi(token));
66+
}
67+
std::vector<int> linear_field; // linear field
68+
util::split(argv[2], ',', tokens);
69+
for (std::string token: tokens) {
70+
linear_field.push_back(std::stoi(token));
71+
}
72+
std::vector<int> continuous_field; // continuous field
73+
util::split(argv[3], ',', tokens);
74+
for (std::string token: tokens) {
75+
continuous_field.push_back(std::stoi(token));
76+
}
77+
4678
// Initialize a tensorflow session
4779
Session* session;
4880
Status status = NewSession(SessionOptions(), &session);
4981
if (!status.ok()) {
5082
std::cout << status.ToString() << "\n";
5183
return 1;
84+
} else {
85+
std::cout << "Session created successfully" << std::endl;
5286
}
5387

5488
// Read in the protobuf graph we exported
5589
// (The path seems to be relative to the cwd. Keep this in mind
5690
// when using `bazel run` since the cwd isn't where you call
5791
// `bazel run` but from inside a temp folder.)
5892
GraphDef graph_def;
59-
std::string graph_path = argv[1];
93+
std::string graph_path = argv[4];
6094
status = ReadBinaryProto(Env::Default(), graph_path, &graph_def);
6195
if (!status.ok()) {
62-
throw runtime_error("Error loading graph from " + graph_path + ": " + status.ToString());
96+
std::cout << status.ToString() << std::endl;
97+
} else {
98+
std::cout << "Load graph protobuf successfully" << std::endl;
6399
}
64100

65101
// Add the graph to the session
66102
status = session->Create(graph_def);
67103
if (!status.ok()) {
68-
throw runtime_error("Error set graph to session: " + status.ToString());
104+
std::cout << status.ToString() << std::endl;
105+
return 1;
106+
} else {
107+
std::cout << "Add graph to session successfully" << std::endl;
69108
}
70109

71110
// Read parameters from the saved checkpoint
72-
Tensor checkpointPathTensor(DT_STRING, TensorShape());
73-
std::string checkpoint_path = argv[2];
111+
/*Tensor checkpointPathTensor(DT_STRING, TensorShape());
112+
std::string checkpoint_path = argv[5];
74113
checkpointPathTensor.scalar<std::string>()() = checkpoint_path;
75114
status = session->Run(
76115
{{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
77116
{},
78117
{graph_def.saver_def().restore_op_name()},
79118
nullptr);
80119
if (!status.ok()) {
81-
throw runtime_error("Error loading checkpoint from " + checkpoint_path + ": " + status.ToString());
120+
std::cout << status.ToString() << std::endl;
121+
return 1;
122+
} else {
123+
std::cout << "Load checkpoint successfully" << std::endl;
124+
}*/
125+
126+
// Setup inputs and outputs
127+
// input 9:283:1 6:384:1 152:384:1
128+
std::string libfm_data = "9:283:1 6:384:1 152:384:1";
129+
std::unordered_map<int32, std::unordered_map<int32, float> > instance;
130+
std::vector<std::string> features;
131+
util::split(libfm_data, ' ', features);
132+
for (std::string feature: features) {
133+
std::vector<std::string> tokens;
134+
util::split(feature, ':', tokens);
135+
int32 fieldid;
136+
int32 featureid;
137+
float value;
138+
int i = 0;
139+
for (std::string token: tokens) {
140+
if (i == 0) {
141+
fieldid = std::stoi(token);
142+
} else if (i == 1) {
143+
featureid = std::stoi(token);
144+
} else if (i == 2) {
145+
value = std::stof(token);
146+
}
147+
i++;
148+
}
149+
if (instance.find(fieldid) == instance.end()) {
150+
std::unordered_map<int32, float> f;
151+
f[featureid] = value;
152+
instance[fieldid] = f;
153+
} else {
154+
instance[fieldid][featureid] = value;
155+
}
82156
}
83157

84-
// Setup inputs and outputs:
85-
86-
// Our graph doesn't require any inputs, since it specifies default values,
87-
// but we'll change an input to demonstrate.
88-
Tensor a(DT_FLOAT, TensorShape());
89-
a.scalar<float>()() = 3.0;
90-
91-
Tensor b(DT_FLOAT, TensorShape());
92-
b.scalar<float>()() = 2.0;
93-
94-
std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
95-
{ "a", a },
96-
{ "b", b },
97-
};
158+
std::vector<std::pair<std::string, sparse::SparseTensor> > inputs;
159+
for (int i = 0; i < sparse_field.size(); i++) {
160+
uint32 fieldid = sparse_field[i];
161+
std::vector<int32> indice;
162+
std::vector<int32> fid_list;
163+
std::vector<float> fval_list;
164+
if (instance.find(fieldid) != instance.end()) {
165+
int num = 0;
166+
for (std::unordered_map<int32, float>::const_iterator iter = instance[fieldid].begin();
167+
iter != instance[fieldid].end(); iter++) {
168+
indice.push_back(0);
169+
indice.push_back(num++);
170+
fid_list.push_back(iter->first);
171+
fval_list.push_back(iter->second);
172+
}
173+
} else {
174+
fid_list.push_back(0); // missid
175+
fval_list.push_back(0.0);
176+
}
177+
auto id_indice_tensor =
178+
test::AsTensor<int32>(indice, {static_cast<int32>(indice.size()/2), 2});
179+
auto id_list_tensor = test::AsTensor<int32>(fid_list);
180+
auto id_tensor_shape = TensorShape({1, static_cast<int32>(fid_list.size())});
181+
sparse::SparseTensor id_sparse_tensor(id_indice_tensor, id_list_tensor, id_tensor_shape);
182+
auto val_indice_tensor =
183+
test::AsTensor<int32>(indice, {static_cast<int32>(indice.size()/2), 2});
184+
auto val_list_tensor = test::AsTensor<float>(fval_list);
185+
auto val_tensor_shape = TensorShape({1, static_cast<int32>(fval_list.size())});
186+
187+
// todo run embedding here
188+
189+
sparse::SparseTensor val_sparse_tensor(val_indice_tensor, val_list_tensor, val_tensor_shape);
190+
inputs.push_back(std::pair<std::string, sparse::SparseTensor>("sparse_id_in_field_"+std::to_string(fieldid), id_sparse_tensor));
191+
inputs.push_back(std::pair<std::string, sparse::SparseTensor>("sparse_val_in_field_"+std::to_string(fieldid), val_sparse_tensor));
192+
}
193+
for (int i = 0; i < linear_field.size(); i++) {
194+
uint32 fieldid = linear_field[i];
195+
std::vector<int32> indice;
196+
std::vector<int32> fid_list;
197+
std::vector<float> fval_list;
198+
if (instance.find(fieldid) != instance.end()) {
199+
int num = 0;
200+
for (std::unordered_map<int32, float>::const_iterator iter = instance[fieldid].begin();
201+
iter != instance[fieldid].end(); iter++) {
202+
indice.push_back(0);
203+
indice.push_back(num++);
204+
fid_list.push_back(iter->first);
205+
fval_list.push_back(iter->second);
206+
}
207+
} else {
208+
fid_list.push_back(0); // missid
209+
fval_list.push_back(0.0);
210+
}
211+
auto id_indice_tensor =
212+
test::AsTensor<int32>(indice, {static_cast<int32>(indice.size()/2), 2});
213+
auto id_list_tensor = test::AsTensor<int32>(fid_list);
214+
auto id_tensor_shape = TensorShape({1, static_cast<int32>(fid_list.size())});
215+
sparse::SparseTensor id_sparse_tensor(id_indice_tensor, id_list_tensor, id_tensor_shape);
216+
auto val_indice_tensor =
217+
test::AsTensor<int32>(indice, {static_cast<int32>(indice.size()/2), 2});
218+
auto val_list_tensor = test::AsTensor<float>(fval_list);
219+
auto val_tensor_shape = TensorShape({1, static_cast<int32>(fval_list.size())});
220+
sparse::SparseTensor val_sparse_tensor(val_indice_tensor, val_list_tensor, val_tensor_shape);
221+
inputs.push_back(std::pair<std::string, sparse::SparseTensor>("linear_id_in_field_"+std::to_string(fieldid), id_sparse_tensor));
222+
inputs.push_back(std::pair<std::string, sparse::SparseTensor>("linear_val_in_field_"+std::to_string(fieldid), val_sparse_tensor));
223+
}
98224

99225
// The session will initialize the outputs
100226
std::vector<tensorflow::Tensor> outputs;
101227

102-
// Run the session, evaluating our "c" operation from the graph
103-
status = session->Run(inputs, {"c"}, {}, &outputs);
228+
// Run the session, evaluating our "softmax" operation from the graph
229+
//status = session->Run(inputs, {"Softmax"}, {}, &outputs);
104230
if (!status.ok()) {
105-
std::cout << status.ToString() << "\n";
231+
std::cout << status.ToString() << std::endl;
106232
return 1;
233+
} else {
234+
std::cout << "Run session successfully" << std::endl;
107235
}
108236

109-
// Grab the first output (we only evaluated one graph node: "c")
237+
// Grab the first output (we only evaluated one graph node: "softmax")
110238
// and convert the node to a scalar representation.
111-
auto output_c = outputs[0].scalar<float>();
239+
auto output_softmax = outputs[0].scalar<float>();
112240

113241
// (There are similar methods for vectors and matrices here:
114242
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/tensor.h)
115243

116244
// Print the results
117-
std::cout << outputs[0].DebugString() << "\n"; // Tensor<type: float shape: [] values: 30>
118-
std::cout << output_c() << "\n"; // 30
245+
std::cout << outputs[0].DebugString() << std::endl;
246+
std::cout << "output value: " << output_softmax() << std::endl;
119247

120248
// Free any resources used by the session
121249
session->Close();

Diff for: src/simple_model.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ int main(int argc, char* argv[]) {
1919
// when using `bazel run` since the cwd isn't where you call
2020
// `bazel run` but from inside a temp folder.)
2121
GraphDef graph_def;
22-
std::string model_path = argv[1];
23-
status = ReadBinaryProto(Env::Default(), model_path, &graph_def);
22+
std::string graph_path = argv[1];
23+
status = ReadBinaryProto(Env::Default(), graph_path, &graph_def);
2424
if (!status.ok()) {
2525
std::cout << status.ToString() << std::endl;
2626
return 1;

0 commit comments

Comments
 (0)