Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:HIT-SCIR/ltp into develop
Browse files: browse the repository at this point in the history
  • Loading branch information
liu946 committed Oct 23, 2017
2 parents 2c48d28 + a46ec04 commit 97c4d72
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
24 changes: 23 additions & 1 deletion src/segmentor/segmentor.cpp
Expand Up @@ -233,10 +233,32 @@ void Segmentor::load_lexicon(const char* filename, Model::lexicon_t* lexicon) co
std::ifstream ifs(filename);
if (!ifs.good()) { return; }
std::string line;
bool updated;
std::string full;
std::string tmp;
while (std::getline(ifs, line)) {
trim(line);
std::string form = line.substr(0, line.find_first_of(" \t"));
lexicon->set(form.c_str(), true);
updated = false;
for(int index=0; index<form.size();) {
if((form[index] & 0x80) == 0) {
if(!updated)
full = form.substr(0, index);
strutils::chartypes::sbc2dbc(form.substr(index, 1), tmp);
full += tmp;
index += 1;
updated = true;
} else if ((form[index] & 0xE0) == 0xC0) index += 2;
else if ((form[index] & 0xF0) == 0xE0) index += 3;
else if ((form[index] & 0xF8) == 0xF0) index += 4;
else if ((form[index] & 0xFC) == 0xF8) index += 5;
else if ((form[index] & 0xFE) == 0xFC) index += 6;
else {
ERROR_LOG("Unknown character prefix : 0x%x @ %s\n", form[index], form.c_str());
continue;
}
}
lexicon->set(updated?full.c_str():form.c_str(), true);
}
INFO_LOG("loaded %d lexicon entries", lexicon->size());
}
Expand Down
6 changes: 4 additions & 2 deletions src/server/ltp_server.cpp
Expand Up @@ -521,10 +521,12 @@ static int Service(struct mg_connection *conn) {
TRACE_LOG("Analysis is done.");

std::string strResult;
if (str_format == "xml") {
if (str_format == "xml") { //xml
xml4nlp.SaveDOM(strResult);
} else { //json
} else if (str_format == "json") { //json
strResult = xml2jsonstr(xml4nlp, str_type);
} else { // if str_format not set, or is invalid, use xml
xml4nlp.SaveDOM(strResult);
}


Expand Down

0 comments on commit 97c4d72

Please sign in to comment.