Skip to content

Commit

Permalink
make it work with both the master and processor branches of ucto
Browse files Browse the repository at this point in the history
  • Loading branch information
Ko van der Sloot authored and Ko van der Sloot committed Mar 11, 2019
1 parent e07e1b0 commit 2de4458
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 28 deletions.
2 changes: 1 addition & 1 deletion include/frog/ucto_tokenizer_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ class UctoTokenizer {
void setDocID( const std::string& );
void setTextRedundancy( const std::string& );
folia::Document *tokenizestring( const std::string& );
folia::Document *tokenize_folia( const std::string& );
folia::Document *tokenize( std::istream& );
bool tokenize( folia::Document& );
std::vector<std::string> tokenize( const std::string& );
std::string tokenizeStream( std::istream& );
private:
Expand Down
31 changes: 12 additions & 19 deletions src/FrogAPI.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -645,26 +645,20 @@ void FrogAPI::FrogServer( Sockets::ServerSocket &conn ){
if ( options.debugFlag > 5 ){
LOG << "received data [" << result << "]" << endl;
}
folia::Document doc;
try {
doc.readFromString( result );
}
catch ( std::exception& e ){
LOG << "FoLiaParsing failed:" << endl << e.what() << endl;
throw;
}
LOG << "Processing XML... " << endl;
timers.reset();
timers.tokTimer.start();
tokenizer->tokenize( doc );
folia::Document *doc;
doc = tokenizer->tokenize_folia( result );
timers.tokTimer.stop();
FrogDoc( doc );
FrogDoc( *doc );
if ( options.doXMLout ){
doc.save( outputstream, options.doKanon );
doc->save( outputstream, options.doKanon );
}
else {
showResults( outputstream, doc );
showResults( outputstream, *doc );
}
delete doc;
// LOG << "Done Processing XML... " << endl;
}
else {
Expand Down Expand Up @@ -1489,26 +1483,25 @@ void FrogAPI::FrogFile( const string& infilename,
if ( xml_in ){
timers.reset();
timers.tokTimer.start();
folia::Document doc;
folia::Document *doc;
try {
doc.readFromFile( infilename );
doc = tokenizer->tokenize_folia( infilename );
}
catch ( exception &e ){
LOG << "retrieving FoLiA from '" << infilename << "' failed with exception:" << endl;
LOG << e.what() << endl;
throw ( runtime_error( "read failed" ) );
}
tokenizer->setInputXml(true); // THIS IS SILLY, the tokenizer knows it get FoLiA
tokenizer->tokenize( doc );
timers.tokTimer.stop();
FrogDoc( doc );
FrogDoc( *doc );
if ( !options.noStdOut ){
showResults( os, doc );
showResults( os, *doc );
}
if ( !xmlOutFile.empty() ){
doc.save( xmlOutFile, options.doKanon );
doc->save( xmlOutFile, options.doKanon );
LOG << "resulting FoLiA doc saved in " << xmlOutFile << endl;
}
delete doc;
}
else {
ifstream IN( infilename );
Expand Down
35 changes: 27 additions & 8 deletions src/ucto_tokenizer_mod.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,24 @@ string UctoTokenizer::tokenizeStream( istream& is ){
else
throw runtime_error( "ucto tokenizer not initialized" );
}

// Read a FoLiA document and tokenize it.
//
// buffer is either the XML text itself (recognised by a leading "<?xml "
// declaration) or, in every other case, the name of a file to read.
//
// Returns a newly allocated document; ownership passes to the caller, who
// has to delete it.
//
// Throws runtime_error when the ucto tokenizer is not initialized. An
// exception raised while reading or tokenizing is passed on unchanged, after
// the partially built document has been released.
folia::Document *UctoTokenizer::tokenize_folia( const string& buffer ){
  if ( !tokenizer ){
    throw runtime_error( "ucto tokenizer not initialized" );
  }
  const string xml_decl = "<?xml ";
  folia::Document *doc = new folia::Document();
  try {
    // Only the start of the buffer decides; an anchored compare avoids
    // searching through a complete document for the declaration.
    if ( buffer.compare( 0, xml_decl.size(), xml_decl ) == 0 ){
      doc->readFromString( buffer );
    }
    else {
      doc->readFromFile( buffer );
    }
    tokenize( *doc );
  }
  catch ( ... ){
    // The caller never receives the pointer on failure, so free it here
    // before handing the original exception on.
    delete doc;
    throw;
  }
  return doc;
}

#else

vector<string> UctoTokenizer::tokenize( const string& line ){
if ( tokenizer ){
tokenizer->reset();
Expand All @@ -264,6 +281,16 @@ string UctoTokenizer::tokenizeStream( istream& is ){
else
throw runtime_error( "ucto tokenizer not initialized" );
}

// Forward a FoLiA tokenization request to the wrapped ucto tokenizer.
//
// buffer is passed through untouched; the result of the wrapped
// tokenizer's tokenize_folia() is returned as is.
//
// Throws runtime_error when no tokenizer has been set up.
folia::Document *UctoTokenizer::tokenize_folia( const string& buffer ){
  if ( tokenizer ){
    return tokenizer->tokenize_folia( buffer );
  }
  throw runtime_error( "ucto tokenizer not initialized" );
}

#endif

folia::Document *UctoTokenizer::tokenize( istream& is ){
Expand All @@ -281,11 +308,3 @@ folia::Document *UctoTokenizer::tokenizestring( const string& s){
else
throw runtime_error( "ucto tokenizer not initialized" );
}

// Hand an existing FoLiA document to the wrapped ucto tokenizer and return
// whatever that tokenizer reports.
//
// Throws runtime_error when no tokenizer has been set up.
bool UctoTokenizer::tokenize( folia::Document& doc ){
  if ( !tokenizer ){
    throw runtime_error( "ucto tokenizer not initialized" );
  }
  return tokenizer->tokenize( doc );
}

0 comments on commit 2de4458

Please sign in to comment.