Skip to content

Commit

Permalink
Merge branch 'folia2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ko van der Sloot authored and Ko van der Sloot committed May 22, 2019
2 parents 7cb9035 + f7d2ea8 commit dcd77e1
Show file tree
Hide file tree
Showing 27 changed files with 721 additions and 629 deletions.
9 changes: 6 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
--- # frog.yml
sudo: required
group: edge

# whitelist
branches:
only:
- master
- folia2.0

notifications:
irc:
Expand Down Expand Up @@ -80,6 +82,7 @@ before_install:
- cd ..
- git clone https://github.com/LanguageMachines/libfolia
- cd libfolia
- if [ "$TRAVIS_BRANCH" == "folia2.0" ]; then git checkout folia2.0; fi
- bash bootstrap.sh
- ./configure $OPENMPFLAG
- make
Expand All @@ -94,6 +97,7 @@ before_install:
- cd ..
- git clone https://github.com/LanguageMachines/ucto
- cd ucto
- if [ "$TRAVIS_BRANCH" == "folia2.0" ]; then git checkout folia2.0; fi
- bash bootstrap.sh
- ./configure $OPENMPFLAG
- make
Expand All @@ -120,6 +124,7 @@ before_install:
- make
- sudo make install
- cd ..
- if [ "$TRAVIS_BRANCH" == "folia2.0" ]; then git checkout folia2.0; fi

install:
- bash bootstrap.sh
Expand All @@ -135,9 +140,7 @@ script:
- make check
- git clone https://github.com/LanguageMachines/frogtests.git;
- cd frogtests
- if [ "$TRAVIS_BRANCH" == "new_datastructure" ]; then
git checkout new_datastructure;
fi
- if [ "$TRAVIS_BRANCH" == "folia2.0" ]; then git checkout folia2.0; fi
- frog_bin=/usr/local/bin ./testall >&2

after_failure:
Expand Down
25 changes: 16 additions & 9 deletions include/frog/FrogAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "ticcutils/ServerBase.h"

#include "libfolia/folia.h"
#include "ucto/tokenize.h"

#include "frog/Frog-util.h"
#include "frog/FrogData.h"
Expand All @@ -64,6 +65,7 @@ class FrogOptions {
bool doIOB;
bool doNER;
bool doParse;
bool doTagger;
bool doSentencePerLine;
bool doQuoteDetection;
bool doDirTest;
Expand All @@ -85,9 +87,11 @@ class FrogOptions {
std::string docid;
std::string inputclass;
std::string outputclass;
std::string language;
std::string default_language;
std::set<std::string> languages;
std::string textredundancy;
unsigned int maxParserTokens;
std::string command;

FrogOptions();
private:
Expand All @@ -107,17 +111,19 @@ class FrogAPI {
void FrogFile( const std::string&, std::ostream&, const std::string& );
void FrogServer( Sockets::ServerSocket &conn );
void FrogInteractive();
bool frog_sentence( frog_data&, const size_t );
void run_folia_processor( const std::string&,
std::ostream&,
const std::string& = "" );
void run_text_processor( const std::string&,
std::ostream&,
const std::string& = "" );
frog_data frog_sentence( std::vector<Tokenizer::Token>&,
const size_t );
void run_folia_engine( const std::string&,
std::ostream&,
const std::string& = "" );
void run_text_engine( const std::string&,
std::ostream&,
const std::string& = "" );
folia::FoliaElement* start_document( const std::string&,
folia::Document *& ) const;
folia::FoliaElement *append_to_folia( folia::FoliaElement *,
const frog_data& ) const;
const frog_data&,
unsigned int& ) const;
std::string Frogtostring( const std::string& );
std::string Frogtostringfromfile( const std::string& );

Expand All @@ -134,6 +140,7 @@ class FrogAPI {
void add_parse_result( folia::Sentence *,
const frog_data&,
const std::vector<folia::Word*>& ) const;
folia::processor *add_provenance( folia::Document& ) const;
void test_version( const std::string&, double );
// functions
void FrogStdin( bool prompt );
Expand Down
4 changes: 2 additions & 2 deletions include/frog/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class Parser {
};
~Parser();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
void add_provenance( folia::Document& doc, folia::processor * ) const;
void Parse( frog_data&, TimerBlock& );
parseData prepareParse( frog_data& );
void add_result( const frog_data&,
Expand All @@ -82,7 +82,7 @@ class Parser {
bool isInit;
TiCC::LogStream *errLog;
TiCC::LogStream *dbgLog;
std::string version;
std::string _version;
std::string dep_tagset;
std::string POS_tagset;
std::string MWU_tagset;
Expand Down
2 changes: 1 addition & 1 deletion include/frog/cgn_tagger_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CGNTagger: public BaseTagger {
explicit CGNTagger( TiCC::LogStream *l, TiCC::LogStream *d = 0 ):
BaseTagger( l, d, "tagger" ){};
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
void add_declaration( folia::Document&, folia::processor * ) const;
void post_process( frog_data& );
void add_tags( const std::vector<folia::Word*>&,
const frog_data& ) const;
Expand Down
2 changes: 1 addition & 1 deletion include/frog/iob_tagger_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class IOBTagger: public BaseTagger {
explicit IOBTagger( TiCC::LogStream *l, TiCC::LogStream *d =0 ):
BaseTagger( l, d, "IOB" ){};
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
void add_declaration( folia::Document&, folia::processor * ) const;
void Classify( frog_data& );
void post_process( frog_data& );
void add_result( const frog_data& fd,
Expand Down
2 changes: 1 addition & 1 deletion include/frog/mblem_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class Mblem {
explicit Mblem( TiCC::LogStream *, TiCC::LogStream * =0 );
~Mblem();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
void add_provenance( folia::Document&, folia::processor * ) const;
void Classify( frog_record& );
void Classify( const icu::UnicodeString& );
std::vector<std::pair<std::string,std::string> > getResult() const;
Expand Down
2 changes: 1 addition & 1 deletion include/frog/mbma_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Mbma {
explicit Mbma( TiCC::LogStream *, TiCC::LogStream * =0 );
~Mbma();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& doc ) const;
void add_provenance( folia::Document&, folia::processor * ) const;
void Classify( frog_record& );
void Classify( const icu::UnicodeString& );
void filterHeadTag( const std::string& );
Expand Down
2 changes: 1 addition & 1 deletion include/frog/mwu_chunker_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class Mwu {
~Mwu();
void reset();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
void add_provenance( folia::Document&, folia::processor * ) const;
void Classify( frog_data& );
void add( frog_record&, size_t );
void add_result( const frog_data&,
Expand Down
2 changes: 1 addition & 1 deletion include/frog/ner_tagger_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class NERTagger: public BaseTagger {
void post_process( frog_data& );
void post_process( frog_data&,
const std::vector<tc_pair>& );
void addDeclaration( folia::Document& ) const;
void add_declaration( folia::Document&, folia::processor * ) const;
void add_result( const frog_data& fd,
const std::vector<folia::Word*>& wv ) const;
bool read_gazets( const std::string& f, const std::string& p ){
Expand Down
3 changes: 2 additions & 1 deletion include/frog/tagger_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class BaseTagger {
virtual bool init( const TiCC::Configuration& );
virtual void post_process( frog_data& ) = 0;
virtual void Classify( frog_data& );
void addDeclaration( folia::Document& ) const;
virtual void add_declaration( folia::Document&, folia::processor * ) const = 0;
void add_provenance( folia::Document&, folia::processor * ) const;
std::string getTagset() const { return tagset; };
std::string set_eos_mark( const std::string& );
bool fill_map( const std::string&, std::map<std::string,std::string>& );
Expand Down
14 changes: 7 additions & 7 deletions include/frog/ucto_tokenizer_mod.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ class UctoTokenizer {
void setOutputClass( const std::string& );
void setDocID( const std::string& );
void setTextRedundancy( const std::string& );
std::string get_data_version() const;
bool get_setting_info( const std::string&, std::string&, std::string& ) const;
std::vector<std::string> tokenize( const std::string& );
frog_data tokenize_line( const std::string& );
frog_data tokenize_line_next();
frog_data tokenize_stream( std::istream& );
frog_data tokenize_stream_next();
std::vector<Tokenizer::Token> tokenize_line( const std::string&, const std::string& = "" );
std::vector<Tokenizer::Token> tokenize_line_next();
std::vector<Tokenizer::Token> tokenize_stream( std::istream& );
std::vector<Tokenizer::Token> tokenize_stream_next();
std::string tokenizeStream( std::istream& );
std::vector<folia::Word*> add_words( folia::Sentence *,
const std::string&,
const std::string&,
const frog_data& ) const;
void add_provenance( folia::Document& , folia::processor * ) const;
private:
std::vector<Tokenizer::Token> queue; // for the reentrant tokenize_stream()
std::istream *cur_is;
Tokenizer::TokenizerClass *tokenizer;
TiCC::LogStream *errLog;
Expand Down
3 changes: 2 additions & 1 deletion src/Frog-util.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ void getFileNames( const string& dirName,
const string& ext,
set<string>& fileNames ){
DIR *dir = opendir( dirName.c_str() );
if ( !dir )
if ( !dir ){
return;
}
else {
struct stat sb;
struct dirent *entry = readdir( dir );
Expand Down
63 changes: 46 additions & 17 deletions src/Frog.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ bool parse_args( TiCC::CL_Options& Opts,
TiCC::LogStream* theErrLog ){
// process the command line and fill FrogOptions to initialize the API
// also fill some globals we use for our own main.

options.command = Opts.toString();
// Is a language list specified? The default is Dutch.
string language;
string languages;
Expand All @@ -157,7 +157,8 @@ bool parse_args( TiCC::CL_Options& Opts,
// OK, no languages parameter was given.
// Use the default configuration file, which is the Dutch one.
configFileName = FrogAPI::defaultConfigFile("nld");
language = "none";
language = "nld";
options.languages.insert( "nld" );
}
else {
vector<string> lang_v = TiCC::split_at( languages, "," );
Expand All @@ -167,6 +168,9 @@ bool parse_args( TiCC::CL_Options& Opts,
return false;
}
language = lang_v[0]; // the first mentioned is the default.
for ( const auto& l : lang_v ){
options.languages.insert( l );
}
if ( lang_v.size() > 1 ){
cerr << "WARNING: you used the --language=" << languages << " option"
<< " with more then one language " << endl
Expand All @@ -184,7 +188,7 @@ bool parse_args( TiCC::CL_Options& Opts,
cerr << "using fallback configuration file: " << configFileName << endl;
}
}
options.language = language;
options.default_language = language;
// override default config settings when a configfile is specified
Opts.extract( 'c', configFileName );
if ( configuration.fill( configFileName ) ){
Expand All @@ -193,6 +197,14 @@ bool parse_args( TiCC::CL_Options& Opts,
if ( !vers.empty() ){
LOG << "configuration version = " << vers << endl;
}
string languages = configuration.getatt( "languages", "tokenizer" );
if ( !languages.empty() ){
vector<string> lang_v = TiCC::split_at( languages, "," );
options.default_language = lang_v[0];
for ( const auto& l : lang_v ){
options.languages.insert( l );
}
}
}
else {
cerr << "failed to read configuration from '" << configFileName << "' !!" << endl;
Expand Down Expand Up @@ -276,20 +288,30 @@ bool parse_args( TiCC::CL_Options& Opts,
}
if ( Opts.extract( "skip", value )) {
string skip = value;
if ( skip.find_first_of("tT") != string::npos )
if ( skip.find_first_of("tT") != string::npos ){
options.doTok = false;
if ( skip.find_first_of("lL") != string::npos )
}
if ( skip.find_first_of("lL") != string::npos ){
options.doLemma = false;
if ( skip.find_first_of("aA") != string::npos )
}
if ( skip.find_first_of("aA") != string::npos ){
options.doMorph = false;
if ( skip.find_first_of("mM") != string::npos )
}
if ( skip.find_first_of("mM") != string::npos ){
options.doMwu = false;
if ( skip.find_first_of("cC") != string::npos )
}
if ( skip.find_first_of("cC") != string::npos ){
options.doIOB = false;
if ( skip.find_first_of("nN") != string::npos )
}
if ( skip.find_first_of("nN") != string::npos ){
options.doNER = false;
if ( skip.find_first_of("pP") != string::npos )
}
if ( skip.find_first_of("gG") != string::npos ){
options.doTagger = false;
}
if ( skip.find_first_of("pP") != string::npos ){
options.doParse = false;
}
else if ( !options.doMwu ){
LOG << " Parser disabled, because MWU is deselected" << endl;
options.doParse = false;
Expand Down Expand Up @@ -454,10 +476,12 @@ bool parse_args( TiCC::CL_Options& Opts,

Opts.extract ("uttmarker", options.uttmark );
if ( !testDirName.empty() ){
if ( options.doXMLin )
if ( options.doXMLin ){
getFileNames( testDirName, ".xml", fileNames );
else
}
else {
getFileNames( testDirName, "", fileNames );
}
if ( fileNames.empty() ){
LOG << "error: couldn't find any files in directory: "
<< testDirName << endl;
Expand Down Expand Up @@ -607,8 +631,9 @@ int main(int argc, char *argv[]) {
if ( outS == 0 ){
if ( wantOUT ){
if ( options.doXMLin ){
if ( !outPath.empty() )
if ( !outPath.empty() ){
outName = outPath + name + ".out";
}
}
else {
outName = outPath + name + ".out";
Expand All @@ -632,13 +657,16 @@ int main(int argc, char *argv[]) {
string xmlOutName = XMLoutFileName;
if ( xmlOutName.empty() ){
if ( !xmlDirName.empty() ){
if ( name.rfind(".xml") == string::npos )
if ( name.rfind(".xml") == string::npos ){
xmlOutName = xmlPath + name + ".xml";
else
}
else {
xmlOutName = xmlPath + name;
}
}
else if ( options.doXMLout )
else if ( options.doXMLout ){
xmlOutName = name + ".xml"; // do not clobber the inputdir!
}
}
if ( !xmlOutName.empty() ){
if ( options.doRetry && TiCC::isFile( xmlOutName ) ){
Expand Down Expand Up @@ -737,8 +765,9 @@ int main(int argc, char *argv[]) {
try {
// Create the socket
Sockets::ServerSocket server;
if ( !server.connect( options.listenport ) )
if ( !server.connect( options.listenport ) ){
throw( runtime_error( "starting server on port " + options.listenport + " failed" ) );
}
if ( !server.listen( 5 ) ) {
// maximum of 5 pending requests
throw( runtime_error( "listen(5) failed" ) );
Expand Down

0 comments on commit dcd77e1

Please sign in to comment.