From 8ed1a1578db6a0899630fbdf6903b4c373a031f4 Mon Sep 17 00:00:00 2001
From: Franck Bodmer
Date: Thu, 18 Jan 2024 17:25:14 +0100
Subject: [PATCH] opProx feature (Cosmas 2) rebase on origin/master.

Review Comment #181: resolved.
Squashed commit consisting of
- verbosity can be switched on/off on command line.
- Prox: parsing %-w1 and %+w1 correctly.
- opPROX: correcting order of Prox options: WIP.
- beliebige Reihenfolge der Abstands-Optionen: WIP.
- Prox: beliebige Reihenfolge der Optionen: OK.
- Prox: beliebige Reihenfolge der Optionen: OK.
- opPROX: grammar should accept any order of prox. options: WIP.
- PROX: return exact error message about prox options.
- PROX: emit a meaningful error message: wip.
- PROX: emit a meaningful error message: WIP.
- write parsing error to AST.
- trying to write error message into an error node of the AST.
- PROX: Fehlermeldung in KoralQuery schreiben funktioniert.
- Prox...
- Error detection inside Prox done. Returning a precise error message through JSON: done.
- using addError() for error messages in PROX: WIP.
- Prox: reporting exact error messages: works.
- PROX: Tests with RecognitionExceptions removed. All Error Codes in StatusCodes.java.
- Prox: error messages for wrong prox. options.
- Prox: debug output deactivated.
- Prox: deleted debug output.
- Prox: Test added: WiP.
- Prox: 1 working test added.
- Prox: 3 more tests added.
changes for Review on opPROX.
changes for the review.
changes for the review.
opPROX: PROX_REMAIN defined as a complementary class.
add. test for %.
Change-Id: I8802becaf840660a1512281b3477762a422f8b4f Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/8015 Reviewed-by: Nils Diewald --- Changes | 3 + pom.xml | 2 +- src/main/antlr/cosmas/c2ps_opPROX.g | 6 +- .../korap/query/parse/cosmas/c2ps_opPROX.java | 147 ++++++++++++++---- .../serialize/Cosmas2QueryProcessor.java | 10 +- .../query/serialize/QuerySerializer.java | 12 +- .../korap/util/C2RecognitionException.java | 1 - .../cosmas2/Cosmas2QueryProcessorTest.java | 71 ++++++++- 8 files changed, 196 insertions(+), 56 deletions(-) diff --git a/Changes b/Changes index c7704005..d1ccfc21 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,6 @@ +0.43 2024-06-11 + - [feature] Support opProx in C2 (bodmer) + 0.42 2024-01-11 - [feature] Support #REG in C2 (bodmer) - [bugfix] Fix comma in #BED in C2 (bodmer) diff --git a/pom.xml b/pom.xml index b409fe5c..049b3952 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ de.ids-mannheim.korap.koral Koral - 0.42.0 + 0.43.0 jar Koral https://korap.ids-mannheim.de diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g index 1569d1a5..58e00f4d 100644 --- a/src/main/antlr/cosmas/c2ps_opPROX.g +++ b/src/main/antlr/cosmas/c2ps_opPROX.g @@ -38,8 +38,8 @@ DISTVALUE // e.g. /w5umin -> remain = 'umin'. PROX_REMAIN - : (',')? 
('b'..'h'|'j'..'l'|'n'|'o'|'q'|'r'|'u'|'v'|'y'|'z'|'B'..'H'|'J'..'L'|'N'|'O'|'Q'|'R'|'U'|'V'|'Y'|'Z') (~ ' ')* ; - + : ~(','|'a'|'i'|'m'|'n'|'p'|'s'|'t'|'w'|'x'|'A'|'I'|'M'|'N'|'P'|'S'|'T'|'W'|'X'|'0'..'9'|'+'|'-'|':'|'/'|'%') (~ ' ')* ; + // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // PROX-Parser @@ -102,4 +102,4 @@ proxGroup | ('max'|'MAX') -> ^(GRP MAX); - \ No newline at end of file + diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java index 62297195..737c7ad0 100644 --- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java +++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java @@ -15,12 +15,121 @@ public class c2ps_opPROX { - final static boolean bDebug = false; + final static boolean + bDebug = false; + + public static final int MLANG_ENGLISH = 0; + public static final int MLANG_GERMAN = 1; + + public static int + messLang = MLANG_ENGLISH; // default. // type of an Error CommonToken: - final static int typeERROR = 1; + final static int + typeERROR = 1; + // Prox error codes defined in StatusCodes.java. + private static String getErrMessEN(int errCode, String text) + + { + switch( errCode ) + { + case StatusCodes.ERR_PROX_MEAS_NULL: + return String.format("Proximity operator at '%s': one of the following prox. types is missing: w,s,p!", text); + + case StatusCodes.ERR_PROX_MEAS_TOOGREAT: + return String.format("Proximity operator at '%s': Please, specify only 1 of the following prox. types: w,s,p! " + + "It is possible to specify several at once by separating them with a ','. E.g.: ' /+w2,s2,p0 '.", text); + + case StatusCodes.ERR_PROX_VAL_NULL: + return String.format("Proximity operator at '%s': please specify a numerical value for the distance. E.g. 
' /+w5 '.", text); + + case StatusCodes.ERR_PROX_VAL_TOOGREAT: + return String.format("Proximity operator at '%s': please specify only 1 distance value. E.g. ' /+w5 '.", text); + + case StatusCodes.ERR_PROX_DIR_TOOGREAT: + return String.format("Proximity operator at '%s': please specify either '+' or '-' or none of them for the direction.", text); + + case StatusCodes.ERR_PROX_WRONG_CHARS: + return String.format("Proximity operator at '%s': unknown proximity options!", text); + + case StatusCodes.UNKNOWN_QUERY_ERROR: + return String.format("Unknown error!"); + + default: + return String.format("Proximity operator at '%s': unknown error. The correct syntax looks like this: E.g. ' /+w2 ' or ' /w10,s0 '.", text); + } + } + + private static String getErrMessGE(int errCode, String text) + + { + switch( errCode ) + { + case StatusCodes.ERR_PROX_MEAS_NULL: + return String.format("Abstandsoperator an der Stelle '%s': es fehlt eine der folgenden Angaben: w,s,p!", text); + + case StatusCodes.ERR_PROX_MEAS_TOOGREAT: + return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " + + "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: ' /+w2,s2,p0 ').", text); + + case StatusCodes.ERR_PROX_VAL_NULL: + return String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text); + + case StatusCodes.ERR_PROX_VAL_TOOGREAT: + return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. ' /+w5 ')! ", text); + + case StatusCodes.ERR_PROX_DIR_TOOGREAT: + return String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! 
", text); + + case StatusCodes.ERR_PROX_WRONG_CHARS: + return String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text); + + case StatusCodes.UNKNOWN_QUERY_ERROR: + return String.format("Unbekannter Fehler!"); + + default: + return String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: ' /+w2 ' oder ' /w10,s0 '.", text); + } + } + + private static String getErrMess(int errCode, int messLang, String text) + + { + if( messLang == c2ps_opPROX.MLANG_GERMAN ) + return getErrMessGE(errCode, text); + else + return getErrMessEN(errCode, text); + } + + + /** + * in this version, the pre-stored message language is used. + * @param errCode + * @param text + * @return + * 10.06.24/FB + */ + + public static String getErrMess(int errCode, String text) + + { + if( messLang == c2ps_opPROX.MLANG_GERMAN ) + return getErrMessGE(errCode, text); + else + return getErrMessEN(errCode, text); + } + + /** + * buildErrorTree(): + * @param text = part of the query that contains an error. + * @param errCode + * @param typeDIST + * @param pos + * @return + */ + private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, int pos) { @@ -37,38 +146,8 @@ private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, String mess; - switch( errCode ) - { - case StatusCodes.ERR_PROX_MEAS_NULL: - mess = String.format("Abstandsoperator an der Stelle '%s' es fehlt eine der folgenden Angaben: w,s,p!", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - case StatusCodes.ERR_PROX_MEAS_TOOGREAT: - mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! 
" + - "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: /+w2,s0).", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - case StatusCodes.ERR_PROX_VAL_NULL: - mess = String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. /+w5)! ", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - case StatusCodes.ERR_PROX_VAL_TOOGREAT: - mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. /+w5)! ", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - case StatusCodes.ERR_PROX_DIR_TOOGREAT: - mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - case StatusCodes.ERR_PROX_WRONG_CHARS: - mess = String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text); - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - break; - default: - mess = String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. 
Korrekte Syntax z.B.: /+w2 oder /w10,s0.", text); - - errorMes = new CommonTree(new CommonToken(typeERROR, mess)); - } + mess = getErrMess(errCode, messLang, text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); errorTree.addChild(errorNode); errorNode.addChild(errorPos); diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java index a6c7bd9d..16af2a3e 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java @@ -9,6 +9,7 @@ import de.ids_mannheim.korap.query.object.KoralOperation; import de.ids_mannheim.korap.query.object.KoralTermGroupRelation; import de.ids_mannheim.korap.query.object.KoralType; +import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; import de.ids_mannheim.korap.query.parse.cosmas.c2psLexer; import de.ids_mannheim.korap.query.parse.cosmas.c2psParser; import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener; @@ -150,7 +151,7 @@ public class Cosmas2QueryProcessor extends Antlr3AbstractQueryProcessor { private boolean reportErrorsinTree(Tree node) { - final String func = "reportErrorsinTree"; + // not used when not debugging: final String func = "reportErrorsinTree"; //System.err.printf("Debug: %s: '%s' has %d children.\n", // func, node.getText(), node.getChildCount()); @@ -178,8 +179,8 @@ private boolean reportErrorsinTree(Tree node) int errCode = node.getChild(1) != null ? Integer.parseInt(node.getChild(1).getText()) : StatusCodes.ERR_PROX_UNKNOWN; String - errMess = node.getChild(2) != null ? node.getChild(2).getText() : "Genaue Fehlermeldung nicht auffindbar."; - + errMess = node.getChild(2) != null ? 
node.getChild(2).getText() : c2ps_opPROX.getErrMess(StatusCodes.UNKNOWN_QUERY_ERROR, ""); + ArrayList errorSpecs = new ArrayList(); @@ -1844,9 +1845,6 @@ private Tree parseCosmasQuery (String query) { org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3 - // System.out.printf("parseCosmasQuery: tokens = %d\n", tokens.size()); - // System.out.printf("parseCosmasQuery: tokens = %s\n", tokens.toString()); - parser = new c2psParser(tokens); // Use custom error reporters diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java index deaa58e9..a51bae1b 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java @@ -18,8 +18,6 @@ import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; import de.ids_mannheim.korap.query.serialize.util.StatusCodes; -import com.fasterxml.jackson.core.JsonPointer; - /** * Main class for Koral, serializes queries from concrete QLs to KoralQuery * @@ -116,11 +114,9 @@ public static void main (String[] args) { queries = new String[] { args[0] }; ql = args[1]; } - if( args.length >= 3 ) - { - if( args[2].compareToIgnoreCase("-show") == 0 ) - bDebug = true; - } + + if( args.length >= 3 && args[2].compareToIgnoreCase("-show") == 0 ) + bDebug = true; for (String q : queries) { i++; @@ -153,7 +149,7 @@ public static void main (String[] args) { * 'poliqarpplus', 'cqp', 'cosmas2', 'annis' or 'cql'. 
* @throws IOException */ - + public void run (String query, String queryLanguage, boolean bDebug) throws IOException { ast.verbose = bDebug; // debugging: 01.09.23/FB diff --git a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java index 92ba9ef6..358dd0c2 100644 --- a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java +++ b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java @@ -9,7 +9,6 @@ public final class C2RecognitionException extends RecognitionException { - private static final boolean DEBUG = false; public String mismatchedToken; public C2RecognitionException(String mismatchedToken) diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java index bb4319c7..b3665944 100644 --- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java +++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java @@ -513,6 +513,7 @@ public void testOPNOT () throws JsonProcessingException, IOException { @Test public void testOPPROX () throws JsonProcessingException, IOException { + query = "Sonne /+w1:4 Mond"; qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); @@ -630,31 +631,95 @@ public void testOPPROX () throws JsonProcessingException, IOException { .asText()); assertFalse(res.at("/query/inOrder").asBoolean()); - // 15.01.24/FB: checking syntax error detectiong: + // -- check exclude operator -- // + + query = "Sonne %-w1:2 Sterne"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + /* + System.out.printf("Query '%s': returns: '%s'.\n", query, res.toPrettyString()) ; + System.out.printf("[0]: '%s'.\n", res.at("/query/distances").get(0).get("boundary").toPrettyString()); + System.out.printf("@type: '%s'.\n", 
res.at("/query/distances").get(0).get("@type").asText()); + System.out.printf("exclude: '%s'.\n", res.at("/query/distances").get(0).get("exclude").asText()); + System.out.printf("key: '%s'.\n", res.at("/query/distances").get(0).get("key").asText()); + */ + + assertEquals("cosmas:distance", res.at("/query/distances").get(0).get("@type").asText()); + assertTrue( res.at("/query/distances").get(0).get("exclude").asBoolean()); + assertEquals("w", res.at("/query/distances").get(0).get("key").asText()); + + // 15.01.24/FB: checking syntax error: query = "Sonne /+w Mond"; // distance value missing. qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); assertTrue(res.get("errors") != null); + //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ; + assertEquals(StatusCodes.ERR_PROX_VAL_NULL, res.get("errors").get(0).get(0).asInt()); query = "Sonne /+2sw Mond"; // 2 distance types instead of 1. qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); - + assertTrue(res.get("errors") != null); + //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ; + assertEquals(StatusCodes.ERR_PROX_MEAS_TOOGREAT, res.get("errors").get(0).get(0).asInt()); query = "Sonne /+2s- Mond"; // 2 distance directions instead of 1. qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); assertTrue(res.get("errors") != null); - + //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ; + assertEquals(StatusCodes.ERR_PROX_DIR_TOOGREAT, res.get("errors").get(0).get(0).asInt()); + query = "Sonne /+2s7 Mond"; // 2 distance values instead of 1. 
qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); assertTrue(res.get("errors") != null); + //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ; + assertEquals(StatusCodes.ERR_PROX_VAL_TOOGREAT, res.get("errors").get(0).get(0).asInt()); + + // tests for error messages for unknown proximity options: + // 29.05.24/FB + + query = "ab /+w1:2u,p cd"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue("Error code expected!",!res.get("errors").isNull()); + assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt()); + + query = "ab %-w1:2,2su cd"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue("Error code expected!",!res.get("errors").isNull()); + assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt()); + + query = "ab /w1:2s cd"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + //System.out.printf("Query '%s': context: '%s'.\n", query, res.get("@context").toPrettyString()) ; + //System.out.printf("Query '%s': errors : '%s'.\n", query, res.get("errors").toPrettyString()) ; + //System.out.printf("Query '%s': errorCode: '%s'.\n", query, res.get("errors").get(0).get(0).toPrettyString()) ; + //System.out.printf("Query '%s': errorText : '%s'.\n", query, res.get("errors").get(0).get(1).toPrettyString()) ; + //System.out.printf("Query '%s': errorPos : '%s'.\n", query, res.get("errors").get(0).get(2).toPrettyString()) ; + + assertTrue("Error code expected!", res.get("errors") != null); + assertEquals(StatusCodes.ERR_PROX_MEAS_TOOGREAT, res.get("errors").get(0).get(0).asInt()); + + query = "Sonne %-w1:2,+2su Galaxien"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue("Error code expected!", res.get("errors") != null); + assertEquals(StatusCodes.ERR_PROX_WRONG_CHARS, res.get("errors").get(0).get(0).asInt()); + }