|
8 | 8 | import edu.stanford.nlp.ling.CoreAnnotations;
|
9 | 9 | import edu.stanford.nlp.ling.CoreLabel;
|
10 | 10 | import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
|
| 11 | +import edu.stanford.nlp.naturalli.OperatorSpec; |
| 12 | +import edu.stanford.nlp.naturalli.Polarity; |
11 | 13 | import edu.stanford.nlp.pipeline.*;
|
12 | 14 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
|
13 | 15 | import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
|
@@ -36,32 +38,32 @@ public class Document {
|
36 | 38 | /**
|
37 | 39 | * The empty {@link java.util.Properties} object, for use with creating default annotators.
|
38 | 40 | */
|
39 |
| - static final Properties EMPTY_PROPS = new Properties() {{ |
40 |
| - setProperty("language", "english"); |
41 |
| - setProperty("annotators", ""); |
42 |
| - setProperty("tokenize.class", "PTBTokenizer"); |
43 |
| - setProperty("tokenize.language", "en"); |
44 |
| - setProperty("parse.binaryTrees", "true"); |
45 |
| - }}; |
| 41 | + static final Properties EMPTY_PROPS = PropertiesUtils.asProperties( |
| 42 | + "language", "english", |
| 43 | + "annotators", "", |
| 44 | + "tokenize.class", "PTBTokenizer", |
| 45 | + "tokenize.language", "en", |
| 46 | + "parse.binaryTrees", "true"); |
| 47 | + |
46 | 48 |
|
47 | 49 | /**
|
48 | 50 | * The caseless {@link java.util.Properties} object.
|
49 | 51 | *
|
50 | 52 | * @see Document#caseless()
|
51 | 53 | * @see Sentence#caseless()
|
52 | 54 | */
|
53 |
| - static final Properties CASELESS_PROPS = new Properties() {{ |
54 |
| - setProperty("language", "english"); |
55 |
| - setProperty("annotators", ""); |
56 |
| - setProperty("tokenize.class", "PTBTokenizer"); |
57 |
| - setProperty("tokenize.language", "en"); |
58 |
| - setProperty("parse.binaryTrees", "true"); |
59 |
| - setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/wsj-0-18-caseless-left3words-distsim.tagger"); |
60 |
| - setProperty("parse.model", "edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz"); |
61 |
| - setProperty("ner.model", "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz," + |
| 55 | + static final Properties CASELESS_PROPS = PropertiesUtils.asProperties( |
| 56 | + "language", "english", |
| 57 | + "annotators", "", |
| 58 | + "tokenize.class", "PTBTokenizer", |
| 59 | + "tokenize.language", "en", |
| 60 | + "parse.binaryTrees", "true", |
| 61 | + "pos.model", "edu/stanford/nlp/models/pos-tagger/wsj-0-18-caseless-left3words-distsim.tagger", |
| 62 | + "parse.model", "edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz", |
| 63 | + "ner.model", "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz," + |
62 | 64 | "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz," +
|
63 | 65 | "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz");
|
64 |
| - }}; |
| 66 | + |
65 | 67 |
|
66 | 68 | /**
|
67 | 69 | * The backend to use for constructing {@link edu.stanford.nlp.pipeline.AnnotatorFactory}s.
|
@@ -382,7 +384,7 @@ public static void useServer(String host,
|
382 | 384 | }
|
383 | 385 |
|
384 | 386 |
|
385 |
| - /** |
| 387 | + /* |
386 | 388 | * A static block that'll automatically fault in the CoreNLP server, if the appropriate environment
|
387 | 389 | * variables are set.
|
388 | 390 | * These are:
|
@@ -966,8 +968,8 @@ Document runNatlog(Properties props) {
|
966 | 968 | // Update data
|
967 | 969 | synchronized (serializer) {
|
968 | 970 | for (int i = 0; i < sentences.size(); ++i) {
|
969 |
| - sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (pair) -> pair.first.setPolarity(ProtobufAnnotationSerializer.toProto(pair.second)), x -> x.get(NaturalLogicAnnotations.PolarityAnnotation.class)); |
970 |
| - sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (pair) -> pair.first.setOperator(ProtobufAnnotationSerializer.toProto(pair.second)), x -> x.get(NaturalLogicAnnotations.OperatorAnnotation.class)); |
| 971 | + sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (Pair<CoreNLPProtos.Token.Builder, Polarity> pair) -> pair.first().setPolarity(ProtobufAnnotationSerializer.toProto(pair.second())), x -> x.get(NaturalLogicAnnotations.PolarityAnnotation.class)); |
| 972 | + sentences.get(i).updateTokens(ann.get(CoreAnnotations.SentencesAnnotation.class).get(i).get(CoreAnnotations.TokensAnnotation.class), (Pair<CoreNLPProtos.Token.Builder, OperatorSpec> pair) -> pair.first().setOperator(ProtobufAnnotationSerializer.toProto(pair.second())), x -> x.get(NaturalLogicAnnotations.OperatorAnnotation.class)); |
971 | 973 | }
|
972 | 974 | }
|
973 | 975 | return this;
|
@@ -1108,7 +1110,7 @@ private CorefChain fromProto(CoreNLPProtos.CorefChain proto) {
|
1108 | 1110 | StringBuilder mentionSpan = new StringBuilder();
|
1109 | 1111 | Sentence sentence = sentence(mentionProto.getSentenceIndex());
|
1110 | 1112 | for (int k = mentionProto.getBeginIndex(); k < mentionProto.getEndIndex(); ++k) {
|
1111 |
| - mentionSpan.append(" ").append(sentence.word(k)); |
| 1113 | + mentionSpan.append(' ').append(sentence.word(k)); |
1112 | 1114 | }
|
1113 | 1115 | // Set the coref cluster id for the token
|
1114 | 1116 | CorefChain.CorefMention mention = new CorefChain.CorefMention(
|
|
0 commit comments