Skip to content

Commit 5439371

Browse files
committed
Make things private; use StandardCharsets.UTF_8
1 parent 5360afa commit 5439371

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

src/edu/stanford/nlp/international/german/scripts/GermanTreebankUDUpdater.java

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,19 +5,21 @@
55
import edu.stanford.nlp.trees.tregex.TregexPattern;
66

77
import java.io.*;
8+
import java.nio.charset.StandardCharsets;
89
import java.util.*;
910
import java.util.regex.*;
1011

12+
/** @author Jason Bolton */
1113
public class GermanTreebankUDUpdater {
1214

13-
public static LabeledScoredTreeFactory factory = new LabeledScoredTreeFactory();
15+
private static final LabeledScoredTreeFactory factory = new LabeledScoredTreeFactory();
1416

15-
public static HashMap<String,String> wordToSplit = new HashMap<>();
17+
private static final HashMap<String,String> wordToSplit = new HashMap<>();
1618

17-
public static String taggerPath = "edu/stanford/nlp/models/pos-tagger/german-ud.tagger";
19+
private static final String taggerPath = "edu/stanford/nlp/models/pos-tagger/german-ud.tagger";
1820

19-
public static String hyphenatedWordPatternString = "[ÄÖÜäöüẞßA-Za-z]+\\-[ÄÖÜäöüẞßA-Za-z]+";
20-
public static Pattern hyphenatedWordPattern = Pattern.compile(hyphenatedWordPatternString);
21+
private static final String hyphenatedWordPatternString = "[ÄÖÜäöüẞßA-Za-z]+-[ÄÖÜäöüẞßA-Za-z]+";
22+
private static final Pattern hyphenatedWordPattern = Pattern.compile(hyphenatedWordPatternString);
2123

2224
static {
2325
wordToSplit.put("am", "an,dem");
@@ -70,18 +72,19 @@ public static void splitHyphenatedToken(Tree tree) {
7072
public static Tree createTagAndWordNode(String tag, String word) {
7173
Tree wordNode = factory.newLeaf(word);
7274
wordNode.setValue(word);
73-
Tree tagNode = factory.newTreeNode(tag, Arrays.asList(wordNode));
75+
Tree tagNode = factory.newTreeNode(tag, Collections.singletonList(wordNode));
7476
tagNode.setValue(tag);
7577
return tagNode;
7678
}
7779

7880
public static void main(String[] args) throws IOException {
79-
Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"));
80-
TreeReader tr = new PennTreeReader(r, factory);
81+
Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8));
82+
8183
TreebankTagUpdater tagUpdater = new TreebankTagUpdater(taggerPath);
82-
/** iterate through trees **/
83-
Tree fullTree = tr.readTree();
84-
while (fullTree != null) {
84+
85+
/* iterate through trees */
86+
TreeReader tr = new PennTreeReader(r, factory);
87+
for (Tree fullTree; (fullTree = tr.readTree()) != null; ) {
8588
TregexPattern pattern;
8689
TregexMatcher matcher;
8790
// split hyphenated token
@@ -102,7 +105,7 @@ public static void main(String[] args) throws IOException {
102105
for (int i = 0 ; i < childrenList.size() ; i++) {
103106
if (childrenList.get(i).value().equals("APPRART-AC")) {
104107
String mwtWord = childrenList.get(i).getLeaves().get(0).value();
105-
if (wordToSplit.keySet().contains(mwtWord)) {
108+
if (wordToSplit.containsKey(mwtWord)) {
106109
matchTree.removeChild(i);
107110
Tree artNKNode = createTagAndWordNode("ART-NK", wordToSplit.get(mwtWord).split(",")[1]);
108111
matchTree.addChild(i,artNKNode);
@@ -116,9 +119,6 @@ public static void main(String[] args) throws IOException {
116119
// print updated tree
117120
tagUpdater.tagTree(fullTree);
118121
System.out.println(fullTree);
119-
120-
// update to next tree
121-
fullTree = tr.readTree();
122122
}
123123
}
124124

0 commit comments

Comments
 (0)