Skip to content

Commit

Permalink
Fixed issue when names of attached files contain tabs. Tabs will now be replaced with spaces.
Browse files
Browse the repository at this point in the history
  • Loading branch information
jfarwer committed Mar 8, 2024
1 parent aa10d5d commit 0cb82d4
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
4 changes: 4 additions & 0 deletions src/java/edu/stanford/muse/email/EmailFetcherThread.java
Expand Up @@ -840,6 +840,10 @@ private String handleAttachments(EmailDocument ed,int idx, Message m, Part p, Li
filename = p.getFileName();
if (!filename.trim().isEmpty()) {
filename = filename.trim();
if (filename.contains("\t"))
{
filename = filename.replace("\t", " ");
}
}
} catch (Exception e) {
// seen this happen with:
Expand Down
6 changes: 5 additions & 1 deletion src/java/edu/stanford/muse/ner/NER.java
Expand Up @@ -220,7 +220,11 @@ public void recognizeArchive() throws CancelledException, IOException {
String content = archive.getContents(ldoc, false);
String title = archive.getTitle(ldoc);
//original content is substring of content;

if (title == null || content == null)
{
log.warn("title " + title + " content " + content + " in Ner.recognizeArchive()");
continue;
}
Span[] names = nerModel.find(content);
Span[] namesT = nerModel.find(title);
recTime += System.currentTimeMillis() - st;
Expand Down
13 changes: 6 additions & 7 deletions src/java/edu/stanford/muse/ner/model/NBModel.java
Expand Up @@ -4,14 +4,11 @@
import com.google.common.collect.Multimap;
import edu.stanford.muse.Config;
import edu.stanford.muse.ner.dictionary.EnglishDictionary;
import edu.stanford.muse.ner.featuregen.FeatureUtils;
import edu.stanford.muse.ner.model.test.SequenceModelTest;
import edu.stanford.muse.ner.tokenize.CICTokenizer;
import edu.stanford.muse.ner.tokenize.Tokenizer;
import edu.stanford.muse.util.*;
import edu.stanford.muse.util.Util;
import edu.stanford.muse.util.*;
import edu.stanford.muse.webapp.JSPHelper;
import opennlp.tools.util.featuregen.FeatureGeneratorUtil;
import org.apache.commons.cli.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand All @@ -20,10 +17,7 @@

import java.io.*;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
Expand Down Expand Up @@ -251,6 +245,11 @@ private String lookup(String phrase) {


public Span[] find (String content){
if (content == null)
{
log.warn("content null in NBModel.find()");
return null;
}
List<Span> spans = new ArrayList<>();

opennlp.tools.util.Span[] sentSpans = NLPUtils.tokenizeSentenceAsSpan(content);
Expand Down

0 comments on commit 0cb82d4

Please sign in to comment.