Skip to content

Commit

Permalink
Merge c801e46 into c5e1da3
Browse files Browse the repository at this point in the history
  • Loading branch information
tfmorris committed Apr 11, 2024
2 parents c5e1da3 + c801e46 commit e607c3b
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 28 deletions.
5 changes: 3 additions & 2 deletions main/src/com/google/refine/importers/FixedWidthImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@

package com.google.refine.importers;

import static com.google.refine.importing.ImportingUtilities.getInputStreamReader;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
Expand Down Expand Up @@ -184,7 +185,7 @@ static private ArrayList<Object> getCells(String line, int[] widths) {
static public int[] guessColumnWidths(File file, String encoding) {
try {
InputStream is = new FileInputStream(file);
Reader reader = (encoding != null) ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
Reader reader = getInputStreamReader(is, encoding);
LineNumberReader lineNumberReader = new LineNumberReader(reader);

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
Expand Down Expand Up @@ -242,7 +241,7 @@ static public class Separator {

static public CsvFormat guessFormat(File file, String encoding) {
try (InputStream is = new FileInputStream(file);
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
Reader reader = ImportingUtilities.getInputStreamReader(is, encoding);
LineNumberReader lineNumberReader = new LineNumberReader(reader)) {
CsvParserSettings settings = new CsvParserSettings();
// We could provide a set of delimiters to consider below if we wanted to restrict this
Expand All @@ -265,7 +264,7 @@ static public Separator guessSeparator(File file, String encoding) {
static public Separator guessSeparator(File file, String encoding, boolean handleQuotes) {
try {
try (InputStream is = new FileInputStream(file);
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
Reader reader = ImportingUtilities.getInputStreamReader(is, encoding);
LineNumberReader lineNumberReader = new LineNumberReader(reader)) {

List<Separator> separators = new ArrayList<>();
Expand Down Expand Up @@ -340,4 +339,5 @@ static public Separator guessSeparator(File file, String encoding, boolean handl
}
return null;
}

}
6 changes: 3 additions & 3 deletions main/src/com/google/refine/importers/TextFormatGuesser.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@

package com.google.refine.importers;

import static com.google.refine.importing.ImportingUtilities.getInputStreamReader;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import com.google.common.base.CharMatcher;
Expand All @@ -56,8 +57,7 @@ public String guess(File file, String encoding, String seedFormat) {
}

InputStream bis = new BoundedInputStream(fis, 64 * 1024); // TODO: This seems like a lot
try (BufferedReader reader = new BufferedReader(
encoding != null ? new InputStreamReader(bis, encoding) : new InputStreamReader(bis))) {
try (BufferedReader reader = new BufferedReader(getInputStreamReader(bis, encoding))) {
int totalChars = 0;
long openBraces = 0;
int closeBraces = 0;
Expand Down
35 changes: 15 additions & 20 deletions main/src/com/google/refine/importing/ImportingUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
Expand Down Expand Up @@ -107,6 +106,17 @@ public class ImportingUtilities {

final public static List<String> allowedProtocols = Arrays.asList("http", "https", "ftp", "sftp");

/**
 * Wraps a byte stream in an {@link InputStreamReader} for the requested encoding.
 * <p>
 * Three cases are handled:
 * <ul>
 * <li>{@code encoding} equals {@code EncodingGuesser.UTF_8_BOM}: this is a pseudo-encoding name rather than a
 * real charset, so the stream is first wrapped in a {@code UnicodeBOMInputStream} and then decoded as UTF-8.</li>
 * <li>{@code encoding} is {@code null}: the reader is created without naming a charset, i.e. with the JVM's
 * default.</li>
 * <li>anything else: the name is passed straight to {@link InputStreamReader}.</li>
 * </ul>
 *
 * @param inputStream
 *            the raw byte stream to decode
 * @param encoding
 *            charset name, the {@code UTF_8_BOM} pseudo-encoding, or {@code null} for the default charset
 * @return a reader decoding {@code inputStream}
 * @throws IOException
 *             if the named encoding is unsupported, or if constructing the BOM-aware stream fails
 */
public static InputStreamReader getInputStreamReader(InputStream inputStream, String encoding) throws IOException {
    // Guard clause first: the BOM marker is not a charset name InputStreamReader would accept.
    // A null encoding simply fails this equals() check and falls through to the default below.
    if (EncodingGuesser.UTF_8_BOM.equals(encoding)) {
        // NOTE(review): the 'true' flag presumably tells UnicodeBOMInputStream to skip the BOM - confirm
        InputStream bomAwareStream = new UnicodeBOMInputStream(inputStream, true);
        return new InputStreamReader(bomAwareStream, UTF_8);
    }
    return encoding == null
            ? new InputStreamReader(inputStream)
            : new InputStreamReader(inputStream, encoding);
}

static public interface Progress {

public void setProgress(String message, int percent);
Expand Down Expand Up @@ -568,26 +578,11 @@ static public Reader getReaderFromStream(InputStream inputStream, ObjectNode fil
if (encoding == null) {
encoding = commonEncoding;
}
if (encoding != null) {

// Special case for UTF-8 with BOM
if (EncodingGuesser.UTF_8_BOM.equals(encoding)) {
try {
return new InputStreamReader(new UnicodeBOMInputStream(inputStream, true), UTF_8);
} catch (IOException e) {
throw new RuntimeException("Exception from UnicodeBOMInputStream", e);
}
} else {
try {
return new InputStreamReader(inputStream, encoding);
} catch (UnsupportedEncodingException e) {
// This should never happen since they picked from a list of supported encodings
throw new RuntimeException("Unsupported encoding: " + encoding, e);
}
}

try {
return getInputStreamReader(inputStream, encoding);
} catch (IOException e) {
throw new RuntimeException("Exception getting InputStreamReader", e);
}
return new InputStreamReader(inputStream);
}

static public File getFile(ImportingJob job, ObjectNode fileRecord) {
Expand Down

0 comments on commit e607c3b

Please sign in to comment.