Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix issue #512 to save the file location as a table column #1055

Merged
merged 1 commit into from Sep 20, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 28 additions & 0 deletions main/src/com/google/refine/importers/tree/ImportParameters.java
@@ -0,0 +1,28 @@
package com.google.refine.importers.tree;


public class ImportParameters {
boolean trimStrings;
boolean storeEmptyStrings;
boolean guessDataType;
boolean includeFileSources;
String fileSource;

/**
 * Builds a parameter bundle for a tree import, including the file-source settings.
 *
 * @param trimStrings         stored in {@code trimStrings}
 * @param storeEmptyStrings   stored in {@code storeEmptyStrings}
 * @param guessCellValueTypes stored in {@code guessDataType} (the field uses a different name)
 * @param includeFileSources  stored in {@code includeFileSources}
 * @param fileSource          stored in {@code fileSource}
 */
public ImportParameters(
        boolean trimStrings,
        boolean storeEmptyStrings,
        boolean guessCellValueTypes,
        boolean includeFileSources,
        String fileSource) {
    // File-source settings.
    this.fileSource = fileSource;
    this.includeFileSources = includeFileSources;

    // Cell-handling flags.
    this.guessDataType = guessCellValueTypes;
    this.storeEmptyStrings = storeEmptyStrings;
    this.trimStrings = trimStrings;
}

public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
this.trimStrings = trimStrings;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should call the constructor above (Line 11) rather than duplicating code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Do you want me to commit the change here, or create another branch from the fork?

this.storeEmptyStrings = storeEmptyStrings;
this.guessDataType = guessCellValueTypes;
this.includeFileSources = false;
this.fileSource = "";
}

}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing newline at end of file.

Expand Up @@ -48,7 +48,11 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;

/**
Expand Down Expand Up @@ -210,8 +214,22 @@ protected void parseOneFile(
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true);
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false);
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);

XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings,
storeEmptyStrings,guessCellValueTypes);

// copied from TabularImportingParserBase
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is used in both Tabular & Tree imports, is there a reason it can't be hoisted to ImportingParserBase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything "copied from somewhere" can always be improved. I will revisit this and see whether it can be pulled up into the parent class.

boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
String fileNameColumnName = "File";
if (includeFileSources) {
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
try {
project.columnModel.addColumn(
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
} catch (ModelException e) {
// Ignore: We already checked for duplicate name.
}
}
}

XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2,
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource));
}
}
133 changes: 34 additions & 99 deletions main/src/com/google/refine/importers/tree/XmlImportUtilities.java
Expand Up @@ -245,26 +245,13 @@ static protected RecordElementCandidate detectRecordElement(TreeReader parser, S
return null;
}

@Deprecated
static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit
) {
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true);
}

static public void importTreeData(
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup,
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameters
) {
if (logger.isTraceEnabled()) {
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
Expand All @@ -273,7 +260,7 @@ static public void importTreeData(
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,parameters);
}
}
} catch (TreeReaderException e) {
Expand All @@ -282,18 +269,6 @@ static public void importTreeData(
}
}

@Deprecated
static protected void findRecord(
Project project,
TreeReader parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
) throws TreeReaderException {
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true);
}

/**
*
* @param project
Expand All @@ -310,9 +285,7 @@ static protected void findRecord(
int pathIndex,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing deprecated methods shouldn't be included in this patch.

ImportColumnGroup rootColumnGroup,
int limit,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameters
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath));
Expand All @@ -331,7 +304,8 @@ static protected void findRecord(
while (parser.hasNext() && limit != 0) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType);
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,
parameters);
} else if (eventType == Token.EndEntity) {
break;
} else if (eventType == Token.Value) {
Expand All @@ -340,13 +314,13 @@ static protected void findRecord(
String desiredFieldName = recordPath[pathIndex + 1];
String currentFieldName = parser.getFieldName();
if (desiredFieldName.equals(currentFieldName)) {
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType);
processFieldAsRecord(project, parser, rootColumnGroup,parameters);
}
}
}
}
} else {
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType);
processRecord(project, parser, rootColumnGroup, parameters);
}
} else {
skip(parser);
Expand All @@ -364,17 +338,6 @@ static protected void skip(TreeReader parser) throws TreeReaderException {
}
}

/**
* @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processRecord(project, parser, rootColumnGroup, true, false, true);
}

/**
* processRecord parses Tree data for a single element and its sub-elements,
Expand All @@ -388,32 +351,18 @@ static protected void processRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
}
ImportRecord record = new ImportRecord();

processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType);
addImportRecordToProject(record, project);
processSubRecord(project, parser, rootColumnGroup, record, 0, parameter);
addImportRecordToProject(record, project, parameter.includeFileSources, parameter.fileSource);
}

/**
* @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws TreeReaderException {
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true);
}



/**
* processFieldAsRecord parses Tree data for a single element and its sub-elements,
* adding the parsed data as a row to the project
Expand All @@ -426,9 +375,7 @@ static protected void processFieldAsRecord(
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
Expand All @@ -437,19 +384,19 @@ static protected void processFieldAsRecord(
ImportRecord record = null;
if (value instanceof String) {
String text = (String) value;
if (trimStrings) {
if (parameter.trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
if (text.length() > 0 | !parameter.storeEmptyStrings) {
record = new ImportRecord();
addCell(
project,
rootColumnGroup,
record,
parser.getFieldName(),
(String) value,
storeEmptyStrings,
guessDataType
parameter.storeEmptyStrings,
parameter.guessDataType
);
}
} else {
Expand All @@ -463,20 +410,25 @@ record = new ImportRecord();
);
}
if (record != null) {
addImportRecordToProject(record, project);
addImportRecordToProject(record, project,
parameter.includeFileSources, parameter.fileSource);
}
}

static protected void addImportRecordToProject(ImportRecord record, Project project) {
static protected void addImportRecordToProject(ImportRecord record, Project project,
boolean includeFileSources, String fileSource) {
for (List<Cell> row : record.rows) {
if (row.size() > 0) {
Row realRow = null;
Row realRow = new Row(row.size()); ;
for (int c = 0; c < row.size(); c++) {
if (c == 0 && includeFileSources) { // to add the file source:
realRow.setCell(
0,
new Cell(fileSource, null));
continue;
}
Cell cell = row.get(c);
if (cell != null) {
if (realRow == null) {
realRow = new Row(row.size());
}
realRow.setCell(c, cell);
}
}
Expand All @@ -486,19 +438,6 @@ static protected void addImportRecordToProject(ImportRecord record, Project proj
}
}
}

/**
* @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)}
*/
@Deprecated
static protected void processSubRecord( Project project,
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record,
int level
) throws TreeReaderException {
processSubRecord(project, parser, columnGroup, record, level, true, false, true);
}

/**
*
Expand All @@ -514,9 +453,7 @@ static protected void processSubRecord(
ImportColumnGroup columnGroup,
ImportRecord record,
int level,
boolean trimStrings,
boolean storeEmptyStrings,
boolean guessDataType
ImportParameters parameter
) throws TreeReaderException {
if (logger.isTraceEnabled()) {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup);
Expand All @@ -536,18 +473,18 @@ static protected void processSubRecord(
int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) {
String text = parser.getAttributeValue(i);
if (trimStrings) {
if (parameter.trimStrings) {
text = text.trim();
}
if (text.length() > 0 | !storeEmptyStrings) {
if (text.length() > 0 | !parameter.storeEmptyStrings) {
addCell(
project,
thisColumnGroup,
record,
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
text,
storeEmptyStrings,
guessDataType
parameter.storeEmptyStrings,
parameter.guessDataType
);
}
}
Expand All @@ -561,9 +498,7 @@ static protected void processSubRecord(
thisColumnGroup,
record,
level+1,
trimStrings,
storeEmptyStrings,
guessDataType
parameter
);
} else if (//eventType == XMLStreamConstants.CDATA ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
Expand All @@ -572,7 +507,7 @@ static protected void processSubRecord(
if (value instanceof String) {
String text = (String) value;
addCell(project, thisColumnGroup, record, colName, text,
storeEmptyStrings, guessDataType);
parameter.storeEmptyStrings, parameter.guessDataType);
} else {
addCell(project, thisColumnGroup, record, colName, value);
}
Expand Down