New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix issue #512 to save the file location as a table column #1055
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package com.google.refine.importers.tree; | ||
|
||
|
||
/**
 * Bundles the options that steer a tree import so they can be handed from method to method
 * as a single argument instead of a growing list of booleans.
 */
public class ImportParameters {
    /** When true, string values are trimmed before they are examined or stored. */
    boolean trimStrings;
    /** Forwarded to cell creation; governs how empty string values are treated. */
    boolean storeEmptyStrings;
    /** Forwarded to cell creation; requests that cell value types be guessed from the text. */
    boolean guessDataType;
    /** When true, the first cell of every imported row is filled with {@link #fileSource}. */
    boolean includeFileSources;
    /** Name of the file the data came from; only used when {@link #includeFileSources} is set. */
    String fileSource;

    /**
     * Creates a fully specified parameter set.
     *
     * @param trimStrings whether string values are trimmed
     * @param storeEmptyStrings empty-string handling flag passed on to cell creation
     * @param guessCellValueTypes whether cell value types should be guessed
     * @param includeFileSources whether each row records the file it was imported from
     * @param fileSource the file name to record when {@code includeFileSources} is true
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes,
            boolean includeFileSources, String fileSource) {
        this.trimStrings = trimStrings;
        this.storeEmptyStrings = storeEmptyStrings;
        this.guessDataType = guessCellValueTypes;
        this.includeFileSources = includeFileSources;
        this.fileSource = fileSource;
    }

    /**
     * Creates a parameter set that does not record file sources: {@code includeFileSources}
     * is false and {@code fileSource} is the empty string.
     *
     * @param trimStrings whether string values are trimmed
     * @param storeEmptyStrings empty-string handling flag passed on to cell creation
     * @param guessCellValueTypes whether cell value types should be guessed
     */
    public ImportParameters(boolean trimStrings, boolean storeEmptyStrings, boolean guessCellValueTypes) {
        // Delegate so the field assignments live in exactly one place.
        this(trimStrings, storeEmptyStrings, guessCellValueTypes, false, "");
    }
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing newline at end of file. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,11 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
import com.google.refine.importers.ImportingParserBase; | ||
import com.google.refine.importing.ImportingJob; | ||
import com.google.refine.importing.ImportingUtilities; | ||
import com.google.refine.model.Cell; | ||
import com.google.refine.model.Column; | ||
import com.google.refine.model.ModelException; | ||
import com.google.refine.model.Project; | ||
import com.google.refine.model.Row; | ||
import com.google.refine.util.JSONUtilities; | ||
|
||
/** | ||
|
@@ -210,8 +214,22 @@ protected void parseOneFile( | |
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", true); | ||
boolean storeEmptyStrings = JSONUtilities.getBoolean(options, "storeEmptyStrings", false); | ||
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true); | ||
|
||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, trimStrings, | ||
storeEmptyStrings,guessCellValueTypes); | ||
|
||
// copied from TabularImportingParserBase | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this is used in both Tabular & Tree imports, is there a reason it can't be hoisted to ImportingParserBase? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "Copied from somewhere" can always be improved. I will revisit whether this can be pulled up into the parent class. |
||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); | ||
String fileNameColumnName = "File"; | ||
if (includeFileSources) { | ||
if (project.columnModel.getColumnByName(fileNameColumnName) == null) { | ||
try { | ||
project.columnModel.addColumn( | ||
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false); | ||
} catch (ModelException e) { | ||
// Ignore: We already checked for duplicate name. | ||
} | ||
} | ||
} | ||
|
||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit2, | ||
new ImportParameters(trimStrings, storeEmptyStrings,guessCellValueTypes, includeFileSources,fileSource)); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -245,26 +245,13 @@ static protected RecordElementCandidate detectRecordElement(TreeReader parser, S | |
return null; | ||
} | ||
|
||
@Deprecated | ||
static public void importTreeData( | ||
TreeReader parser, | ||
Project project, | ||
String[] recordPath, | ||
ImportColumnGroup rootColumnGroup, | ||
int limit | ||
) { | ||
importTreeData(parser, project, recordPath, rootColumnGroup, limit,true,false,true); | ||
} | ||
|
||
static public void importTreeData( | ||
TreeReader parser, | ||
Project project, | ||
String[] recordPath, | ||
ImportColumnGroup rootColumnGroup, | ||
int limit, | ||
boolean trimStrings, | ||
boolean storeEmptyStrings, | ||
boolean guessDataType | ||
ImportParameters parameters | ||
) { | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)"); | ||
|
@@ -273,7 +260,7 @@ static public void importTreeData( | |
while (parser.hasNext()) { | ||
Token eventType = parser.next(); | ||
if (eventType == Token.StartEntity) { | ||
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); | ||
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit--,parameters); | ||
} | ||
} | ||
} catch (TreeReaderException e) { | ||
|
@@ -282,18 +269,6 @@ static public void importTreeData( | |
} | ||
} | ||
|
||
@Deprecated | ||
static protected void findRecord( | ||
Project project, | ||
TreeReader parser, | ||
String[] recordPath, | ||
int pathIndex, | ||
ImportColumnGroup rootColumnGroup, | ||
int limit | ||
) throws TreeReaderException { | ||
findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, limit, true, false,true); | ||
} | ||
|
||
/** | ||
* | ||
* @param project | ||
|
@@ -310,9 +285,7 @@ static protected void findRecord( | |
int pathIndex, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removing deprecated methods shouldn't be included in this patch. |
||
ImportColumnGroup rootColumnGroup, | ||
int limit, | ||
boolean trimStrings, | ||
boolean storeEmptyStrings, | ||
boolean guessDataType | ||
ImportParameters parameters | ||
) throws TreeReaderException { | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"+Arrays.toString(recordPath)); | ||
|
@@ -331,7 +304,8 @@ static protected void findRecord( | |
while (parser.hasNext() && limit != 0) { | ||
Token eventType = parser.next(); | ||
if (eventType == Token.StartEntity) { | ||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--,trimStrings,storeEmptyStrings,guessDataType); | ||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit--, | ||
parameters); | ||
} else if (eventType == Token.EndEntity) { | ||
break; | ||
} else if (eventType == Token.Value) { | ||
|
@@ -340,13 +314,13 @@ static protected void findRecord( | |
String desiredFieldName = recordPath[pathIndex + 1]; | ||
String currentFieldName = parser.getFieldName(); | ||
if (desiredFieldName.equals(currentFieldName)) { | ||
processFieldAsRecord(project, parser, rootColumnGroup,trimStrings,storeEmptyStrings,guessDataType); | ||
processFieldAsRecord(project, parser, rootColumnGroup,parameters); | ||
} | ||
} | ||
} | ||
} | ||
} else { | ||
processRecord(project, parser, rootColumnGroup, trimStrings, storeEmptyStrings, guessDataType); | ||
processRecord(project, parser, rootColumnGroup, parameters); | ||
} | ||
} else { | ||
skip(parser); | ||
|
@@ -364,17 +338,6 @@ static protected void skip(TreeReader parser) throws TreeReaderException { | |
} | ||
} | ||
|
||
/** | ||
* @deprecated on 20120907 by tfmorris -use {@link #processRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} | ||
*/ | ||
@Deprecated | ||
static protected void processRecord( | ||
Project project, | ||
TreeReader parser, | ||
ImportColumnGroup rootColumnGroup | ||
) throws TreeReaderException { | ||
processRecord(project, parser, rootColumnGroup, true, false, true); | ||
} | ||
|
||
/** | ||
* processRecord parses Tree data for a single element and its sub-elements, | ||
|
@@ -388,32 +351,18 @@ static protected void processRecord( | |
Project project, | ||
TreeReader parser, | ||
ImportColumnGroup rootColumnGroup, | ||
boolean trimStrings, | ||
boolean storeEmptyStrings, | ||
boolean guessDataType | ||
ImportParameters parameter | ||
) throws TreeReaderException { | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); | ||
} | ||
ImportRecord record = new ImportRecord(); | ||
|
||
processSubRecord(project, parser, rootColumnGroup, record, 0, trimStrings, storeEmptyStrings, guessDataType); | ||
addImportRecordToProject(record, project); | ||
processSubRecord(project, parser, rootColumnGroup, record, 0, parameter); | ||
addImportRecordToProject(record, project, parameter.includeFileSources, parameter.fileSource); | ||
} | ||
|
||
/** | ||
* @deprecated 20120907 by tfmorris - use {@link #processFieldAsRecord(Project, TreeReader, ImportColumnGroup, boolean, boolean, boolean)} | ||
*/ | ||
@Deprecated | ||
static protected void processFieldAsRecord( | ||
Project project, | ||
TreeReader parser, | ||
ImportColumnGroup rootColumnGroup | ||
) throws TreeReaderException { | ||
processFieldAsRecord(project, parser, rootColumnGroup, true, false, true); | ||
} | ||
|
||
|
||
|
||
/** | ||
* processFieldAsRecord parses Tree data for a single element and its sub-elements, | ||
* adding the parsed data as a row to the project | ||
|
@@ -426,9 +375,7 @@ static protected void processFieldAsRecord( | |
Project project, | ||
TreeReader parser, | ||
ImportColumnGroup rootColumnGroup, | ||
boolean trimStrings, | ||
boolean storeEmptyStrings, | ||
boolean guessDataType | ||
ImportParameters parameter | ||
) throws TreeReaderException { | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); | ||
|
@@ -437,19 +384,19 @@ static protected void processFieldAsRecord( | |
ImportRecord record = null; | ||
if (value instanceof String) { | ||
String text = (String) value; | ||
if (trimStrings) { | ||
if (parameter.trimStrings) { | ||
text = text.trim(); | ||
} | ||
if (text.length() > 0 | !storeEmptyStrings) { | ||
if (text.length() > 0 | !parameter.storeEmptyStrings) { | ||
record = new ImportRecord(); | ||
addCell( | ||
project, | ||
rootColumnGroup, | ||
record, | ||
parser.getFieldName(), | ||
(String) value, | ||
storeEmptyStrings, | ||
guessDataType | ||
parameter.storeEmptyStrings, | ||
parameter.guessDataType | ||
); | ||
} | ||
} else { | ||
|
@@ -463,20 +410,25 @@ record = new ImportRecord(); | |
); | ||
} | ||
if (record != null) { | ||
addImportRecordToProject(record, project); | ||
addImportRecordToProject(record, project, | ||
parameter.includeFileSources, parameter.fileSource); | ||
} | ||
} | ||
|
||
static protected void addImportRecordToProject(ImportRecord record, Project project) { | ||
static protected void addImportRecordToProject(ImportRecord record, Project project, | ||
boolean includeFileSources, String fileSource) { | ||
for (List<Cell> row : record.rows) { | ||
if (row.size() > 0) { | ||
Row realRow = null; | ||
Row realRow = new Row(row.size()); ; | ||
for (int c = 0; c < row.size(); c++) { | ||
if (c == 0 && includeFileSources) { // to add the file source: | ||
realRow.setCell( | ||
0, | ||
new Cell(fileSource, null)); | ||
continue; | ||
} | ||
Cell cell = row.get(c); | ||
if (cell != null) { | ||
if (realRow == null) { | ||
realRow = new Row(row.size()); | ||
} | ||
realRow.setCell(c, cell); | ||
} | ||
} | ||
|
@@ -486,19 +438,6 @@ static protected void addImportRecordToProject(ImportRecord record, Project proj | |
} | ||
} | ||
} | ||
|
||
/** | ||
* @deprecated by tfmorris use {@link #processSubRecord(Project, TreeReader, ImportColumnGroup, ImportRecord, int, boolean, boolean, boolean)} | ||
*/ | ||
@Deprecated | ||
static protected void processSubRecord( Project project, | ||
TreeReader parser, | ||
ImportColumnGroup columnGroup, | ||
ImportRecord record, | ||
int level | ||
) throws TreeReaderException { | ||
processSubRecord(project, parser, columnGroup, record, level, true, false, true); | ||
} | ||
|
||
/** | ||
* | ||
|
@@ -514,9 +453,7 @@ static protected void processSubRecord( | |
ImportColumnGroup columnGroup, | ||
ImportRecord record, | ||
int level, | ||
boolean trimStrings, | ||
boolean storeEmptyStrings, | ||
boolean guessDataType | ||
ImportParameters parameter | ||
) throws TreeReaderException { | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"+level+" "+columnGroup); | ||
|
@@ -536,18 +473,18 @@ static protected void processSubRecord( | |
int attributeCount = parser.getAttributeCount(); | ||
for (int i = 0; i < attributeCount; i++) { | ||
String text = parser.getAttributeValue(i); | ||
if (trimStrings) { | ||
if (parameter.trimStrings) { | ||
text = text.trim(); | ||
} | ||
if (text.length() > 0 | !storeEmptyStrings) { | ||
if (text.length() > 0 | !parameter.storeEmptyStrings) { | ||
addCell( | ||
project, | ||
thisColumnGroup, | ||
record, | ||
composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), | ||
text, | ||
storeEmptyStrings, | ||
guessDataType | ||
parameter.storeEmptyStrings, | ||
parameter.guessDataType | ||
); | ||
} | ||
} | ||
|
@@ -561,9 +498,7 @@ static protected void processSubRecord( | |
thisColumnGroup, | ||
record, | ||
level+1, | ||
trimStrings, | ||
storeEmptyStrings, | ||
guessDataType | ||
parameter | ||
); | ||
} else if (//eventType == XMLStreamConstants.CDATA || | ||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) { | ||
|
@@ -572,7 +507,7 @@ static protected void processSubRecord( | |
if (value instanceof String) { | ||
String text = (String) value; | ||
addCell(project, thisColumnGroup, record, colName, text, | ||
storeEmptyStrings, guessDataType); | ||
parameter.storeEmptyStrings, parameter.guessDataType); | ||
} else { | ||
addCell(project, thisColumnGroup, record, colName, value); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should call the constructor above (Line 11) rather than duplicating code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. Do you want me to commit the change or create another branch from the fork?