Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.ReconStats;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.model.recon.StandardReconConfig.ColumnDetail;
import com.google.refine.util.JSONUtilities;

public class CommonsImporter {
Expand Down Expand Up @@ -57,6 +61,7 @@ static public void parse(
category.get("depth").asInt()));
}
String apiUrl = "https://commons.wikimedia.org/w/api.php";//FIXME
String service = "https://commonsreconcile.toolforge.org/en/api";

// initializes progress reporting with the name of the first category
setProgress(job, categoriesWithDepth.get(0).categoryName, 0);
Expand All @@ -79,6 +84,20 @@ static public void parse(
options,
exceptions
);

Column col = project.columnModel.columns.get(0);
StandardReconConfig cfg = new StandardReconConfig(
service,
"https://commons.wikimedia.org/entity/",
"http://www.wikidata.org/prop/direct/",
"",
"entity",
true,
new ArrayList<ColumnDetail>(),
1);
col.setReconStats(ReconStats.create(project, 0));
col.setReconConfig(cfg);

setProgress(job, categoriesWithDepth.get(categoriesWithDepth.size()-1).categoryName, 100);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,31 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.model.Cell;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.model.recon.StandardReconConfig;

/*
/**
* This class takes an {@code Iterator<FileRecord>} and converts each FileRecord to one or more rows.
*
* @param iteratorFileRecords the iterator over the file records to convert into rows
*/
public class FileRecordToRows implements TableDataReader {

protected StandardReconConfig reconConfig;

protected String identifierSpace;
protected String schemaSpace;
protected String service;
final Iterator<FileRecord> iteratorFileRecords;
FileRecord fileRecord;
final boolean categoriesColumn;
Expand All @@ -24,10 +38,14 @@ public FileRecordToRows(Iterator<FileRecord> iteratorFileRecords, boolean catego
this.iteratorFileRecords = iteratorFileRecords;
this.categoriesColumn = categoriesColumn;
this.mIdsColumn = mIdsColumn;
this.identifierSpace = "https://commons.wikimedia.org/entity/";
this.schemaSpace = "http://www.wikidata.org/prop/direct/";
this.service = "https://commonsreconcile.toolforge.org/en/api";
this.reconConfig = new StandardReconConfig(service, identifierSpace, schemaSpace, null, null, true, Collections.emptyList());

}

/*
/**
* This method iterates over the parameters of a file record, spreading them across rows.
*
* @return a row containing a cell per file record parameter
Expand All @@ -49,7 +67,29 @@ public List<Object> getNextRowOfCells() throws IOException {
} else if (iteratorFileRecords.hasNext()) {
fileRecord = iteratorFileRecords.next();
relatedCategoriesIndex = 0;
rowsOfCells.add(fileRecord.fileName);
if (fileRecord.fileName != null && ExpressionUtils.isNonBlankData(fileRecord.fileName)) {
String id = "M" + fileRecord.pageId;
if(id.startsWith(identifierSpace)) {
id = id.substring(identifierSpace.length());
}

ReconCandidate match = new ReconCandidate(id, fileRecord.fileName, new String[0], 100);
Recon newRecon = reconConfig.createNewRecon(0);
newRecon.match = match;
newRecon.candidates = Collections.singletonList(match);
newRecon.matchRank = -1;
newRecon.judgment = Judgment.Matched;
newRecon.judgmentAction = "mass";
newRecon.judgmentBatchSize = 1;

Cell newCell = new Cell(
fileRecord.fileName,
newRecon
);

rowsOfCells.add(newCell);
}

if (mIdsColumn) {
rowsOfCells.add("M" + fileRecord.pageId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ public void testGetNextRowOfCells() throws Exception {
rows.add(frtr.getNextRowOfCells());
rows.add(frtr.getNextRowOfCells());

Assert.assertEquals(rows.get(0), Arrays.asList("File:LasTres.jpg", "M127722", "Category:Costa Rica"));
Assert.assertEquals(rows.get(0).toString(), Arrays.asList("File:LasTres.jpg", "M127722", "Category:Costa Rica").toString());
Assert.assertEquals(rows.get(1), Arrays.asList(null, null, "Category:Cute dogs"));
Assert.assertEquals(rows.get(2), Arrays.asList(null, null, "Category:Costa Rican dogs"));
Assert.assertEquals(rows.get(3), Arrays.asList("File:Playa Gandoca.jpg", "M112933", null));
Assert.assertEquals(rows.get(3).toString(), Arrays.asList("File:Playa Gandoca.jpg", "M112933", null).toString());
Assert.assertEquals(rows.get(4), null);

}
Expand Down