Skip to content

Commit

Permalink
Changes to make the BII handle external URIs better. Requires further…
Browse files Browse the repository at this point in the history
… testing by @proccaserra #49
  • Loading branch information
eamonnmag committed Feb 19, 2012
1 parent d7ebd94 commit 4955750
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 561 deletions.
Expand Up @@ -47,10 +47,8 @@
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.hibernate.search.bridge.FieldBridge; import org.hibernate.search.bridge.FieldBridge;
import org.hibernate.search.bridge.LuceneOptions; import org.hibernate.search.bridge.LuceneOptions;
import uk.ac.ebi.bioinvindex.model.Annotation;
import uk.ac.ebi.bioinvindex.model.AssayResult; import uk.ac.ebi.bioinvindex.model.AssayResult;
import uk.ac.ebi.bioinvindex.model.processing.Assay; import uk.ac.ebi.bioinvindex.model.processing.Assay;
import uk.ac.ebi.bioinvindex.model.term.AnnotationTypes;
import uk.ac.ebi.bioinvindex.model.xref.Xref; import uk.ac.ebi.bioinvindex.model.xref.Xref;
import uk.ac.ebi.bioinvindex.search.hibernatesearch.StudyBrowseField; import uk.ac.ebi.bioinvindex.search.hibernatesearch.StudyBrowseField;
import uk.ac.ebi.bioinvindex.utils.datasourceload.DataLocationManager; import uk.ac.ebi.bioinvindex.utils.datasourceload.DataLocationManager;
Expand Down Expand Up @@ -81,31 +79,7 @@ public void set(String s, Object o, Document document, LuceneOptions luceneOptio
dataLocationManager.setEntityManager(entityManager); dataLocationManager.setEntityManager(entityManager);


for (Assay assay : assays) { for (Assay assay : assays) {

Collection<AssayResult> assayResults = ProcessingUtils.findAssayResultsFromAssay(assay);
if (assay.getTechnologyName().equals("mass spectrometry")) {
Collection<AssayResult> assayResults = ProcessingUtils.findAssayResultsFromAssay(assay);

String fileLink = dataLocationManager.getDataLocationLink(assay.getMeasurement().getName(), assay.getTechnologyName(), assay.getStudy().getObfuscationCode(),
AnnotationTypes.GENERIC_DATA_FILE_LINK);

System.out.println("File link: " + fileLink);

String pathLink = dataLocationManager.getDataLocationLink(assay.getMeasurement().getName(), assay.getTechnologyName(), assay.getStudy().getObfuscationCode(),
AnnotationTypes.GENERIC_DATA_FILE_PATH);

System.out.println("Path link: " + pathLink);

for (AssayResult result : assayResults) {
for (Annotation annotation : result.getData().getAnnotation("metaboliteFile")) {
System.out.printf("Type: %s -> Value: %s\n", annotation.getType().getValue(), annotation.getText());

MetaboLightsIndexer.indexMetaboliteFile(pathLink.replace("${study-acc}",
assay.getStudy().getObfuscationCode()) + annotation.getText(),
document,
luceneOptions);
}
}
}


String type = buildType(assay); String type = buildType(assay);


Expand All @@ -114,19 +88,11 @@ public void set(String s, Object o, Document document, LuceneOptions luceneOptio
assayTypeToInfo.put(type, info); assayTypeToInfo.put(type, info);
} }


for (Xref xref : assay.getXrefs()) { createAssayExternalLinks(assayTypeToInfo, assayResults, type);
System.out.println("Adding XREF to AssayTypeInfo: " + xref.getSource().getAcc() + "(" + xref.getAcc() + ") for " + type); createXrefs(assayTypeToInfo, assay, type);

StringBuilder sb = new StringBuilder();
sb.append("xref(").append(xref.getAcc()).append("->");
sb.append(xref.getSource().getAcc()).append(")");

assayTypeToInfo.get(type).addAccession(sb.toString());
}


assayTypeToInfo.get(type).increaseCount(); assayTypeToInfo.get(type).increaseCount();
} }
// each data link should be stored perhaps, or at least whatever is required to make it display in the Study page.


for (String type : assayTypeToInfo.keySet()) { for (String type : assayTypeToInfo.keySet()) {
StringBuilder fullInfo = new StringBuilder(); StringBuilder fullInfo = new StringBuilder();
Expand Down Expand Up @@ -160,6 +126,35 @@ public void set(String s, Object o, Document document, LuceneOptions luceneOptio
entityManager.close(); entityManager.close();
} }


/**
 * Adds one accession entry per cross-reference of the given assay to the
 * {@link AssayTypeInfo} registered under {@code type}.
 * <p>
 * Each entry is encoded as {@code xref(<xref accession>-><source accession>)}.
 *
 * @param assayTypeToInfo assay type key to the info object collecting accessions
 * @param assay           assay whose cross-references are read
 * @param type            key used to look up the target entry in {@code assayTypeToInfo}
 */
private void createXrefs(Map<String, AssayTypeInfo> assayTypeToInfo, Assay assay, String type) {
    for (Xref reference : assay.getXrefs()) {
        // Concatenation evaluates left to right: xref accession first, then source accession.
        String encodedXref = "xref(" + reference.getAcc() + "->" + reference.getSource().getAcc() + ")";
        assayTypeToInfo.get(type).addAccession(encodedXref);
    }
}

/**
 * Adds the external locations of an assay's data files as accessions on the
 * {@link AssayTypeInfo} registered under {@code type}.
 * <p>
 * Only data names that are external links (starting with http, ftp or https) and
 * contain a "/" are considered. Each distinct containing folder is stored once,
 * encoded as {@code link(<folder>-><data type name>)}.
 *
 * @param assayTypeToInfo assay type key to the info object collecting accessions
 * @param assayResults    results whose data names are inspected
 * @param type            key used to look up the target entry in {@code assayTypeToInfo}
 */
private void createAssayExternalLinks(Map<String, AssayTypeInfo> assayTypeToInfo, Collection<AssayResult> assayResults, String type) {
    Set<String> addedLinks = new HashSet<String>();
    for (AssayResult result : assayResults) {
        String name = result.getData().getName();

        // We're only looking at links...should accommodate webdav etc. too.
        // The previous guard negated this match (testing scaffolding), which skipped
        // exactly the external URIs this method exists to record.
        if (name == null || !name.matches("(http|ftp|https).*") || !name.contains("/")) {
            continue;
        }

        // We only store the folder since that will take us to multiple file locations.
        // Otherwise we'd have too many individual links pointing to the same place.
        int lastSlash = name.lastIndexOf('/');
        int schemeSeparator = name.indexOf("://");
        // For a host-only URL such as "http://host" the last slash belongs to "://";
        // cutting there would leave "http:/", so the full name is kept instead.
        boolean slashBelongsToScheme = schemeSeparator >= 0 && lastSlash <= schemeSeparator + 2;
        String folder = slashBelongsToScheme ? name : name.substring(0, lastSlash);

        // Set.add returns false when the folder was already recorded.
        if (addedLinks.add(folder)) {
            String encodedLink = "link(" + folder + "->" + result.getData().getType().getName() + ")";
            assayTypeToInfo.get(type).addAccession(encodedLink);
        }
    }
}

private String buildType(Assay assay) { private String buildType(Assay assay) {
return assay.getMeasurement().getName() + "|" + assay.getTechnologyName(); return assay.getMeasurement().getName() + "|" + assay.getTechnologyName();
} }
Expand Down
Expand Up @@ -11,7 +11,8 @@
public enum Repository { public enum Repository {


PRIDE("^[0-9]+","pride"), ARRAYEXPRESS("^(e-).*", "arrayexpress"), GEO("^[(gse)|(ges)]*[0-9]+", "geo"), PRIDE("^[0-9]+","pride"), ARRAYEXPRESS("^(e-).*", "arrayexpress"), GEO("^[(gse)|(ges)]*[0-9]+", "geo"),
ENA("^[(sra)|(ena)]*[0-9]+","embl-bank", "ena", "ebi/ena"), GENERIC("generic", "generic", "none", "generic proteomic location", "generic microarray location"); ENA("^[(sra)|(ena)]*[0-9]+","embl-bank", "ena", "ebi/ena"),
GENERIC("generic", "generic", "none", "generic proteomic location", "generic microarray location");




private String startString; private String startString;
Expand Down
Expand Up @@ -65,6 +65,8 @@
@AutoCreate @AutoCreate
public class SourceURLResolverImpl implements SourceURLResolver { public class SourceURLResolverImpl implements SourceURLResolver {


public static final String EXTERNAL_LINK_PATTERN = "(http|ftp|https).*";

private static Cache<String, String> cache = new BIICache<String, String>(); private static Cache<String, String> cache = new BIICache<String, String>();


private static final Log log = LogFactory.getLog(SourceURLResolverImpl.class); private static final Log log = LogFactory.getLog(SourceURLResolverImpl.class);
Expand All @@ -74,16 +76,21 @@ public class SourceURLResolverImpl implements SourceURLResolver {


public String getRawDataURL(String measurement, String technology, String accession) { public String getRawDataURL(String measurement, String technology, String accession) {
try { try {
if (checkIsExternalLink(accession)) return accession;
return getDataURL(measurement, technology, accession, AnnotationTypes.RAW_DATA_FILE_LINK); return getDataURL(measurement, technology, accession, AnnotationTypes.RAW_DATA_FILE_LINK);
} catch (Exception e) { } catch (Exception e) {
log.error("Unable to resolve Raw data URL"); log.error("Unable to resolve Raw data URL");
return ""; return "";
} }
} }


/**
 * Tells whether the given accession is itself an external link, i.e. whether the
 * whole string matches {@link #EXTERNAL_LINK_PATTERN}.
 *
 * @param accession the accession string to test
 * @return {@code true} if the accession matches the external link pattern
 */
private boolean checkIsExternalLink(String accession) {
    // Pattern.matches(regex, input) is the documented equivalent of input.matches(regex).
    return java.util.regex.Pattern.matches(EXTERNAL_LINK_PATTERN, accession);
}

public String getProcessedDataURL(String measurement, String technology, String accession) { public String getProcessedDataURL(String measurement, String technology, String accession) {
try { try {

if (checkIsExternalLink(accession)) return accession;
return getDataURL(measurement, technology, accession, AnnotationTypes.PROCESSED_DATA_FILE_LINK); return getDataURL(measurement, technology, accession, AnnotationTypes.PROCESSED_DATA_FILE_LINK);
} catch (Exception e) { } catch (Exception e) {
log.error("Unable to resolve Processed data URL"); log.error("Unable to resolve Processed data URL");
Expand Down
Expand Up @@ -94,23 +94,11 @@ public class StudyBeanImpl implements StudyBean {
@In(required = false) @In(required = false)
private StudyIndexLocatorImpl studyIndexLocator; private StudyIndexLocatorImpl studyIndexLocator;



private String studyId, organism, design;
private String organism;

private String design;

private Map<String, List<String>> factorsToValues;

private Map<String, List<String>> characteristicsToValues;

private List<Contact> contacts;

private List<AssayGroupInfo> assayInfos;

private String studyId;

private Collection<Investigation> investigations; private Collection<Investigation> investigations;

private List<AssayGroupInfo> assayInfos;
private List<Contact> contacts;
private Map<String, List<String>> characteristicsToValues, factorsToValues;
private List<String> relatedStudies; private List<String> relatedStudies;


public StudyBeanImpl() { public StudyBeanImpl() {
Expand Down
Expand Up @@ -354,12 +354,12 @@ private AssayInfoBean createAssayInfoBean(AssayInfoBean assayInfoBean, String as
private DataLink createDBLink(String dbLinkRepresentation) { private DataLink createDBLink(String dbLinkRepresentation) {
DataLink link = new DataLink(); DataLink link = new DataLink();


dbLinkRepresentation = dbLinkRepresentation.replace("xref(", "").replace(")", ""); dbLinkRepresentation = dbLinkRepresentation.replace("xref(", "").replace("link(", "").replace(")", "");


if (dbLinkRepresentation.contains("->")) { if (dbLinkRepresentation.contains("->")) {
String[] dbLinkParts = dbLinkRepresentation.split("->"); String[] dbLinkParts = dbLinkRepresentation.split("->");
link.setAcc(dbLinkParts[0]); link.setAcc(dbLinkParts[0]);
link.setSourceName(dbLinkParts[1]); link.setSourceName(dbLinkParts[1].toUpperCase());
} }


return link; return link;
Expand Down

This file was deleted.

0 comments on commit 4955750

Please sign in to comment.