Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a metadata table - one-to-many relationship enhanced #83

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ public ResponseEntity<?> getSeqColByDigestAndLevel(
return new ResponseEntity<>(HttpStatus.NOT_FOUND);
}

@GetMapping(value = "/collection/{digest}/metadata")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the rationale for making the metadata available for a given digest, and we could keep it for now.
However, I think the metadata should be injected into the SeqCol level 2 object and added as its own properties.

{
  "sequences": [...],
  "names": [...],
  "length": [...],
  "naming_convention": "GENBANK",
  "source_id": "GCA_000001",
  "source_url": "https://...",
  "ingested_on": "2024-04-21T12:00:00"
}

This could be added to SeqColService.getSeqColByDigestAndLevel

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also be added on request via a query parameter - e.g. /collection/{digest}?level=2&metadata=true - in case anyone wants the "plain" level 2 object?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also be added on request via a query parameter - e.g. /collection/{digest}?level=2&metadata=true - in case anyone wants the "plain" level 2 object?

I think that's a very good idea: it preserves the endpoint specified/required by the spec and extends it with other optional parameters.

public ResponseEntity<?> getSeqColByDigestAndLevelMetadata(@PathVariable String digest) {
waterflow80 marked this conversation as resolved.
Show resolved Hide resolved
return new ResponseEntity<>(
seqColService.getSeqColMetadataBySeqColDigest(digest), HttpStatus.OK
);
}

@GetMapping("/service-info")
public ResponseEntity<?> getServiceInfo() {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ public abstract class SeqColEntity {

protected String digest; // The level 0 digest

protected NamingConvention namingConvention;


public enum NamingConvention {
ENA, GENBANK, UCSC, TEST
Expand Down
17 changes: 0 additions & 17 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColId.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import javax.persistence.Basic;
import javax.persistence.CollectionTable;
import javax.persistence.Column;
import javax.persistence.ElementCollection;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.IdClass;
import javax.persistence.JoinColumn;
import javax.persistence.Table;
import java.util.HashSet;
import java.util.Set;

@Entity
@NoArgsConstructor
@Data
@Table(name = "sequence_collections_L1")
@IdClass(SeqColId.class)
public class SeqColLevelOneEntity extends SeqColEntity{

@Id
Expand All @@ -32,15 +33,19 @@ public class SeqColLevelOneEntity extends SeqColEntity{
@Basic(fetch = FetchType.LAZY)
private JSONLevelOne seqColLevel1Object;

@Id
@Column(nullable = false)
@Enumerated(EnumType.STRING)
protected NamingConvention namingConvention;
@ElementCollection(fetch = FetchType.LAZY)
@CollectionTable(name = "seqcol_md", joinColumns =
@JoinColumn(name = "digest", nullable = false, updatable = false))
private Set<SeqColMetadataEntity> metadata;

public SeqColLevelOneEntity(String digest, NamingConvention namingConvention, JSONLevelOne jsonLevelOne){
super(digest, namingConvention);
public SeqColLevelOneEntity(String digest, JSONLevelOne jsonLevelOne){
super(digest);
this.seqColLevel1Object = jsonLevelOne;
this.namingConvention = namingConvention;
}

/**
 * Attaches a metadata record to this level 1 seqCol.
 * The backing set is created on the first call, then the record is added to it.
 *
 * @param seqColMetadataEntity the metadata record to attach
 */
public void addMetadata(SeqColMetadataEntity seqColMetadataEntity){
    if (metadata == null) {
        metadata = new HashSet<>();
    }
    metadata.add(seqColMetadataEntity);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,4 @@ public SeqColLevelTwoEntity setDigest(String digest) {
this.digest = digest;
return this;
}

public SeqColLevelTwoEntity setNamingConvention(NamingConvention convention) {
this.namingConvention = convention;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package uk.ac.ebi.eva.evaseqcol.entities;

import lombok.Data;
import org.hibernate.annotations.CreationTimestamp;

import javax.persistence.Column;
import javax.persistence.Embeddable;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import java.util.Date;

@Data
@Embeddable
public class SeqColMetadataEntity {

@Column(name = "source_id")
private String sourceIdentifier; // E.g. an INSDC accession

@Column(name = "source_url")
private String sourceUrl;

@Enumerated(EnumType.STRING)
@Column(name = "naming_convention")
private SeqColEntity.NamingConvention namingConvention;

@Column(name = "timestamp", updatable = false, columnDefinition="TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
tcezard marked this conversation as resolved.
Show resolved Hide resolved
@Temporal(TemporalType.TIMESTAMP)
@CreationTimestamp
private Date timestamp;

/**
 * Sets the naming convention of this metadata record.
 *
 * @param namingConvention the naming convention to store
 * @return this instance, so that setter calls can be chained
 */
public SeqColMetadataEntity setNamingConvention(SeqColEntity.NamingConvention namingConvention) {
this.namingConvention = namingConvention;
return this;
}

/**
 * Sets the source identifier of this metadata record.
 *
 * @param sourceIdentifier the identifier to store
 * @return this instance, so that setter calls can be chained
 */
public SeqColMetadataEntity setSourceIdentifier(String sourceIdentifier) {
this.sourceIdentifier = sourceIdentifier;
return this;
}

/**
 * Sets the source URL of this metadata record.
 *
 * @param sourceUrl the URL to store
 * @return this instance, so that setter calls can be chained
 */
public SeqColMetadataEntity setSourceUrl(String sourceUrl) {
this.sourceUrl = sourceUrl;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package uk.ac.ebi.eva.evaseqcol.repo;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;

import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not being used in this class

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


import java.util.List;

@Repository
public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEntity, String> {
Expand All @@ -14,4 +18,10 @@ public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEn
void removeSeqColLevelOneEntityByDigest(String digest);

void deleteAll();

@Query(value = "select source_id, source_url, naming_convention, timestamp from seqcol_md where digest = ?1", nativeQuery = true)
List<Object[]> findMetadataBySeqColDigest(String digest);

@Query(value = "select source_id, source_url, naming_convention, timestamp from seqcol_md", nativeQuery = true)
List<Object[]> findAllMetadata();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;
import uk.ac.ebi.eva.evaseqcol.utils.JSONStringListExtData;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -62,12 +65,17 @@ public List<SeqColLevelOneEntity> getAllSeqColLevelOneObjects(){

/**
* Construct a seqCol level 1 entity out of three seqCol level 2 entities that
* hold names, lengths and sequences objects*/
* hold names, lengths and sequences objects
* TODO: Change the signature of this method and make it accept metadata object instead of namingconvention and source id*/
public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntity<List<String>>> stringListExtendedDataEntities,
List<SeqColExtendedDataEntity<List<Integer>>> integerListExtendedDataEntities,
SeqColEntity.NamingConvention convention) throws IOException {
SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(convention)
.setSourceIdentifier(sourceId);
levelOneEntity.addMetadata(metadata);

// Looping over List<String> types
for (SeqColExtendedDataEntity<List<String>> dataEntity: stringListExtendedDataEntities) {
Expand Down Expand Up @@ -99,14 +107,13 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntit
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
String digest0 = digestCalculator.getSha512Digest(levelOneEntity.toString());
levelOneEntity.setDigest(digest0);
levelOneEntity.setNamingConvention(convention);
return levelOneEntity;
}

/**
* Construct a Level 1 seqCol out of a Level 2 seqCol*/
public SeqColLevelOneEntity constructSeqColLevelOne(
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention) throws IOException {
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
DigestCalculator digestCalculator = new DigestCalculator();
JSONExtData<List<String>> sequencesExtData = new JSONStringListExtData(levelTwoEntity.getSequences());
JSONExtData<List<Integer>> lengthsExtData = new JSONIntegerListExtData(levelTwoEntity.getLengths());
Expand Down Expand Up @@ -151,7 +158,7 @@ public SeqColLevelOneEntity constructSeqColLevelOne(
lengthsExtEntity
);

return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention);
return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention, sourceId);
}

/**
Expand Down Expand Up @@ -208,4 +215,28 @@ public List<SeqColExtendedDataEntity<List<Integer>>> constructIntegerListExtData
return integerListExtendedDataEntities;
}

public List<SeqColMetadataEntity> metadataObjectArrayListToMetadataList(List<Object[]> metadataArray) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this method need to be public ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily; it can be private in our case. However, I'm not sure whether we might need to use it outside of this class in the future.

List<SeqColMetadataEntity> metadataList = new ArrayList<>();
for (Object[] metadataElements : metadataArray) {
SeqColMetadataEntity metadataEntity = new SeqColMetadataEntity();
metadataEntity.setSourceIdentifier((String) metadataElements[0]);
metadataEntity.setSourceUrl((String) metadataElements[1]);
metadataEntity.setNamingConvention(SeqColEntity.NamingConvention.valueOf(
(String) metadataElements[2]
));
metadataEntity.setTimestamp((Date) metadataElements[3]);
tcezard marked this conversation as resolved.
Show resolved Hide resolved
metadataList.add(metadataEntity);
}
return metadataList;
}

public List<SeqColMetadataEntity> getAllMetadata() {
List<Object[]> metadataArrayList = repository.findAllMetadata();
return metadataObjectArrayListToMetadataList(metadataArrayList);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth using something like TupleTransformer in the repository to do this? I'm not sure how much difference it makes in practice compared to your conversion method.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made a small enhancement to the transformation of the fetched metadata objects using streams (commit); however, using TupleTransformer might be a bit tricky and would require some changes to the design.

This is an example I found online that uses the TupleTransformer:

List<PostRecord> postRecords = entityManager.createQuery("""
    select
        p.id,
        p.title,
        p.createdOn,
        p.createdBy,
        p.updatedOn,
        p.updatedBy
    from Post p
    order by p.id
    """)
.unwrap(org.hibernate.query.Query.class)
.setTupleTransformer(
    (tuple, aliases) -> {
        int i =0;
        return new PostRecord(
            longValue(tuple[i++]),
            stringValue(tuple[i++]),
            new AuditRecord(
                localDateTimeValue(tuple[i++]),
                stringValue(tuple[i++]),
                localDateTimeValue(tuple[i++]),
                stringValue(tuple[i++])
            )
        );
    }
)
.getResultList();

Since we're using an interface repository, implementing this logic might require adding an additional repository class that will execute the custom query.

Not sure if that's gonna make the design more complex or not...

Idk.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, your implementation is pretty neat so let's keep it like that 👍

}

/**
 * Returns the metadata records stored for the given seqCol digest.
 * The raw rows fetched by the repository are converted into metadata entities.
 *
 * @param digest the digest used to select the metadata rows
 * @return the metadata entities built from the matching rows
 */
public List<SeqColMetadataEntity> getMetadataBySeqcolDigest(String digest) {
    return metadataObjectArrayListToMetadataList(
            repository.findMetadataBySeqColDigest(digest));
}
}
18 changes: 15 additions & 3 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColExtendedDataEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyAlreadyIngestedException;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.AttributeNotDefinedException;
Expand Down Expand Up @@ -124,6 +125,10 @@ public Optional<? extends SeqColEntity> getSeqColByDigestAndLevel(String digest,
}
}

/**
 * Looks up the metadata attached to a seqCol by delegating to the level one service.
 *
 * @param digest the digest of the seqCol whose metadata is requested
 * @return the metadata records returned by the level one service for that digest
 */
public List<SeqColMetadataEntity> getSeqColMetadataBySeqColDigest(String digest) {
    List<SeqColMetadataEntity> seqColMetadata = levelOneService.getMetadataBySeqcolDigest(digest);
    return seqColMetadata;
}

/**
* Return the service info entity in a Map<String,Object> format
* @see 'https://seqcol.readthedocs.io/en/dev/specification/#21-service-info'
Expand Down Expand Up @@ -165,6 +170,13 @@ public IngestionResultEntity fetchAndInsertAllSeqColInFastaFile(String accession
* assembly report.
* Return the list of level 0 digests of the inserted seqcol objects*/
public IngestionResultEntity fetchAndInsertAllSeqColByAssemblyAccession(String assemblyAccession) throws IOException {
    // Refuse to ingest an accession that is already recorded as the source id of stored metadata.
    // The stored id is null-checked first: the source_id column is not declared non-nullable,
    // so a record without a source id must not abort the lookup with a NullPointerException.
    boolean sourceIdExists = levelOneService.getAllMetadata().stream()
            .anyMatch(md -> md.getSourceIdentifier() != null
                    && md.getSourceIdentifier().equals(assemblyAccession));
    if (sourceIdExists) {
        logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested. Nothing to ingest !");
        throw new AssemblyAlreadyIngestedException(assemblyAccession);
    }
    // Not ingested yet: fetch every possible seqCol for the accession and insert them.
    Optional<Map<String, Object>> seqColDataMap = ncbiSeqColDataSource.getAllPossibleSeqColExtendedData(assemblyAccession);
    return createSeqColObjectsAndInsert(seqColDataMap, assemblyAccession);
}
Expand Down Expand Up @@ -206,8 +218,8 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O

// Constructing seqCol Level One object
SeqColLevelOneEntity levelOneEntity = levelOneService.constructSeqColLevelOne(
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention()
);
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention(),
assemblyAccession);

try {
Optional<String> seqColDigest = insertSeqColL1AndL2( // TODO: Check for possible self invocation problem
Expand All @@ -227,7 +239,7 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O
}
}
if (ingestionResultEntity.getNumberOfInsertedSeqcols() == 0) {
logger.warn("Seqcol objects for assembly " + assemblyAccession + " has been already ingested");
logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested");
throw new AssemblyAlreadyIngestedException(assemblyAccession);
} else {
return ingestionResultEntity;
Expand Down
10 changes: 8 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import java.util.Arrays;
Expand All @@ -14,20 +15,26 @@
@Component
public class SeqColGenerator {

private final String GCA_ACCESSION = "GCA_000146045.2";

/**
* Return an example (might not be real) of a seqCol object level 1
* The naming convention is set to GENBANK as a random choice*/
public SeqColLevelOneEntity generateLevelOneEntity() {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
// Attach one GENBANK metadata record whose source id is the class-level GCA accession
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(SeqColEntity.NamingConvention.GENBANK)
.setSourceIdentifier(GCA_ACCESSION)
.setSourceUrl("https://test.ncbi.datasourece.uk");
levelOneEntity.addMetadata(metadata);
// Fill the level 1 JSON object with hard-coded attribute values
jsonLevelOne.setNames("mfxUkK3J5y7BGVW7hJWcJ3erxuaMX6xm");
jsonLevelOne.setSequences("dda3Kzi1Wkm2A8I99WietU1R8J4PL-D6");
jsonLevelOne.setLengths("Ms_ixPgQMJaM54dVntLWeovXSO7ljvZh");
jsonLevelOne.setMd5DigestsOfSequences("_6iaYtcWw4TZaowlL7_64Wu9mbHpDUw4");
jsonLevelOne.setSortedNameLengthPairs("QFuKs5Hh8uQwwUtnRxIf8W3zeJoFOp8Z");
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
// Fixed digest assigned to the generated entity
levelOneEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelOneEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelOneEntity;
}

Expand Down Expand Up @@ -128,7 +135,6 @@ public SeqColLevelTwoEntity generateLevelTwoEntity() {
"YfHZgnpuJm4SN3RN4XL1VWWWZwTXtqw5"
));
levelTwoEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelTwoEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelTwoEntity;
}
}
4 changes: 2 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesUcsc =
(List<SeqColExtendedDataEntity<List<Integer>>>) ucscExtendedDataMap.get("integerListExtDataList");
levelOneEntityUcsc = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC);
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC, GCA_ACCESSION);
Optional<String> resultDigestUcsc = seqColService.addFullSequenceCollection(
levelOneEntityUcsc, extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc);
if (resultDigestUcsc.isPresent()) {
Expand All @@ -163,7 +163,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesGenbank = (List<SeqColExtendedDataEntity<List<Integer>>>) genbankExtendedDataMap.get("integerListExtDataList");

levelOneEntityGenbank = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK);
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK, GCA_ACCESSION);
Optional<String> resultDigestGenbank = seqColService.addFullSequenceCollection(
levelOneEntityGenbank, extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank);
if (resultDigestGenbank.isPresent()) {
Expand Down
Loading
Loading