# SBOL2Build

In [9]:
import sbol2
import tyto
from Bio import Restriction
from pydna.dseqrecord import Dseqrecord
from itertools import product
from typing import Dict, Iterable, List, Union, Optional, Tuple

In [10]:
# this document is used by all the next 
doc = sbol2.Document()
doc.addNamespace('http://SBOL2Build#', 'SBOL2Build')

# Restriction enzyme

## Target function in SBOL3

In [11]:
#target function

def ed_restriction_enzyme(name:str, **kwargs) -> sbol3.ExternallyDefined:
    """Creates an ExternallyDefined Restriction Enzyme Component from rebase.

    :param name: Name of the SBOL ExternallyDefined, used by PyDNA. Case sensitive, follow standard restriction enzyme nomenclature, i.e. 'BsaI'
    :param kwargs: Keyword arguments of any other ExternallyDefined attribute.
    :return: An ExternallyDefined object.
    """
    check_enzyme = Restriction.__dict__[name]
    definition=f'http://rebase.neb.com/rebase/enz/{name}.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.8
    return sbol3.ExternallyDefined([sbol3.SBO_PROTEIN], definition=definition, name=name, **kwargs)

NameError: name 'sbol3' is not defined

## SBOL 2 implementation

In [12]:
def rebase_restriction_enzyme(name:str, **kwargs) -> sbol2.ComponentDefinition:
    """Creates an ComponentDefinition Restriction Enzyme Component from rebase.

    :param name: Name of the SBOL ExternallyDefined, used by PyDNA. Case sensitive, follow standard restriction enzyme nomenclature, i.e. 'BsaI'
    :param kwargs: Keyword arguments of any other ComponentDefinition attribute.
    :return: A ComponentDefinition object.
    """
    check_enzyme = Restriction.__dict__[name]
    definition=f'http://rebase.neb.com/rebase/enz/{name}.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.8
    cd = sbol2.ComponentDefinition(name)
    cd.types = sbol2.BIOPAX_PROTEIN
    cd.name = name
    cd.roles = []
    cd.wasDerivedFrom = definition
    cd.description = f'Restriction enzyme {name} from REBASE.'
    return cd



## Tests

In [13]:
bsai = rebase_restriction_enzyme(name="BsaI")

In [14]:
bsai.wasDerivedFrom
print(bsai.name)

BsaI


# DNA ComponentDefinition with Sequence

## Target function in SBOL3

In [None]:
#target helper function
def dna_component_with_sequence(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]:
    """Creates a DNA Component and its Sequence.

    :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'.
    :param sequence: The DNA sequence of the Component encoded in IUPAC.
    :param kwargs: Keyword arguments of any other Component attribute.
    :return: A tuple of Component and Sequence.
    """
    comp_seq = sbol3.Sequence(f'{identity}_seq', elements=sequence, encoding=sbol3.IUPAC_DNA_ENCODING)
    dna_comp = sbol3.Component(identity, sbol3.SBO_DNA, sequences=[comp_seq], **kwargs)
    return dna_comp, comp_seq


## SBOL2 implementation

In [15]:
def dna_componentdefinition_with_sequence2(identity: str, sequence: str, **kwargs) -> Tuple[sbol2.ComponentDefinition, sbol2.Sequence]:
    """Creates a DNA ComponentDefinition and its Sequence.

    :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'.
    :param sequence: The DNA sequence of the Component encoded in IUPAC.
    :param kwargs: Keyword arguments of any other Component attribute.
    :return: A tuple of ComponentDefinition and Sequence.
    """
    comp_seq = sbol2.Sequence(f'{identity}_seq', elements=sequence, encoding=sbol2.SBOL_ENCODING_IUPAC)
    dna_comp = sbol2.ComponentDefinition(identity, sbol2.BIOPAX_DNA, **kwargs)
    dna_comp.sequences = [comp_seq]

    return dna_comp, comp_seq

## Tests

# Part in Backbone from SBOL

In [None]:
#target func

def part_in_backbone_from_sbol(identity: Union[str, None],  sbol_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]:
    """Restructures a non-hierarchical plasmid Component to follow the part-in-backbone pattern following BP011.
    It overwrites the SBOL3 Component provided. 
    A part inserted into a backbone is represented by a Component that includes both the part insert 
    as a feature that is a SubComponent and the backbone as another SubComponent.
    For more information about BP011 visit https://github.com/SynBioDex/SBOL-examples/tree/main/SBOL/best-practices/BP011 

    :param identity: The identity of the Component, is its a String it build a new SBOL Component, if None it adds on top of the input. The identity of Sequence is also identity with the suffix '_seq'.
    :param sbol_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence.
    :param part_location: List of 2 integers that indicates the start and the end of the unitary part. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science.
    :param part_roles: List of strings that indicates the roles to add on the part.
    :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3)
    :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology.    
    :param kwargs: Keyword arguments of any other Component attribute.
    :return: A tuple of Component and Sequence.
    """
    if len(part_location) != 2:
        raise ValueError('The part_location only accepts 2 int values in a list.')
    if len(sbol_comp.sequences)!=1:
        raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences')
    sequence = sbol_comp.sequences[0].lookup().elements
    if identity == None:
        part_in_backbone_component = sbol_comp 
        part_in_backbone_seq = sbol_comp.sequences[0]
    else:
        part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs)
    part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED)
    for part_role in part_roles:  
        part_in_backbone_component.roles.append(part_role)  
    # creating part feature    
    part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1])
    #TODO: add the option of fusion sites to be of different lenghts
    insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1)
    insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3)
    part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles)
    part_sequence_feature.roles.append(tyto.SO.engineered_insert)
    insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site])
    #TODO: infer topology from the input
    if linear:
        part_in_backbone_component.types.append(sbol3.SO_LINEAR)
        part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION)
        # creating backbone feature
        open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1)
        open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3)
        open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2])
    else: 
        part_in_backbone_component.types.append(sbol3.SO_CIRCULAR)
        part_in_backbone_component.roles.append(tyto.SO.plasmid_vector)
        # creating backbone feature
        open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2)
        open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1)
        open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2])
    part_in_backbone_component.features.append(part_sequence_feature)
    part_in_backbone_component.features.append(insertion_sites_feature)
    part_in_backbone_component.features.append(open_backbone_feature)
    backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature)
    part_in_backbone_component.constraints.append(backbone_dropout_meets)
    #TODO: Add a branch to create a component without overwriting the WHOLE input component
    #removing repeated types and roles
    part_in_backbone_component.types = set(part_in_backbone_component.types)
    part_in_backbone_component.roles = set(part_in_backbone_component.roles)
    return part_in_backbone_component, part_in_backbone_seq

In [218]:
def part_in_backbone_from_sbol2(identity: Union[str, None],  sbol_comp: sbol2.ModuleDefinition, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol2.ComponentDefinition, sbol2.Sequence]:
    if len(part_location) != 2:
        raise ValueError('The part_location only accepts 2 int values in a list.')
    if len(sbol_comp.sequences)!=1:
        raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences')
    sequence = doc.find(sbol_comp.sequences[0]).elements
    if identity == None:
        part_in_backbone_component = sbol_comp 
        part_in_backbone_seq = doc.find(sbol_comp.sequences[0]).elements
        part_in_backbone_component.sequences = [part_in_backbone_seq]
    else:
        part_in_backbone_component, part_in_backbone_seq = rebase_dna_component_with_sequence(identity, sequence, **kwargs)
    # double stranded
    part_in_backbone_component.addRole('http://identifiers.org/so/SO:0000985')
    for part_role in part_roles:  
        part_in_backbone_component.addRole(part_role)

    # creating part annotation    
    part_location_comp = sbol2.Range( start=part_location[0], end=part_location[1])
    insertion_site_location1 = sbol2.Range( uri="insertloc1", start=part_location[0], end=part_location[0]+fusion_site_length) #order 1
    insertion_site_location2 = sbol2.Range( uri="insertloc2", start=part_location[1]-fusion_site_length, end=part_location[1]) #order 3

    part_sequence_annotation = sbol2.SequenceAnnotation('part_sequence_annotation')
    part_sequence_annotation.roles = part_roles
    part_sequence_annotation.locations.add(part_location_comp)

    part_sequence_annotation.addRole(tyto.SO.engineered_insert)
    insertion_sites_annotation = sbol2.SequenceAnnotation('insertion_sites_annotation')

    insertion_sites_annotation.locations.add(insertion_site_location1)
    insertion_sites_annotation.locations.add(insertion_site_location2)
    
    insertion_sites_annotation.roles = [tyto.SO.insertion_site]
    if linear:
        part_in_backbone_component.addRole('http://identifiers.org/so/SO:0000987') #linear
        part_in_backbone_component.addRole('http://identifiers.org/so/SO:0000804') #engineered region
        # creating backbone feature
        open_backbone_location1 = sbol2.Range(start=1, end=part_location[0]+fusion_site_length-1) #order 1
        open_backbone_location2 = sbol2.Range(start=part_location[1]-fusion_site_length, end=len(sequence)) #order 3
        open_backbone_annotation = sbol2.SequenceAnnotation(locations=[open_backbone_location1, open_backbone_location2])
    else: 
        part_in_backbone_component.addRole('http://identifiers.org/so/SO:0000988') #circular
        part_in_backbone_component.addRole(tyto.SO.plasmid_vector)
        # creating backbone feature
        open_backbone_location1 = sbol2.Range( uri="backboneloc1", start=1, end=part_location[0]+fusion_site_length-1 ) #order 2
        open_backbone_location2 = sbol2.Range( uri="backboneloc2", start=part_location[1]-fusion_site_length, end=len(sequence)) #order 1
        open_backbone_annotation = sbol2.SequenceAnnotation('open_backbone_annotation')
        open_backbone_annotation.locations.add(open_backbone_location1)
        open_backbone_annotation.locations.add(open_backbone_location2)
        
    part_in_backbone_component.sequenceAnnotations.add(part_sequence_annotation)
    part_in_backbone_component.sequenceAnnotations.add(insertion_sites_annotation)
    part_in_backbone_component.sequenceAnnotations.add(open_backbone_annotation) 
    # use sequenceconstrait with precedes
    # backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_annotation, object=open_backbone_annotation) #????
    backbone_dropout_meets = sbol2.sequenceconstraint.SequenceConstraint(uri='backbone_dropout_meets', restriction=sbol2.SBOL_RESTRICTION_PRECEDES) #might need to add uri as param 2
    backbone_dropout_meets.subject = part_sequence_annotation
    backbone_dropout_meets.object = open_backbone_annotation
    
    part_in_backbone_component.sequenceConstraints.add(backbone_dropout_meets)
    #TODO: Add a branch to create a component without overwriting the WHOLE input component
    #removing repeated types and roles
    part_in_backbone_component.types = set(part_in_backbone_component.types)
    part_in_backbone_component.roles = set(part_in_backbone_component.roles)
    return part_in_backbone_component, part_in_backbone_seq

In [232]:
# benchling plasmid test
doc = sbol2.Document()
benchling_comp = sbol2.ComponentDefinition('benchling_comp', sbol2.BIOPAX_DNA)

benchSeq = sbol2.Sequence('benchSeq', 'tcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcgGGAGtttacagctagctcagtcctaggtattatgctagcTACTCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtct', sbol2.SBOL_ENCODING_IUPAC)

doc.addComponentDefinition(benchling_comp)
doc.addSequence(benchSeq)
benchling_comp.sequences = [benchSeq] 

resultComponent, resultSequence = rebase_part_in_backbone_from_sbol(identity="benchling_comp", sbol_comp=benchling_comp, part_location=[531, 602], part_roles=[], fusion_site_length=6)

doc2 = sbol2.Document()

doc2.addComponentDefinition(resultComponent)
doc2.addComponentDefinition(resultSequence)


http://examples.org/Range/insertloc1/1 at 5205243376
http://examples.org/Range/insertloc2/1 at 5205233920


In [233]:
doc2.write('example1.xml')

'Invalid. sbol-11403:\x00 Strong Validation Error:\x00 The Component referenced by the subject property of a SequenceConstraint MUST be contained by the ComponentDefinition that contains the SequenceConstraint. \x00Reference: SBOL Version 2.3.0 Section 7.7.6 on page 36 :\x00 http://examples.org/ComponentDefinition/benchling_comp/backbone_dropout_meets/1\x00  Validation failed.'

In [174]:
# part in backbone test
doc = sbol2.Document()
sai_plasmid = sbol2.ComponentDefinition('sai_plasmid', sbol2.BIOPAX_DNA)
doc.addComponentDefinition(sai_plasmid)

saiSeq = sbol2.Sequence('sai_sequence', 'gcttcctcgctcactgactcgctgcacgaggcagacctcagcgctagcggagtgtatactggcttactatgttggcactgatgagggtgtcagtgaagtgcttcatgtggcaggagaaaaaaggctgcaccggtgcgtcagcagaatatgtgatacaggatatattccgcttcctcgctcactgactcgctacgctcggtcgttcgactgcggcgagcggaaatggcttacgaacggggcggagatttcctggaagatgccaggaagatacttaacagggaagtgagagggccgcggcaaagccgtttttccataggctccgcccccctgacaagcatcacgaaatctgacgctcaaatcagtggtggcgaaacccgacaggactataaagataccaggcgtttcccctggcggctccctcgtgcgctctcctgttcctgcctttcggtttaccggtgtcattccgctgttatggccgcgtttgtctcattccacgcctgacactcagttccgggtaggcagttcgctccaagctggactgtatgcacgaaccccccgttcagtccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggaaagacatgcaaaagcaccactggcagcagccactggtaattgatttagaggagttagtcttgaagtcatgcgccggttaaggctaaactgaaaggacaagttttggtgactgcgctcctccaagccagttacctcggttcaaagagttggtagctcagagaaccttcgaaaaaccgccctgcaaggcggttttttcgttttcagagcaagagattacgcgcagaccaaaacgatctcaagaagatcatcttattaaggggtctgacgctcagtggaacgaaaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccttagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctcgagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtacatcagagattttgagacacaaCCAATTATTGAAGGCCTCCCTAACGGGGGGCCTTTTTTTGTTTCTGGTCTCCCGCTTTCTATGATTGGTCCAGATTCGTTACCAATTGACAGCTAGCTCAGTCCTAGGTATATACATACATGCTTGTTTGTTTGTAAACAGCGCGGGTGAGAGGGATTCGTTACCAATTGACAATTGATTGGACGTTCAATATAATGCTAGCCTGAAGCGGTCAACGCATGTGCTTTGCGTTCTGATGAGACAGTGATGTCGAAACCGCCTCTACAAATAATTTTGTTTAACTTTACGAGGGCGATCCTATGGCACGTACCCCGAGCCGTAGCAGCATTGGTAGCCTGCGTAGTCCGCATACCCATAAAGCAATTCTGACCAGCACCATTGAAATCCTGAAAGAATGTGGTTATAGCGGTCTGAGCATTGAAAGCGTTGCACGTCGTGCCGGTGCAAGCAAACCGACCATTTATCGTTGGTGGACCAATAAAGCAGCACTGATTGCCGAAGTGTATGAAAATGAAAGCGAACAGGTGCGTAAATTTCCGGATCTGGGTAGCTTTAAAGCCGATCTGGATTTTCTGCTGCGTAATCTGTGGAAAGTTTGGCGTGAAACCATTTGTGGTGAAGCATTTCGTTGTGTTATTGCAGAAGCACAGCTGGACCCTGCAACCCTGACCCAGCTGAAAGATCAGTTTATGGAACGTCGTCGTGAGATGCCGAAAAAACTGGTTGAAAATGCCATTAGCAATGGTGAACTGCCGAAAGATACCAATCGTGAACTGCTGCTGGATATGATTTTTGGTTTTTGTTGGTATCGCCTGCTGACCGAACAGCTGACCGTTGAACAGGATATTGAAGAATTTACCTTCCTGCTGATTAATGGTGTTTGTCCGGGTACACAGCGTTAAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTGTCAACGATCGTTGGCTGTGTTGACAATTAATCATCGGCTCGTATAATGTGTGGAATTGTGAGCGCTCACAATTTACTCCACCGTTGGCTTTTTTCCCTATCAGTGATAGAGATTGACATCCCTATCAGTGATAGAGATAATGAGCACCTGAAGCGCTCAACGGGTGTGCTTCCCGTTCTGATGAGTCCGTGAGGACGAAAGCGCCTCTACAAATAATTTTGTTTAAGAGTCTATGGACTATGTTTTCACAAAGGAAGTACCAGGATGGCACGTAAAACCGCAGCAGAAGCAGAAGAAACCCGTCAGCGTATTATTGATGCAGCACTGGAAGTTTTTGTTGCACAGGGTGTTAGTGATGCAACCCTGGATCAGATTGCACGTAAAGCCGGTGTTACCCGTGGTGCAGTTTATTGGCATTTTAATGGTAAACTGGAAGTTCTGCAGGCAGTTCTGGCAAGCCGTCAGCATCCGCTGGAACTGGATTTTACACCGGATCTGGGTATTGAACGTAGCTGGGAAGCAGTTGTTGTTGCAATGCTGGATGCAGTTCATAGTCCGCAGAGCAAACAGTTTAGCGAAATTCTGATTTATCAGGGTCTGGATGAAAGCGGTCTGATTCATAATCGTATGGTTCAGGCAAGCGATCGTTTTCTGCAGTATATTCATCAGGTTCTGCGTCATGCAGTTACCCAGGGTGAACTGCCGATTAATCTGGATCTGCAGACCAGCATTGGTGTTTTTAAAGGTCTGATTACCGGTCTGCTGTATGAAGGTCTGCGTAGCAAAGATCAGCAGGCACAGATTATCAAAGTTGCACTGGGTAGCTTTTGGGCACTGCTGCGTGAACCGCCTCGTTTTCTGCTGTGTGAAGAAGCACAGATTAAACAGGTGAAATCCTTCGAATAATTCAGCCAAAAAACTTAAGACCGCCGGTCTTGTCCACTACCTTGCAGTAATGCGGTGGACAGGATCGGCGGTTTTCTTTTCTCTTCTCAAGGGCACCAGGAATCTGAACGATTCGTTACCAATTGACATATTTAAAATTCTTGTTTAAAATGCTAGCTCGTCACTAGAGGGCGATAGTGACAAACTTGACAACTCATCACTTCCTAGGTATAATGCTAGCCTGAAGAAGTCAATTAATGTGCTTTTAATTCTGATGAGTCGGTGACGACGAAACTTCCTCTACAAATAATTTTGTTTAACCCCCCGAGGAGTAGCACATGCCGAAACTGGGTATGCAGAGCATTCGTCGTCGTCAGCTGATTGATGCAACCCTGGAAGCAATTAATGAAGTTGGTATGCATGATGCAACCATTGCACAGATTGCACGTCGTGCCGGTGTTAGCACCGGTATTATTAGCCATTATTTCCGCGATAAAAACGGTCTGCTGGAAGCAACCATGCGTGATATTACCAGCCAGCTGCGTGATGCAGTTCTGAATCGTCTGCATGCACTGCCGCAGGGTAGCGCAGAACAGCGTCTGCAGGCAATTGTTGGTGGTAATTTTGATGAAACCCAGGTTAGCAGCGCAGCAATGAAAGCATGGCTGGCATTTTGGGCAAGCAGCATGCATCAGCCGATGCTGTATCGTCTGCAGCAGGTTAGCAGTCGTCGTCTGCTGAGCAATCTGGTTAGCGAATTTCGTCGTGAACTGCCTCGTGAACAGGCACAAGAGGCAGGTTATGGTCTGGCAGCACTGATTGATGGTCTGTGGCTGCGTGCAGCACTGAGCGGTAAACCGCTGGATAAAACCCGTGCAAATAGCCTGACCCGTCATTTTATCACCCAGCATCTGCCGACCGATTAACCAATTATTGAACACCCTTCGGGGTGTTTTTTTGTTTCTGGTCTACCGTAATACTCCACCGTTGGCTTTTTTCCCTATCAGTGATAGAGATTGACATCCCTATCAGTGATAGAGATAATGAGCACCTGAAGGGGTCAGTTGATGTGCTTTCAACTCTGATGAGTCAGTGATGACGAAACCCCCTCTACAAATAATTTTGTTTAACTATGGACTATGTTTTCACATACGAGGGGGATTAGATGAACAAAACCATTGATCAGGTGCGTAAAGGTGATCGTAAAAGCGATCTGCCGGTTCGTCGTCGTCCGCGTCGTAGTGCCGAAGAAACCCGTCGTGATATTCTGGCAAAAGCCGAAGAACTGTTTCGTGAACGTGGTTTTAATGCAGTTGCCATTGCAGATATTGCAAGCGCACTGAATATGAGTCCGGCAAATGTGTTTAAACATTTTAGCAGCAAAAACGCACTGGTTGATGCAATTGGTTTTGGTCAGATTGGTGTTTTTGAACGTCAGATTTGTCCGCTGGATAAAAGCCATGCACCGCTGGATCGTCTGCGTCATCTGGCACGTAATCTGATGGAACAGCATCATCAGGATCATTTCAAACACATACGGGTTTTTATTCAGATCCTGATGACCGCCAAACAGGATATGAAATGTGGCGATTATTACAAAAGCGTGATTGCAAAACTGCTGGCCGAAATTATTCGTGATGGTGTTGAAGCAGGTCTGTATATTGCAACCGATATTCCGGTTCTGGCAGAAACCGTTCTGCATGCACTGACCAGCGTTATTCATCCGGTTCTGATTGCACAAGAAGATATTGGTAATCTGGCAACCCGTTGTGATCAGCTGGTTGATCTGATTGATGCAGGTCTGCGTAATCCGCTGGCAAAATAACCAATTATTGAACACCCTAACGGGTGTTTTTTTTTTTTTGGTCTACCCGCTAACGATCGTTGGCTGTGTTGACAATTAATCATCGGCTCGTATAATGTGTGGAATTGTGAGCGCTCACAATTCTGAAGTAGTCACCGGCTGTGCTTGCCGGTCTGATGAGCCTGTGAAGGCGAAACTACCTCTACAAATAATTTTGTTTAAACCCCCGAGATGAAATACATCCTGTTTGAGGTGTGCGAAATGGGTAAAAGCCGTGAACAGACCATGGAAAATATTCTGAAAGCAGCCAAAAAGAAATTCGGCGAACGTGGTTATGAAGGCACCAGCATTCAAGAAATTACCAAAGAAGCCAAAGTTAACGTTGCAATGGCCAGCTATTACTTTAATGGCAAAGAGAACCTGTACTACGAGGTGTTCAAAAAATACGGTCTGGCAAATGAACTGCCGAACTTTCTGGAAAAAAACCAGTTTAATCCGATTAATGCCCTGCGTGAATATCTGACCGTTTTTACCACCCACATTAAAGAAAATCCGGAAATTGGCACCCTGGCCTATGAAGAAATTATCAAAGAAAGCGCACGCCTGGAAAAAATCAAACCGTATTTTATCGGCAGCTTCGAACAGCTGAAAGAAATTCTGCAAGAGGGTGAAAAACAGGGTGTGTTTCACTTTTTTAGCATCAACCATACCATCCATTGGATTACCAGCATTGTTCTGTTTCCGAAATTCAAAAAATTCATCGATAGCCTGGGTCCGAATGAAACCAATGATACCAATCATGAATGGATGCCGGAAGATCTGGTTAGCCGTATTATTAGCGCACTGACCGATAAACCGAACATTTAAAACGCATGAGAAAGCCCCCGGAAGATCACCTTCCGGGGGCTTTTTTATTGCGCAGGTACTTTTCATACTCCCGCCATTCAGAGAAGAAACCAATTGTCCATATTGCATCAGACATTGCCGTCACTGCGTCTTTTACTGGCTCTTCTCGCTAACCAAACCGGTAACCCCGCTTATTAAAAGCATTCTGTAACAAAGCGGGACCAAAGCCATGACAAAAACGCGTAACAAAAGTGTCTATAATCACGGCAGAAAAGTCCACATTGATTATTTGCACGGCGTCACACTTTGCTATGCCATAGCATTTTTATCCATAAGATTAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATACCCGTTTTTTTGGGCTAGCCTGAAGGGTGTCTCAAGGTGCGTACCTTGACTGATGAGTCCGAAAGGACGAAACACCCCTCTACAAATAATTTTGTTTAAAATGTTCCCTAATAATCAGCAAAGAGGTTACTAGATGGCAGGCGCAGTTGGTCGTCCGCGTCGTAGTGCACCGCGTCGTGCAGGTAAAAATCCGCGTGAAGAAATTCTGGATGCAAGCGCAGAACTGTTTACCCGTCAGGGTTTTGCAACCACCAGTACCCATCAGATTGCAGATGCAGTTGGTATTCGTCAGGCAAGCCTGTATTATCATTTTCCGAGCAAAACCGAAATCTTTCTGACCCTGCTGAAAAGCACCGTTGAACCGAGCACCGTTCTGGCAGAAGATCTGAGCACCCTGGATGCAGGTCCGGAAATGCGTCTGTGGGCAATTGTTGCAAGCGAAGTTCGTCTGCTGCTGAGCACCAAATGGAATGTTGGTCGTCTGTATCAGCTGCCGATTGTTGGTAGCGAAGAATTTGCAGAATATCATAGCCAGCGTGAAGCACTGACCAATGTTTTTCGTGATCTGGCAACCGAAATTGTTGGTGATGATCCGCGTGCAGAACTGCCGTTTCATATTACCATGAGCGTTATTGAAATGCGTCGCAATGATGGTAAAATTCCGAGTCCGCTGAGCGCAGATAGCCTGCCGGAAACCGCAATTATGCTGGCAGATGCAAGCCTGGCAGTTCTGGGTGCACCGCTGCCTGCAGATCGTGTTGAAAAAACCCTGGAACTGATTAAACAGGCAGATGCAAAATAACTCGGTACCAAAGACGAACAATAAGACGCTGAAAAGCGTCTTTTTTCGTTTTGGTCCAATGgTCACCATATATCAAGtttacggctagctcagtcctaggtactatgctagctactagagaaagaggagaaatactagatggctgaagcgcaaaatgatcccctgctgccgggatactcgtttaatgcccatctggtggcgggtttaacgccgattgaggccaacggttatctcgatttttttatcgaccgaccgctgggaatgaaaggttatattctcaatctcaccattcgcggtcagggggtggtgaaaaatcagggacgagaatttgtttgccgaccgggtgatattttgctgttcccgccaggagagattcatcactacggtcgtcatccggaggctcgcgaatggtatcaccagtgggtttactttcgtccgcgcgcctactggcatgaatggcttaactggccgtcaatatttgccaatacggggttctttcgcccggatgaagcgcaccagccgcatttcagcgacctgtttgggcaaatcattaacgccgggcaaggggaagggcgctattcggagctgctggcgataaatctgcttgagcaattgttactgcggcgcatggaagcgattaacgagtcgctccatccaccgatggataatcgggtacgcgaggcttgtcagtacatcagcgatcacctggcagacagcaattttgatatcgccagcgtcgcacagcatgtttgcttgtcgccgtcgcgtctgtcacatcttttccgccagcagttagggattagcgtcttaagctggcgcgaggaccaacgtatcagccaggcgaagctgcttttgagcaccacccggatgcctatcgccaccgtcggtcgcaatgttggttttgacgatcaactctatttctcgcgggtatttaaaaaatgcaccggggccagcccgagcgagttccgtgccggttaataaCCAATTATTGAAGGCCGCTAACGCAGCCTTTTTTTGTTTCTGGTCTCCCAATGGCGGCGCGCCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCGGAAGAGAGTCAATTCAGGGTGGTGAATATGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGGTGCCAGCGTGGTGGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTTCTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCCATTGCTGTGGAAGCTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTTCTCCCATGAGGACGGTACGCGACTGGGCGTGGAGCATCTGGTCGCATTGGGTCACCAGCAAATCGCGCTGTTAGCGGGCCCATTAAGTTCTGTCTCGGCGCGTCTGCGTCTGGCTGGCTGGCATAAATATCTCACTCGCAATCAAATTCAGCCGATAGCGGAACGGGAAGGCGACTGGAGTGCCATGTCCGGTTTTCAACAAACCATGCAAATGCTGAATGAGGGCATCGTTCCCACTGCGATGCTGGTTGCCAACGATCAGATGGCGCTGGGCGCAATGCGCGCCATTACCGAGTCCGGGCTGCGCGTTGGTGCGGATATCTCGGTAGTGGGATACGACGATACCGAAGATAGCTCATGTTATATCCCGCCGTTAACCACCATCAAACAGGATTTTCGCCTGCTGGGGCAAACCAGCGTGGACCGCTTGCTGCAACTCTCTCAGGGCCAGGCGGTGAAGGGCAATCAGCTGTTGCCAGTCTCACTGGTGAAAAGAAAAACCACCCTGGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGATAATCCAGGAGGAAAAAAATGtccagattagataaaagtaaagtgattaacagcgcattagagctgcttaatgaggtcggaatcgaaggtttaacaacccgtaaactcgcccagaagctaggtgtagagcagcctacattgtattggcatgtaaaaaataagcgggctttgctcgacgccttagccattgagatgttagataggcaccatactcacttttgccctttagaaggggaaagctggcaagattttttacgtaataacgctaaaagttttagatgtgctttactaagtcatcgcgatggagcaaaagtacatttaggtacacggcctacagaaaaacagtatgaaactctcgaaaatcaattagcctttttatgccaacaaggtttttcactagagaatgcattatatgcactcagcgctgtggggcattttactttaggttgcgtattggaagatcaagagcatcaagtcgctaaagaagaaagggaaacacctactactgatagtatgccgccattattacgacaagctatcgaattatttgatcaccaaggtgcagagccagccttcttattcggccttgaattgatcatatgcggattagaaaaacaacttaaatgtgaaagtgggtcctaataattggtaacgaatcagacaattgacggctcgagggagtagcatagggtttgcagaatccctgcttcgtccatttgacaggcacattatgcatcgatgataagctgtcaaacatgagcagatcctctacgccggacgcatcgtggccggcatcaccggcgccacaggtgcggttgctggcgcctatatcgccgacatcaccgatggggaagatcgggctcgccacttcgggctcatgagcaaatattttatctgaggt', sbol2.SBOL_ENCODING_IUPAC)
doc.addSequence(saiSeq)
sai_plasmid.sequences = [saiSeq] 

print(saiSeq.elements)

rebase_part_in_backbone_from_sbol('new_part', sai_plasmid, [6762, 7601], [], 3)

gcttcctcgctcactgactcgctgcacgaggcagacctcagcgctagcggagtgtatactggcttactatgttggcactgatgagggtgtcagtgaagtgcttcatgtggcaggagaaaaaaggctgcaccggtgcgtcagcagaatatgtgatacaggatatattccgcttcctcgctcactgactcgctacgctcggtcgttcgactgcggcgagcggaaatggcttacgaacggggcggagatttcctggaagatgccaggaagatacttaacagggaagtgagagggccgcggcaaagccgtttttccataggctccgcccccctgacaagcatcacgaaatctgacgctcaaatcagtggtggcgaaacccgacaggactataaagataccaggcgtttcccctggcggctccctcgtgcgctctcctgttcctgcctttcggtttaccggtgtcattccgctgttatggccgcgtttgtctcattccacgcctgacactcagttccgggtaggcagttcgctccaagctggactgtatgcacgaaccccccgttcagtccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggaaagacatgcaaaagcaccactggcagcagccactggtaattgatttagaggagttagtcttgaagtcatgcgccggttaaggctaaactgaaaggacaagttttggtgactgcgctcctccaagccagttacctcggttcaaagagttggtagctcagagaaccttcgaaaaaccgccctgcaaggcggttttttcgttttcagagcaagagattacgcgcagaccaaaacgatctcaagaagatcatcttattaaggggtctgacgctcagtggaacgaaaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccttagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaat

SBOLError: (<SBOLErrorCode.SBOL_ERROR_URI_NOT_UNIQUE: 17>, 'The object http://examples.org/insertion_sites_annotation/example/1 is already contained by the http://sbols.org/v2#location property')

In [None]:
# Initialize SBOL2 Document
sbol2.Config.setOption('sbol_typed_uris', True)
doc = sbol2.Document()

# Create a ComponentDefinition with a DNA sequence
component_def = sbol2.ComponentDefinition('example_component', sbol2.BIOPAX_DNA)
doc.addComponentDefinition(component_def)

# Add a sequence to the ComponentDefinition
sequence = sbol2.Sequence('example_sequence', 'ATGCTGACTGCTAGCTGACTAGC', sbol2.SBOL_ENCODING_IUPAC)
doc.addSequence(sequence)
component_def.sequences = [sequence.identity]  # Associate the sequence with the ComponentDefinition

# Create a SequenceAnnotation
seq_annotation = sbol2.SequenceAnnotation('example_annotation')
range_location = sbol2.Range('new range', 1, 6)  # Create a Range object
seq_annotation.locations.add(range_location)  # Add it to the annotation
component_def.sequenceAnnotations.add(seq_annotation)

# Optionally add a role (e.g., promoter, CDS) to describe the feature
seq_annotation.roles = ['http://identifiers.org/so/SO:0000167']  # Promoter role from Sequence Ontology

# Save the SBOL2 document
doc.write('sequence_annotation_example.xml')

print('SequenceAnnotation created successfully!')


In [None]:
component_def = sbol2.ComponentDefinition('example_component', sbol2.BIOPAX_DNA)
sequence = sbol2.Sequence('example_sequence', 'ATGCTGACTGCTAGCTGACTAGC', sbol2.SBOL_ENCODING_IUPAC)
component_def.sequences = [sequence.identity]  # Associate the sequence with the ComponentDefinition


anno_1 = sbol2.SequenceAnnotation('new_anno')
anno_2 = sbol2.SequenceAnnotation('new_anno_2')

range_location = sbol2.Range('new range', 1, 6)  # Create a Range object
anno_1.locations.add(range_location)  # Add it to the annotation)

component_def.sequenceAnnotations.add(anno_1)
component_def.sequenceAnnotations.add(anno_2)

print(sbol2.SBOL_RESTRICTION_PRECEDES)

# new_constraint = sbol2.SequenceConstraint(restriction=SBOL_RESTRICTION_PRECEDES, subject=anno_1, object=anno_2) #????
backbone_dropout_meets = sbol2.sequenceconstraint.SequenceConstraint (
    'backbone_dropout',
    'backbone_dropout_uri',
    anno_1,
    anno_2,
    sbol2.SBOL_RESTRICTION_PRECEDES
)

component_def.sequenceConstraints.add(backbone_dropout_meets)

sbol2.DNA

In [13]:
#target func
class Assembly_plan_composite_in_backbone_single_enzyme():
    """Creates a Assembly Plan.
    :param name: Name of the assembly plan Component.
    :param parts_in_backbone: Parts in backbone to be assembled. 
    :param acceptor_backbone:  Backbone in which parts are inserted on the assembly. 
    :param restriction_enzymes: Restriction enzyme with correct name from Bio.Restriction as Externally Defined.
    :param document: SBOL Document where the assembly plan will be created.
    :param linear: Boolean to inform if the reactant is linear.
    :param circular: Boolean to inform if the reactant is circular.
    :param **kwargs: Keyword arguments of any other Component attribute for the assembled part.
    """

    def __init__(self, name: str, parts_in_backbone: List[sbol3.Component], acceptor_backbone: sbol3.Component, restriction_enzyme: Union[str,sbol3.ExternallyDefined], document:sbol3.Document):
        self.name = name
        self.parts_in_backbone = parts_in_backbone
        self.acceptor_backbone = acceptor_backbone
        self.restriction_enzyme = restriction_enzyme
        self.products = []
        self.extracted_parts = []
        self.document = document

        #create assembly plan
        self.assembly_plan_component = sbol3.Component(identity=f'{self.name}_assembly_plan', types=sbol3.SBO_FUNCTIONAL_ENTITY)
        self.document.add(self.assembly_plan_component)
        self.composites = []

    def run(self):
        self.assembly_plan_component.features.append(self.restriction_enzyme)
        #extract parts
        part_number = 1
        for part_in_backbone in self.parts_in_backbone:
            part_comp, part_seq = digestion(reactant=part_in_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}_{part_in_backbone.display_id}')
            self.document.add([part_comp, part_seq])
            self.extracted_parts.append(part_comp)
            part_number += 1
        #extract backbone (should be the same?)
        backbone_comp, backbone_seq = digestion(reactant=self.acceptor_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component,  name=f'part_{part_number}')
        self.document.add([backbone_comp, backbone_seq])
        self.extracted_parts.append(backbone_comp)
        
        #create composite part from extracted parts
        composites_list = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component)
        for composite in composites_list:
            composite[0].generated_by.append(self.assembly_plan_component) #
            self.composites.append(composite)
            self.products.append(composite)
            self.document.add(composite)

NameError: name 'sbol3' is not defined

# Digestion

## Target function in SBOL3

In [330]:
def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component, **kwargs)-> Tuple[sbol3.Component, sbol3.Sequence]:
    """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction.
    The product Component is assumed to be the insert for parts in backbone and the backbone for backbones.

    :param reactant: DNA to be digested as SBOL Component, usually a part_in_backbone. 
    :param restriction_enzymes: Restriction enzymes used  Externally Defined.
    :return: A tuple of Component and Interaction.
    """
    if sbol3.SBO_DNA not in reactant.types:
        raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.')
    if len(reactant.sequences)!=1:
        raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences')
    participations=[]
    restriction_enzymes_pydna=[] 
    for re in restriction_enzymes:
        enzyme = Restriction.__dict__[re.name]
        restriction_enzymes_pydna.append(enzyme)
        modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re)
        participations.append(modifier_participation)

    # Inform topology to PyDNA, if not found assuming linear. 
    if is_circular(reactant):
        circular=True
        linear=False
    else: 
        circular=False
        linear=True
        
    reactant_seq = reactant.sequences[0].lookup().elements
    # Dseqrecord is from PyDNA package with reactant sequence
    ds_reactant = Dseqrecord(reactant_seq, linear=linear, circular=circular)
    digested_reactant = ds_reactant.cut(restriction_enzymes_pydna)

    if len(digested_reactant)<2 or len(digested_reactant)>3:
        raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}')
    #TODO select them based on content rather than size.
    elif circular and len(digested_reactant)==2:
        part_extract, backbone = sorted(digested_reactant, key=len)
    elif linear and len(digested_reactant)==3:
        prefix, part_extract, suffix = digested_reactant
    else: raise NotImplementedError('The reactant has no valid topology type')
    
    # Extracting roles from features
    reactant_features_roles = []
    for f in reactant.features:
        for r in f.roles:
             reactant_features_roles.append(r)
    # if part
    if any(n==tyto.SO.engineered_insert for n in reactant_features_roles):
        # Compute the length of single strand sticky ends or fusion sites
        product_5_prime_ss_strand, product_5_prime_ss_end = part_extract.seq.five_prime_end()
        product_3_prime_ss_strand, product_3_prime_ss_end = part_extract.seq.three_prime_end()
    
        product_sequence = str(part_extract.seq)
        prod_component_definition, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence, **kwargs) #str(product_sequence))
        # add sticky ends features
        five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1)
        three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3)
        fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site])
        prod_component_definition.roles.append(tyto.SO.engineered_insert) 
        prod_component_definition.features.append(fusion_sites_feature)

    # if backbone
    elif any(n==tyto.SO.deletion for n in reactant_features_roles):
        # Compute the length of single strand sticky ends or fusion sites
        product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end()
        product_3_prime_ss_strand, product_3_prime_ss_end = backbone.seq.three_prime_end()
        product_sequence = str(backbone.seq)
        prod_component_definition, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence, **kwargs) #str(product_sequence))
        # add sticky ends features
        five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1)
        three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3)
        fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site])
        prod_component_definition.roles.append(tyto.SO.plasmid_vector)
        prod_component_definition.features.append(fusion_sites_feature)

    else: raise NotImplementedError('The reactant has no valid roles')

    #Add reference to part in backbone
    reactant_subcomponent = sbol3.SubComponent(reactant)
    prod_component_definition.features.append(reactant_subcomponent)
    # Create reactant Participation.
    react_subcomp = sbol3.SubComponent(reactant)
    assembly_plan.features.append(react_subcomp)
    reactant_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=react_subcomp)
    participations.append(reactant_participation)
    
    prod_subcomp = sbol3.SubComponent(prod_comp)
    assembly_plan.features.append(prod_subcomp)
    product_participation = sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=prod_subcomp)
    participations.append(product_participation)
   
    # Make Interaction
    interaction = sbol3.Interaction(types=[tyto.SBO.cleavage], participations=participations)
    assembly_plan.interactions.append(interaction)
                    
    return prod_comp, prod_seq

NameError: name 'sbol3' is not defined

## SBOL2 implementation

In [16]:
# helper function
def is_circular(obj: Union[sbol2.ComponentDefinition, sbol2.Component]) -> bool:
    """Check if an SBOL Component or Feature is circular.
    :param obj: design to be checked
    :return: true if circular
    """    
    return any(n==sbol2.SO_CIRCULAR for n in obj.types)

In [34]:
def digestion2(reactant:sbol2.ModuleDefinition, restriction_enzymes:List[sbol2.ComponentDefinition], assembly_plan:sbol2.ModuleDefinition, document: sbol2.Document, **kwargs)-> Tuple[sbol2.ComponentDefinition, sbol2.Sequence]:
    """Digests a ModuleDefinition using the provided restriction enzymes and creates a product ComponentDefinition and a digestion Interaction.
    The product ComponentDefinition is assumed to be the insert for parts in backbone and the open backbone for backbones.

    :param reactant: DNA to be digested as SBOL ModuleDefinition, usually a part_in_backbone. 
    :param restriction_enzymes: Restriction enzymes used ComponentDefinition.
    :param document: SBOL2 document to be used to extract referenced objects.
    :return: A tuple of ComponentDefinition and Sequence.
    """
    # extract component definition from module
    reactant_def_URI = reactant.functionalComponents[0].definition
    reactant_component_definition = document.getComponentDefinition(reactant_def_URI)

    if sbol2.BIOPAX_DNA not in reactant_component_definition.types:
        raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.')
    if len(reactant_component_definition.sequences)!=1: # TODO review if true for MD, maybe for MD it will be 5
        # looks like module def has 5 sequences, maybe try to extract toplevel?
        raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences')
    participations=[]
    restriction_enzymes_pydna=[] 
    
    for re in restriction_enzymes:
        enzyme = Restriction.__dict__[re.name]
        restriction_enzymes_pydna.append(enzyme)
        modifier_participation = sbol2.Participation(uri='restriction', participant=re)
        modifier_participation.roles = ['http://identifiers.org/biomodels.sbo/SBO:0000019']
        participations.append(modifier_participation)

    # Inform topology to PyDNA, if not found assuming linear. 
    if is_circular(reactant_component_definition):
        circular=True
        linear=False
    else: 
        circular=False
        linear=True
        
    reactant_seq = reactant_component_definition.sequences[0]
    reactant_seq = document.getSequence(reactant_seq).elements
    # Dseqrecord is from PyDNA package with reactant sequence
    ds_reactant = Dseqrecord(reactant_seq, circular=circular)
    print(f"ds reactant: {ds_reactant}")
    print(f"restriction enzymes: {restriction_enzymes_pydna}")
    digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) #TODO see if ds_reactant.cut is working, causing problems downstream

    print(f"digested reactant: {digested_reactant}")


    if len(digested_reactant)<2 or len(digested_reactant)>3: 
        raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}')
    #TODO select them based on content rather than size.
    elif circular and len(digested_reactant)==2:
        part_extract, backbone = sorted(digested_reactant, key=len)
    elif linear and len(digested_reactant)==3:
        prefix, part_extract, suffix = digested_reactant
    else: raise NotImplementedError('The reactant has no valid topology type')
    
    # Extracting roles from features
    reactant_features_roles = []
    for f in reactant_component_definition.components:
        for r in f.roles:
             reactant_features_roles.append(r)
    # if part
    test = True
    if test == True: #any(n==tyto.SO.engineered_insert for n in reactant_features_roles):
        # Compute the length of single strand sticky ends or fusion sites
        product_5_prime_ss_strand, product_5_prime_ss_end = part_extract.seq.five_prime_end()
        product_3_prime_ss_strand, product_3_prime_ss_end = part_extract.seq.three_prime_end()
    
        product_sequence = str(part_extract.seq)
        prod_component_definition, prod_seq = dna_componentdefinition_with_sequence2(identity=f'{reactant_component_definition.name}_part_extract', sequence=product_sequence, **kwargs) #str(product_sequence))
        # add sticky ends annotations
        five_prime_fusion_site_location = sbol2.Range(uri="five_prime_fusion_site_location", start=1, end=len(product_5_prime_ss_end)) #sequence=product_sequence, order 1
        three_prime_fusion_site_location = sbol2.Range(uri="three_prime_fusion_site_location", start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence)) #sequence=product_sequence, order 3
        fusion_sites_annotation = sbol2.SequenceAnnotation(uri="fusion_sites_annotation")
        fusion_sites_annotation.locations.add(five_prime_fusion_site_location)
        fusion_sites_annotation.locations.add(three_prime_fusion_site_location)
        fusion_sites_annotation.addRole(tyto.SO.insertion_site)

        prod_component_definition.addRole(tyto.SO.engineered_insert) 
        prod_component_definition.sequenceAnnotations.add(fusion_sites_annotation)

    # if backbone
    elif any(n==tyto.SO.deletion for n in reactant_features_roles):
        # Compute the length of single strand sticky ends or fusion sites
        product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end()
        product_3_prime_ss_strand, product_3_prime_ss_end = backbone.seq.three_prime_end()
        product_sequence = str(backbone.seq)
        prod_component_definition, prod_seq = dna_componentdefinition_with_sequence2(identity=f'{reactant_component_definition.name}_backbone', sequence=product_sequence, **kwargs) #str(product_sequence))
        # add sticky ends annotations
        five_prime_fusion_site_location = sbol2.Range(uri="five_prime_fusion_site_location", start=1, end=len(product_5_prime_ss_end)) #sequence=product_sequence, order 1
        three_prime_fusion_site_location = sbol2.Range(uri="three_prime_fusion_site_location", start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence)) #sequence=product_sequence, order 3
        fusion_sites_annotation = sbol2.SequenceAnnotation(uri="fusion_sites_annotation")
        fusion_sites_annotation.locations.add(five_prime_fusion_site_location)
        fusion_sites_annotation.locations.add(three_prime_fusion_site_location)
        fusion_sites_annotation.addRole(tyto.SO.insertion_site)

        prod_component_definition.addRole(tyto.SO.plasmid_vector) 
        prod_component_definition.sequenceAnnotations.add(fusion_sites_annotation)


    else: raise NotImplementedError('The reactant has no valid roles')

    #Add reference to part in backbone
    reactant_component = sbol2.FunctionalComponent(uri="reactant_component", definition=reactant_component_definition)
    prod_component_definition.components.add(reactant_component)
    # Create reactant Participation.
    reaction_component = sbol2.FunctionalComponent(uri="reaction_component", definition=reactant_component_definition)
    assembly_plan.functionalComponents.add(reaction_component)
    reactant_participation = sbol2.Participation(uri="reactant_participation", participant=reaction_component)
    reactant_participation.roles = [sbol2.SBO_REACTANT]
    participations.append(reactant_participation)
    
    prod_subcomp = sbol2.FunctionalComponent(uri="product_component", definition=prod_component_definition)
    assembly_plan.functionalComponents.add(prod_subcomp)
    product_participation = sbol2.Participation(uri="product_participation", participant=prod_subcomp)
    product_participation.roles = [sbol2.SBO_REACTANT]
    participations.append(product_participation)
   
    # Make Interaction
    interaction = sbol2.Interaction(uri="asssembly_plan_interaction", interaction_type=tyto.SBO.cleavage)
    interaction.participations = participations
    assembly_plan.interactions.add(interaction)
                    
    return prod_component_definition, prod_seq

## Tests

In [39]:
digestion_doc = sbol2.Document()
digestion_doc.read('Test/Part_in_backbone_defined.xml')
md = digestion_doc.getModuleDefinition("https://sbolcanvas.org/module1")
assembly_plan = sbol2.ModuleDefinition('new_assembly_plan')


# all attributes of ModuleDefinition
# for element in md.__dict__:
#     print(element)


# top level functional component(what we are after)
top_func_component = md.functionalComponents[0]


comp_def_uri = top_func_component.definition # uri as a string
comp_def = digestion_doc.getComponentDefinition(comp_def_uri) #uri as object(annoying that it has to be done this way)

print(comp_def)
print(comp_def.__dict__)
print(comp_def.sequences)

# for subcomp in comp_def.components:
#     print(f'Subcomponent ID: {subcomp.identity}')
#     print(f'Definition: {subcomp.definition}')


# top_level = 
# print(top_level)

digestion2(md, [bsai], assembly_plan, digestion_doc)
digestion_doc.add(assembly_plan)



https://sbolcanvas.org/j0nju1Eb/1
{'owned_objects': {'http://sbols.org/v2#sequenceAnnotation': [<sbol2.sequenceannotation.SequenceAnnotation object at 0x7fea5a15c190>, <sbol2.sequenceannotation.SequenceAnnotation object at 0x7fea5a158940>, <sbol2.sequenceannotation.SequenceAnnotation object at 0x7fea5a15c610>, <sbol2.sequenceannotation.SequenceAnnotation object at 0x7fea5a15cac0>], 'http://sbols.org/v2#component': [<sbol2.component.Component object at 0x7fea5a1553d0>, <sbol2.component.Component object at 0x7fea5a158340>, <sbol2.component.Component object at 0x7fea5a13ec10>, <sbol2.component.Component object at 0x7fea5a155d00>], 'http://sbols.org/v2#sequenceConstraint': [<sbol2.sequenceconstraint.SequenceConstraint object at 0x7fea5a165a60>, <sbol2.sequenceconstraint.SequenceConstraint object at 0x7fea5a165520>, <sbol2.sequenceconstraint.SequenceConstraint object at 0x7fea5a165e80>]}, 'properties': {'http://sbols.org/v2#identity': [rdflib.term.URIRef('https://sbolcanvas.org/j0nju1Eb/1')

In [40]:
digestion_doc.write('digestion2_test.xml')

'Invalid. sbol-12002:\x00 Strong Validation Error:\x00 The participant property of a Participation is REQUIRED and MUST contain a URI reference to a FunctionalComponent. \x00Reference: SBOL Version 2.3.0 Section 7.9.4 on page 45 :\x00 http://examples.org/ModuleDefinition/new_assembly_plan/asssembly_plan_interaction/product_participation/1\x00  Validation failed.'

# Ligation

In [None]:
def ligation2(): pass