In [2]:
import sbol2

# Create an empty SBOL document; the objects built in the following cells
# are added to this document.
doc = sbol2.Document()

In [3]:
# Register the namespace (homespace) before any SBOL objects are created.
# NOTE(review): presumably used as the URI prefix for the objects below — confirm.
sbol2.setHomespace('https://github.com/SynBioDex/SBOL-Notebooks')

# Build the Sequence in a single constructor call:
# display ID, nucleotide string, and the encoding of that string.
seq = sbol2.Sequence(
    'example_sequence',
    'ATGCGTACGTAGCTAGTCTGATCGTAGCTAGT',
    sbol2.SBOL_ENCODING_IUPAC,
)

# Register the finished sequence with the document
doc.addSequence(seq)

In [4]:
# Create a ComponentDefinition object for a DNA component
comp_def = sbol2.ComponentDefinition('example_component', sbol2.BIOPAX_DNA)
# Link the component to the Sequence object `seq`.
# NOTE(review): the reference uses seq.persistentIdentity rather than
# seq.identity — confirm that this is the intended URI.
comp_def.sequences = [seq.persistentIdentity]

# Add the ComponentDefinition to the document
# (the annotation below is attached after the component is already in the document)
doc.addComponentDefinition(comp_def)

# Create a SequenceAnnotation object to describe a notable feature
seq_annotation = sbol2.SequenceAnnotation('cut_feature')

# Create a Cut Location; it carries a position (`at`) and an orientation
cut = sbol2.Cut('cut_location')
cut.at = 10  # Define where the cut is located (position 10)
cut.orientation = sbol2.SBOL_ORIENTATION_INLINE

# Add the Cut location to the SequenceAnnotation
seq_annotation.locations.add(cut)

# Add the SequenceAnnotation (which now holds the Cut) to the ComponentDefinition
comp_def.sequenceAnnotations.add(seq_annotation)

In [5]:
# Check if the SBOL document is valid; the value returned by validate()
# is displayed as the cell output
doc.validate()

'Valid.'

In [6]:
# Save the document to the SBOL file 'cut_example.xml'; the value returned
# by write() is displayed as the cell output
doc.write('cut_example.xml')

'Valid.'

## Extracting Sub-sequences Before and After the Cut

In this section, we will extract the sub-sequences before and after the cut position defined by the `cut_location` in our `SequenceAnnotation`. This process illustrates how we can easily manipulate and analyze sequence data within the SBOL framework, making use of SBOL's standardized approach to describing and annotating biological sequences.

We have defined a `SequenceAnnotation` with a `Cut` location at position 10, meaning that the sequence is "cut" immediately after its 10th base (between bases 10 and 11). Now, we will use this information to split the sequence into two parts: the first 10 bases before the cut and the remaining bases after it.

In [7]:
# Read the nucleotide string from the Sequence object and print it
sequence = seq.elements
print(sequence)

ATGCGTACGTAGCTAGTCTGATCGTAGCTAGT


In [8]:
# Read the cut coordinate from the Cut location object
cut_position = cut.at

# Split the sequence string at that coordinate in one step:
# everything up to the cut, and everything from the cut onward
before_cut, after_cut = sequence[:cut_position], sequence[cut_position:]

# Report both fragments, one per line
print(f"Before cut: {before_cut}", f"After cut: {after_cut}", sep="\n")

Before cut: ATGCGTACGT
After cut: AGCTAGTCTGATCGTAGCTAGT
