<a href="https://colab.research.google.com/github/Gabrielsandbox/Synthetic-Biology-Research/blob/main/SynBio_pySBOL3_KnowledgeBase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install sbol3

In [None]:
import sbol3

doc = sbol3.Document()
doc.read('file_name.nt')
doc.write('file_name_out.nt')

# A Document is the container for SBOL objects. TopLevel objects can be
# referred to directly from the Document.

len(doc)

# View an inventory of the objects contained in the Document.
print(doc)

# See the identities of the objects in the Document. Each SBOL object in a
# Document is uniquely identified by a special string of characters called a
# Uniform Resource Identifier (URI).
for obj in doc.objects:
    print(obj.identity)

# Configure the default namespace BEFORE creating any objects. Objects created
# from a bare display_id (e.g. 'Cas9') get their identity from the configured
# namespace, and the doc.find('http://sbolstandard.org/testfiles/Cas9') lookup
# later in this file expects Cas9 to live under this namespace.
sbol3.set_namespace('http://sbolstandard.org/testfiles')

'''
Biological designs can be described with SBOL data objects, including both structural and functional features.
The principle classes for describing the structure and primary sequence of a design are Component, Sequence, and Feature.
The principle classes for describing the function of a design are Component, Feature, Interaction, and Participation.
'''

# The Component is “… analogous to a blueprint or specification sheet for a biological part…”,
# whereas the SubComponent “… represents the specific occurrence of a part…” within a larger design.

cas9 = sbol3.Component('Cas9', sbol3.SBO_PROTEIN) # Creates a protein, type = SBO_PROTEIN

target_promoter = sbol3.Component('target_promoter', sbol3.SBO_DNA, roles=[sbol3.SO_PROMOTER]) # Creates a DNA component, type = SBO_DNA
second_promoter = sbol3.Component('promoters/second_promoter', sbol3.SBO_DNA) # Include a local path in addition to a display_id
third_promoter = sbol3.Component('http://sbolstandard.org/other_namespace/third_promoter', sbol3.SBO_DNA) # Use a namespace different from the configured default namespace

#http://www.sequenceontology.org/browser/obob.cgi
#https://www.ebi.ac.uk/ols4/
#SBOL unifies numerous ontologies and uses them to identify objects
#https://pysbol3.readthedocs.io/en/stable/ontology.html

# Look up ontology terms by name with the tyto package, for terms that are not
# already available as sbol3 constants. tyto is a separate package: install it
# (e.g. `pip install tyto`) if it is not already available in the environment.
import tyto

# Sequence Ontology term, resolved from its name.
SO_ENGINEERED_FUSION_GENE = tyto.SO.engineered_fusion_gene
SO_ENGINEERED_FUSION_GENE
# Systems Biology Ontology term, resolved from its name.
SBO_DNA_REPLICATION = tyto.SBO.DNA_replication
SBO_DNA_REPLICATION

#In some cases a developer may want to use SBOL objects as intermediate data structures in a computational biology workflow.
#In this case, the user is free to manipulate objects independently of a Document.
#However, if the user wishes to write out a file with all the information contained in their object,
#they must first add it to the Document. This is done using the add method.

# Add both components to the Document so that they become part of it.
doc.add(target_promoter)
doc.add(cas9)

# Objects can be found and retrieved from a Document by using the find method.

# Lookup by full identity (URI).
# NOTE(review): this URI only matches if the default namespace was
# 'http://sbolstandard.org/testfiles' when cas9 was created. Confirm that
# sbol3.set_namespace(...) was called beforehand; otherwise find() presumably
# returns None and the .identity access on the next line fails.
cas9.identity
found_obj = doc.find('http://sbolstandard.org/testfiles/Cas9')
found_obj.identity
# Lookup by display_id.
cas9.display_id
found_obj = doc.find('Cas9')
found_obj.identity

#It is possible to have multiple SBOL objects with the same display_id (but different identity) in the same document.
#The find(display_id) will return the object that was added to the Document first

# Setting attributes

plasmid = sbol3.Component('pBB1', sbol3.SBO_DNA)
cas9.description = 'This is a Cas9 protein'
plasmid.roles = [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_CIRCULAR]

# types and roles behave like Python lists, so .append(), .extend() and +=
# all work on them. Each variant below assigns the roles again from scratch.

# Variant 1: start with one role and append the second.
plasmid.roles = [sbol3.SO_DOUBLE_STRANDED]
plasmid.roles.append(sbol3.SO_CIRCULAR)

# Variant 2: start empty and extend with both roles.
plasmid.roles = []
plasmid.roles.extend([sbol3.SO_DOUBLE_STRANDED, sbol3.SO_CIRCULAR])

# Variant 3: start with one role and add the second with +=.
plasmid.roles = [sbol3.SO_DOUBLE_STRANDED]
plasmid.roles += [sbol3.SO_CIRCULAR]


# [IMPORTANT - MAIN WORKFLOW]
# To add a promoter to a circuit design:
#   1. define the circuit and the promoter as SBOL Component objects,
#   2. define a SubComponent as an instance of the promoter,
#   3. add that SubComponent to the circuit's features attribute.

circuit = sbol3.Component(
    'circuit',
    sbol3.SBO_DNA,
    roles=[sbol3.SO_ENGINEERED_REGION],
)
ptet = sbol3.Component(
    'pTetR',
    sbol3.SBO_DNA,
    roles=[sbol3.SO_PROMOTER],
)

ptet_sc = sbol3.SubComponent(ptet)
circuit.features += [ptet_sc]

# Some SBOL objects point to other objects by way of URI references.
# For example, Components point to their corresponding Sequences by way of a
# URI reference.

gfp = sbol3.Component('GFP', sbol3.SBO_DNA)
gfp_seq = sbol3.Sequence(
    'GFPSequence',
    elements='atgnnntaa',
    encoding=sbol3.IUPAC_DNA_ENCODING,
)
doc.add(gfp)
doc.add(gfp_seq)

# Assign the reference by passing the Sequence object itself.
gfp.sequences = [gfp_seq]
print(gfp.sequences)
# Look up the sequence via the document
seq2 = gfp.sequences[0].lookup()
seq2 == gfp_seq

# Equivalent assignment: pass the Sequence's identity instead of the object.
gfp.sequences = [gfp_seq.identity]
print(gfp.sequences)
seq2 = gfp.sequences[0].lookup()
seq2 == gfp_seq

# DNA sequence information is saved as the elements attribute of the Sequence
# object.

gfp_seq.elements

# Owned objects (black diamond properties in UML) can be iterated directly.
for feat in circuit.features:
    print(f'{feat.display_id}, {feat.identity}, {feat.instance_of}')

# References (white diamond properties in UML) can be iterated the same way.
for seq in gfp.sequences:
    print(seq)

# Numerical indexing of list properties works as well; the index loop is kept
# on purpose to demonstrate it.
feature_count = len(circuit.features)
for n in range(feature_count):
    print(circuit.features[n].display_id)

# Copy documents and objects
doc2 = doc.copy()
# Copy the objects of doc and change their namespace.
objects = sbol3.copy(doc, into_namespace='https://example.org/foo')

# Any iterable of TopLevel can be passed to sbol3.copy.
# If a user wants to construct a new set of objects and add them to an
# existing Document, they can do so using the into_document argument.
# cd1 and doc3 are defined here so that this example runs on its own; they
# were referenced without being created anywhere in the visible file.
cd1 = sbol3.Component('cd1', sbol3.SBO_DNA)
doc3 = sbol3.Document()
sbol3.copy([cd1], into_namespace='https://example.org/bar', into_document=doc3)
