In [5]:
import Bio
from Bio import SeqIO
import dnachisel

AloeChloroplastGenBank=SeqIO.read("Genome/Aloe Chloroplast.gb","genbank")
print(AloeChloroplastGenBank.id)

for CODON_TABLE in dnachisel.biotools.CODON_TABLE_NAMES:
    print(CODON_TABLE)

NC_035506.1
Standard
SGC0
Vertebrate Mitochondrial
SGC1
Yeast Mitochondrial
SGC2
Mold Mitochondrial
Protozoan Mitochondrial
Coelenterate Mitochondrial
Mycoplasma
Spiroplasma
SGC3
Invertebrate Mitochondrial
SGC4
Ciliate Nuclear
Dasycladacean Nuclear
Hexamita Nuclear
SGC5
Echinoderm Mitochondrial
Flatworm Mitochondrial
SGC8
Euplotid Nuclear
SGC9
Bacterial
Archaeal
Plant Plastid
Alternative Yeast Nuclear
Ascidian Mitochondrial
Alternative Flatworm Mitochondrial
Blepharisma Macronuclear
Chlorophycean Mitochondrial
Trematode Mitochondrial
Scenedesmus obliquus Mitochondrial
Thraustochytrium Mitochondrial
Pterobranchia Mitochondrial
Candidate Division SR1
Gracilibacteria
Pachysolen tannophilus Nuclear
Karyorelict Nuclear
Condylostoma Nuclear
Mesodinium Nuclear
Peritrich Nuclear
Blastocrithidia Nuclear
Balanophoraceae Plastid


In [2]:
import Bio
from Bio import SeqIO
from dnachisel import *

finalSequence=list()

HumanInsulinFASTA=list(SeqIO.parse("Genome/Human Insulin.fasta","fasta"))
for record in HumanInsulinFASTA:
    print(record.description)
    print(record.seq)
    strSeq=str(record.seq)[:(len(str(record.seq))//3)*3]
    problem = DnaOptimizationProblem(
        sequence=strSeq,
        constraints=[
            EnforceTranslation()
        ],
        objectives=[CodonOptimize(species=34199)]
    )

    # SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE

    problem.resolve_constraints()
    problem.optimize()

    # PRINT SUMMARIES TO CHECK THAT CONSTRAINTS PASS

    print(problem.constraints_text_summary())
    print(problem.objectives_text_summary())

    # GET THE FINAL SEQUENCE (AS STRING OR ANNOTATED BIOPYTHON RECORDS)

    final_sequence = problem.sequence  # string
    finalSequence+=('-'*len(record.seq)%3)
    final_sequence = Bio.SeqRecord.SeqRecord(
        Bio.Seq.Seq(final_sequence+'-'*len(record.seq)%3),
        id=record.id,
        name=record.name,
        description=record.description+" Optimized for Aloe vera based on Kazusa species 34199."
    )
    #final_record = problem.to_record(with_sequence_edits=True)
    finalSequence.append(final_sequence)

for newRecord in finalSequence:
    print(newRecord.description)
    print(newRecord.seq)

SeqIO.write(finalSequence,"Genome/Optimized Human Insulin.fasta","fasta")

objective:   0%|          | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-465](34199)...]
location:   0%|          | 0/80 [00:00<?, ?it/s, now=None][A
location:   0%|          | 0/80 [00:00<?, ?it/s, now=0-3] [A
                                                                                  NM_000207.3 Homo sapiens insulin (INS), transcript variant 1, mRNA
AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGATCACTGTCCTTCTGCCATGGCCCTGTGGATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTGACCCAGCCGCAGCCTTTGTGAACCAACACCTGTGCGGCTCACACCTGGTGGAAGCTCTCTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACACACCCAAGACCCGCCGGGAGGCAGAGGACCTGCAGGTGGGGCAGGTGGAGCTGGGCGGGGGCCCTGGTGCAGGCAGCCTGCAGCCCTTGGCCCTGGAGGGGTCCCTGCAGAAGCGTGGCATTGTGGAACAATGCTGTACCAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAGACGCAGCCCGCAGGCAGCCCCACACCCGCCGCCTCCTGCACCGAGAGAGATGGAATAAAGCCCTTGAACCAGC
===> SUCCESS - all constraints evaluations pass
✔PASS ┍ EnforceTranslation[0-465]
      │ Enforced by nucleotides restrictions


===> TOTAL OBJECTIVES SCORE:         0
✔        0

TypeError: not all arguments converted during string formatting

In [None]:
import Bio
from Bio import SeqIO

pCAMBIAFASTA=SeqIO.parse("Genome/pCAMBIA.fa","fasta")
for record in pCAMBIAFASTA:
    print(record.id)
    print(record.seq)

In [2]:
import Bio
from Bio.Data import CodonTable

print(CodonTable.unambiguous_rna_by_id[11])

Table 11 Bacterial, Archaeal, Plant Plastid

  |  U      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
U | UUU F   | UCU S   | UAU Y   | UGU C   | U
U | UUC F   | UCC S   | UAC Y   | UGC C   | C
U | UUA L   | UCA S   | UAA Stop| UGA Stop| A
U | UUG L(s)| UCG S   | UAG Stop| UGG W   | G
--+---------+---------+---------+---------+--
C | CUU L   | CCU P   | CAU H   | CGU R   | U
C | CUC L   | CCC P   | CAC H   | CGC R   | C
C | CUA L   | CCA P   | CAA Q   | CGA R   | A
C | CUG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | AUU I(s)| ACU T   | AAU N   | AGU S   | U
A | AUC I(s)| ACC T   | AAC N   | AGC S   | C
A | AUA I(s)| ACA T   | AAA K   | AGA R   | A
A | AUG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GUU V   | GCU A   | GAU D   | GGU G   | U
G | GUC V   | GCC A   | GAC D   | GGC G   | C
G | GUA V   | GCA A   | GAA E   | GGA G   | A
G | GUG V(s)| GCG A   | GAG E   | GGG

In [19]:
import Bio
from Bio import AlignIO
from Bio import SeqIO

HumanInsulinFASTA=list(SeqIO.parse("Genome/Human Insulin.fasta","fasta"))
OptimizedHumanInsulinFasta=list(SeqIO.parse("Genome/Optimized Human Insulin.fasta","fasta"))
for _ in range (4):
    print("%d : %d" % (len(HumanInsulinFASTA[_].seq),len(OptimizedHumanInsulinFasta[_].seq)))

465 : 465
491 : 489
644 : 642
525 : 525
