# Freqgen Example Use

We are going to recode the GFP gene from _Aequorea victoria_ so that its _k_-mer frequencies match those of the highly expressed genes of _Escherichia coli_.

In [1]:
from Bio import SeqIO

# First, read the GFP record from its FASTA file and keep only the sequence.
gfp_record = SeqIO.read("gfp.fasta", "fasta")
insert = gfp_record.seq
insert

Seq('ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTA...TAA', SingleLetterAlphabet())

In [3]:
# Next, collect the sequences of the highly expressed E. coli genes as plain
# strings, one list entry per FASTA record.
with open("ecoli.heg.fasta", "r") as handle:
    # A comprehension keeps each record and its string form under separate
    # names instead of rebinding one loop variable to two different types.
    target = [str(record.seq) for record in SeqIO.parse(handle, "fasta")]
target

['ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGA',
 'ATGACGGACAAATTGACCTCCCTTCGTCAGTACACCACCGTAGTGGCCGACACTGGGGACATCGCGGCAATGAAGCTGTATCAACCGCAGGATGCCACAACCAACCCTTCTCTCATTCTTAACGCAGCGCAGATTCCGGAATACCGTAAGTTGATTGATGATGCTGTCGCCTGGGCGAAACAGCAGAGCAACGATCGCGCGCAGCAGATCGTGGACGCGACCGACAAACTGGCAGTAAATATTGGTCTGGAAATCCTGAAACTGGTTCCGGGCCGTATCTCAACTGAAGTTGATGCGCGTCTTTCCTATGACACCGAAGCGTCAATTGCGAAAGCAAAACGCCTGATCAAACTCTACAACGATGCTGGTATTAGCAACGATCGTATTCTGATCAAACTGGCTTCTACCTGGCAGGGTATCCGTGCTGCAGAACAGCTGGAAAAAGAAGGCATCAACTGTAACCTGACCCTGCTGTTCTCCTTCGCTCAGGCTCGTGCTTGTGCGGAAGCGGGCGTGTTCCTGATCTCGCCGTTTGTTGGCCGTATTCTTGACTGGTACAAAGCGAATACCGATAAGAAAGAGTACGCTCCGGCAGAAGATCCGGGCGTGGTTTCTGTATCTGAAATCTACCAGTACTACAAAGAGCACGGTTATGAAACCGTGGTTATGGGCGCAAGCTTCCGTAACATCGGCGAAATTCTGGAACTGGCAGGCTGCGACCGTCTGACCATCGCACCGGCACTGCTGAAAGAGCTGGCGGAGAGCGAAGGGGCTATCGAACGTAAACTGTCTTACACCGGCGAAGTGAAAGCGCGTCCGGCGCGTATCACTGAGTCCGAGTTCCTGTGGCAGCACAACCAGGATCCAATGGCAGTAGATAAACTGGCGGAAGGTATCCGTAAGTTTGCTATTGACCAGGAAAAACTG

In [4]:
import freqgen

# For every k-mer size from 1 to 5, generate a sequence against the target
# frequencies and record the insert / target / result frequency tables.
# Each table is wrapped as {k: frequencies}, and `rows` is keyed by k as well.
rows = {}
for i in range(1, 6):
    print(f"k={i}")
    target_freqs = {i: freqgen.k_mer_frequencies(target, i)}
    # NOTE(review): the verbose log reports generations and a fitness value, so
    # generation looks stochastic -- confirm whether freqgen can be seeded.
    result = freqgen.generate(target_freqs, insert.translate(), verbose=True)
    insert_freqs = {i: freqgen.k_mer_frequencies(insert, i)}
    result_freqs = {i: freqgen.k_mer_frequencies(result, i)}
    # Only the frequency tables are kept; `result` itself is not stored.
    rows[i] = {
        "insert_freqs": insert_freqs,
        "target_freqs": target_freqs,
        "result_freqs": result_freqs,
    }

k=1
Gen: 121        Since Improvement: 50/50      Fitness: 7.942537290084317e-07
k=2
Gen: 146        Since Improvement: 50/50      Fitness: 0.00026886576264262274
k=3
Gen: 331        Since Improvement: 50/50      Fitness: 0.0027596497242114992
k=4
Gen: 362        Since Improvement: 50/50      Fitness: 0.025325482625317086
k=5
Gen: 258        Since Improvement: 50/50      Fitness: 0.19971977034127164


In [9]:
import json
# Persist the per-k frequency tables. The file is only written, never read
# back here, so plain write mode is used. JSON object keys are always strings,
# so the integer k keys of `rows` are stored as "1".."5".
with open('gfp-ecoliheg.json', 'w') as f:
    json.dump(rows, f, indent=4)