In [9]:
import concurrent.futures
import polars as pl

from tools.sequence.sequence import Sequence

In [13]:
def _cai(s: str) -> float:
    """Return the codon adaptation index reported by ``Sequence`` for ``s``."""
    return Sequence.from_string(s).codon_adaptation_index()


# Load each tool's results and label every row with the tool that produced it.
genetic = pl.read_csv("genetic.csv").with_columns(
    pl.lit("genetic").alias("source")
)
lineardesign = pl.read_csv("lineardesign.csv").with_columns(
    pl.lit("LinearDesign").alias("source")
)

# A diagonal concat unions the two schemas, so columns present in only one
# file (e.g. cai_limit) come out null for the other tool's rows.
combined = pl.concat([genetic, lineardesign], how="diagonal")

# Recompute the CAI of every output sequence with one shared implementation.
output_cai = (
    pl.col("output_sequence")
    .map_elements(_cai, return_dtype=pl.Float64)
    .alias("output_cai")
)
df = combined.with_columns(output_cai)
df

name,cai_limit,output_sequence,optimize_mfe,time,output_cai,source
str,f64,str,str,f64,f64,str
"""EGFP""",0.06,"""ATGGTGAGCAAGGGCGAGGAGTTATTCACG…",,0.066582,0.939103,"""genetic"""
"""SARSCOV2_SPIKE""",0.06,"""ATGTTCGTGTTCCTGGTGCTGCTGCCCCTG…",,1.104039,0.941502,"""genetic"""
"""SARSCOV2_SPIKE""",0.04,"""ATGTTTGTGTTCCTGGTGCTGCTGCCTCTG…","""MIN""",17.647091,0.960153,"""genetic"""
"""LUCIFERASE""",0.02,"""ATGGAGGACGCCAAGAACATCAAGAAGGGC…",,0.064608,0.981588,"""genetic"""
"""SARSCOV2_SPIKE""",0.04,"""ATGTTCGTGTTCCTGGTGCTGCTGCCCCTG…","""MAX""",14.588045,0.960056,"""genetic"""
…,…,…,…,…,…,…
"""LUCIFERASE""",,"""AUGGAGGACGCCAAGAACAUCAAGAAGGGG…",,80.021719,0.984009,"""LinearDesign"""
"""EGFP""",,"""AUGGUGUCCAAGGGCGAGGAGCUCUUCACA…",,12.717408,0.893123,"""LinearDesign"""
"""EGFP""",,"""AUGGUGAGCAAGGGCGAGGAGCUGUUCACC…",,13.083948,0.990268,"""LinearDesign"""
"""SARSCOV2_SPIKE""",,"""AUGUUUGUGUUCCUGGUGCUGCUGCCCCUG…",,816.229586,0.913387,"""LinearDesign"""


In [14]:
def _mfe(s: str) -> float:
    """Return the ``energy`` of the minimum-free-energy result for ``s``.

    The string is parsed with ``Sequence.from_string``; the value returned is
    the ``energy`` attribute of that sequence's ``minimum_free_energy``.

    Defined as a module-level function (rather than a lambda) because it is
    passed to a ``ProcessPoolExecutor``, which must pickle the callable.
    """
    sequence = Sequence.from_string(s)
    return sequence.minimum_free_energy.energy


# Compute the MFE energy of every output sequence in worker processes.
# Executor.map yields results in input order, so the values line up with
# the rows of df.
sequences = df["output_sequence"].to_list()
with concurrent.futures.ProcessPoolExecutor() as executor:
    mfe_values = pl.Series("output_mfe", executor.map(_mfe, sequences))
    df = df.with_columns(output_mfe=mfe_values)
df

name,cai_limit,output_sequence,optimize_mfe,time,output_cai,source,output_mfe
str,f64,str,str,f64,f64,str,f64
"""EGFP""",0.06,"""ATGGTGAGCAAGGGCGAGGAGTTATTCACG…",,0.066582,0.939103,"""genetic""",-239.899994
"""SARSCOV2_SPIKE""",0.06,"""ATGTTCGTGTTCCTGGTGCTGCTGCCCCTG…",,1.104039,0.941502,"""genetic""",-1320.400024
"""SARSCOV2_SPIKE""",0.04,"""ATGTTTGTGTTCCTGGTGCTGCTGCCTCTG…","""MIN""",17.647091,0.960153,"""genetic""",-1448.199951
"""LUCIFERASE""",0.02,"""ATGGAGGACGCCAAGAACATCAAGAAGGGC…",,0.064608,0.981588,"""genetic""",-632.700012
"""SARSCOV2_SPIKE""",0.04,"""ATGTTCGTGTTCCTGGTGCTGCTGCCCCTG…","""MAX""",14.588045,0.960056,"""genetic""",-1277.199951
…,…,…,…,…,…,…,…
"""LUCIFERASE""",,"""AUGGAGGACGCCAAGAACAUCAAGAAGGGG…",,80.021719,0.984009,"""LinearDesign""",-817.700012
"""EGFP""",,"""AUGGUGUCCAAGGGCGAGGAGCUCUUCACA…",,12.717408,0.893123,"""LinearDesign""",-420.0
"""EGFP""",,"""AUGGUGAGCAAGGGCGAGGAGCUGUUCACC…",,13.083948,0.990268,"""LinearDesign""",-299.299988
"""SARSCOV2_SPIKE""",,"""AUGUUUGUGUUCCUGGUGCUGCUGCCCCUG…",,816.229586,0.913387,"""LinearDesign""",-2256.0


In [15]:
# Persist the merged table (both tools, with output_cai and output_mfe).
df.write_csv(file="combined.csv")