In [2]:
import concurrent.futures

import polars as pl

from mrnarchitect.sequence import Sequence
from mrnarchitect.utils.analysis import hydrate

In [3]:
# Load the table of sequences to analyse. Later cells read the "index",
# "input_sequence" and "optimized_sequence" fields of each row.
df = pl.read_csv("optimized-sequences.csv")
# Bare trailing expression: show the frame as this cell's output.
df

index,source,name,raw_input_sequence,input_sequence,error,optimized_sequence,optimization_error
i64,str,str,str,str,str,str,str
0,"""antibody_monoclonal_aa_seqs.fa…","""RituximabTargetAnti-CD20v_heav…","""QVQLQQPGAELVKPGASVKMSCKASGYTFT…","""CAGGTGCAGCTGCAGCAGCCCGGCGCCGAG…",,"""CAGGTGCAGCTCCAGCAGCCAGGCGCCGAA…",
1,"""antibody_monoclonal_aa_seqs.fa…","""RituximabTargetAnti-CD20v_ligh…","""QIVLSQSPAILSASPGEKVTMTCRASSSVS…","""CAGATCGTGCTGAGCCAGAGCCCCGCCATC…",,"""CAGATCGTGCTGAGCCAGAGCCCCGCCATC…",
2,"""antibody_monoclonal_aa_seqs.fa…","""TrastuzumabTargetAnti-HER2_Lig…","""DIQMTQSPSSLSASVGDRVTITCRASQDVN…","""GACATCCAGATGACCCAGAGCCCCAGCAGC…",,"""GACATCCAGATGACCCAGAGCCCCAGCTCC…",
3,"""antibody_monoclonal_aa_seqs.fa…","""TrastuzumabTargetAnti-HER2_Hea…","""EVQLVESGGGLVQPGGSLRLSCAASGFNIK…","""GAGGTGCAGCTGGTGGAGAGCGGCGGCGGC…",,"""GAGGTGCAGCTGGTGGAGAGCGGCGGCGGC…",
4,"""antibody_monoclonal_aa_seqs.fa…","""Bevacizumab_light_chain""","""DIQMTQSPSSLSASVGDRVTITCSASQDIS…","""GACATCCAGATGACCCAGAGCCCCAGCAGC…",,"""GACATCCAGATGACCCAGAGCCCCAGCTCC…",
…,…,…,…,…,…,…,…
47568,"""ensembl_ccds_tx_nt_seq.fa""","""ENSG00000173846|ENSG0000017384…","""ATGGAGCCTGCCGCCGGTTTCCTGTCTCCG…","""ATGGAGCCTGCCGCCGGTTTCCTGTCTCCG…",,"""ATGGAGCCTGCCGCTGGCTTTCTGTCTCCT…",
47569,"""ensembl_ccds_tx_nt_seq.fa""","""ENSG00000142166|ENSG0000014216…","""ATGGATAATTGGATAAAATTGTCTGGGTGT…","""ATGGATAATTGGATAAAATTGTCTGGGTGT…",,"""ATGGACAACTGGATCAAGCTGAGCGGCTGC…",
47570,"""ensembl_ccds_tx_nt_seq.fa""","""ENSG00000180488|ENSG0000018048…","""ATGTCAGACTGCTGCTCAGCGCCAGGCATC…","""ATGTCAGACTGCTGCTCAGCGCCAGGCATC…",,"""ATGAGCGACTGCTGCAGCGCCCCCGGCATC…",
47571,"""ensembl_ccds_tx_nt_seq.fa""","""ENSG00000145216|ENSG0000014521…","""ATGTCGGCCGGCGAGGTCGAGCGCCTAGTG…","""ATGTCGGCCGGCGAGGTCGAGCGCCTAGTG…",,"""ATGAGCGCCGGCGAGGTCGAGAGACTGGTG…",


In [4]:
def _hydrate(input_: dict) -> dict:
    if input_["index"] % 1000 == 0:
        print(f"index {input_['index']}")
    input_sequence = input_["input_sequence"]
    hydrated_input = {}
    optimized_sequence = input_["optimized_sequence"]
    hydrated_optimized = {}
    if input_sequence:
        hydrated_input = hydrate(Sequence.create(input_sequence))
    if optimized_sequence:
        hydrated_optimized = hydrate(Sequence.create(optimized_sequence))
    
    return {
        "index": input_["index"],
        **{f"input_{k}": v for k, v in hydrated_input.items()},
        **{f"optimized_{k}": v for k, v in hydrated_optimized.items()},
    }

# Fan the rows out across worker processes. executor.map returns results in
# the same order as its input, so the output rows line up with the rows of df.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # df.rows(named=True) already returns a list of dicts (one per row), so no
    # extra list() copy is needed before handing it to the executor.
    results = list(executor.map(_hydrate, df.rows(named=True)))

# _hydrate emits fewer keys for rows lacking an input/optimized sequence, so
# the records are not uniform. Scan every record when inferring the schema
# instead of only the default leading batch; otherwise columns or dtypes that
# first show up later in the list could be missed.
hydrated_df = pl.DataFrame(results, infer_schema_length=None)
hydrated_df.write_csv("output-hydrated.csv")
hydrated_df

index 0
index 48000
index 49000
index 50000
index 1000
index 2000
index 3000
index 4000
index 5000
index 6000
index 7000
index 8000
index 9000
index 10000
index 11000
index 12000
index 13000
index 14000
index 15000
index 16000
index 17000
index 18000
index 19000
index 20000
index 21000
index 22000
index 23000
index 24000
index 46000
index 47000


index,input_a,input_c,input_g,input_t,input_gc,input_gc1,input_gc2,input_gc3,input_cpg,input_uridine_depletion,input_cai,input_tai,input_mfe,input_amfe,input_mfe_structure,input_wmfe,input_gini,input_slippery_site_ratio,input_rscu,input_rcbs,input_dcbs,input_rcr,input_cub,input_cbi,optimized_a,optimized_c,optimized_g,optimized_t,optimized_gc,optimized_gc1,optimized_gc2,optimized_gc3,optimized_cpg,optimized_uridine_depletion,optimized_cai,optimized_tai,optimized_mfe,optimized_amfe,optimized_mfe_structure,optimized_wmfe,optimized_gini,optimized_slippery_site_ratio,optimized_rscu,optimized_rcbs,optimized_dcbs,optimized_rcr,optimized_cub,optimized_cbi
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64
0,0.233555,0.350333,0.302291,0.113821,0.652624,0.652624,0.465632,0.982262,0.060606,0.0,1.0,0.372443,-522.0,-0.385809,"""(((((.(.((((...))))..(((((((.(…",-8.831611,0.163581,0.0,3.567627,0.815107,2.385627,0.0,1.093384,1.0,0.232077,0.343681,0.288987,0.135255,0.632668,0.632668,0.465632,0.922395,0.055432,0.042129,0.979129,0.369046,-507.799988,-0.375314,"""((((.(((((((.....((..(((((((..…",-8.361398,0.256065,0.003695,2.986456,0.750011,2.330087,0.007391,0.929305,0.873899
1,0.239437,0.348983,0.303599,0.107981,0.652582,0.652582,0.507042,0.971831,0.059468,0.0,1.0,0.362417,-227.5,-0.356025,"""......(((((..((....)).(((((.((…",-8.532667,0.167019,0.0,3.71831,0.856337,2.440252,0.0,1.11896,1.0,0.231612,0.350548,0.29734,0.120501,0.647887,0.647887,0.507042,0.957746,0.059468,0.014085,0.99444,0.3632,-234.399994,-0.366823,"""......(((((..(((..........((.(…",-8.714667,0.205446,0.0,3.325244,0.797784,2.489732,0.0,1.034453,0.936471
2,0.244548,0.353583,0.28972,0.11215,0.643302,0.643302,0.476636,0.96729,0.05296,0.0,1.0,0.365163,-225.199997,-0.350779,"""..(((.....((.(.(((.(((((((((((…",-8.297351,0.166355,0.0,3.668224,0.893168,2.538889,0.0,1.112429,1.0,0.238318,0.35514,0.283489,0.123053,0.638629,0.638629,0.476636,0.953271,0.05296,0.014019,0.9951,0.364569,-227.199997,-0.353894,""".......((((......((((....))))(…",-8.364901,0.20729,0.001558,3.328629,0.826603,2.576876,0.003115,1.028316,0.936657
3,0.232593,0.341481,0.308889,0.117037,0.65037,0.65037,0.462222,0.971111,0.055556,0.0,1.0,0.369346,-525.900024,-0.389556,""".(((((....(((((..((((.((((((((…",-8.746341,0.153111,0.0,3.593333,0.83995,2.454101,0.0,1.098031,1.0,0.231852,0.334074,0.304444,0.12963,0.638519,0.638519,0.462222,0.933333,0.051111,0.028889,0.984833,0.367733,-533.400024,-0.395111,"""........(((((.((.((((.((((((((…",-8.753963,0.261373,0.000741,3.114534,0.728153,2.439528,0.003704,0.969142,0.901277
4,0.246106,0.345794,0.292835,0.115265,0.638629,0.638629,0.46729,0.976636,0.051402,0.0,1.0,0.365897,-228.899994,-0.356542,"""...............(((.((.......))…",-8.643046,0.166822,0.0,3.649533,0.839185,2.424216,0.0,1.105428,1.0,0.238318,0.350467,0.285047,0.126168,0.635514,0.635514,0.46729,0.96729,0.051402,0.009346,0.996526,0.364617,-234.5,-0.365265,""".................((((....)))).…",-8.733113,0.186916,0.001558,3.359791,0.807505,2.471953,0.001558,1.040007,0.950384
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
47568,0.193199,0.325605,0.282844,0.198351,0.608449,0.608449,0.469861,0.718702,0.052035,0.180835,0.80236,0.314241,-779.0,-0.40134,"""..((((((((((((.(((........((((…",-8.80105,0.197785,0.01288,1.448282,0.153497,1.578792,0.050489,0.328448,0.312117,0.217929,0.340031,0.292117,0.149923,0.632148,0.632148,0.471406,0.837713,0.049974,0.071097,0.95881,0.349535,-755.0,-0.388975,"""..((......))((.(((((.(((((((.(…",-8.542437,0.264046,0.001546,2.901943,0.854589,2.796145,0.004637,0.879412,0.815951
47569,0.354465,0.175869,0.173142,0.296524,0.349012,0.349012,0.349693,0.269939,0.010225,0.390593,0.697652,0.301027,-301.899994,-0.205794,""".......(((((((...(((((((.....(…",-3.277591,0.200265,0.030675,1.339973,0.192895,1.57899,0.135651,0.630238,-0.255586,0.271984,0.310157,0.27062,0.147239,0.580777,0.580777,0.349693,0.94683,0.05726,0.018405,0.987422,0.391515,-462.399994,-0.315201,"""....(((...((.((..(((((((((((((…",-6.694958,0.246325,0.0,2.982808,0.604532,2.1884,0.003408,0.953074,0.924796
47570,0.294365,0.191153,0.225382,0.2891,0.416535,0.416535,0.393365,0.35545,0.014218,0.35387,0.709525,0.301219,-530.799988,-0.279516,""".((((.((.((((((((.((.(((.((..(…",-4.986452,0.184057,0.040021,1.238317,0.183278,1.590541,0.112164,0.470331,-0.102769,0.240126,0.306477,0.301211,0.152185,0.607688,0.607688,0.394945,0.908373,0.054766,0.031596,0.974443,0.375867,-739.700012,-0.389521,""".......(((.(((((((.((((((((...…",-8.722366,0.28126,0.001053,2.994115,0.676685,2.481779,0.006319,0.925908,0.884308
47571,0.323409,0.215995,0.252189,0.208406,0.468184,0.468184,0.509632,0.315236,0.029189,0.359019,0.738042,0.289427,-466.200012,-0.272154,""".....(((.(((.((((.....))))...(…",-4.008115,0.220866,0.014011,1.335475,0.20946,1.665085,0.107414,0.529416,-0.090189,0.278459,0.319907,0.307647,0.093987,0.627554,0.627554,0.511384,0.816112,0.063047,0.068301,0.971528,0.335277,-552.5,-0.322534,""".....((((((((((((((.....((((((…",-6.104535,0.288839,0.000584,2.862714,0.75211,2.797794,0.012843,0.896484,0.799384
