In [1]:
import numpy as np
import pandas as pd
from GA.TestGeneticAlgorithm import GeneticAlgorithm

In [2]:
# Notebook-level configuration, read as globals by get_sequences().
# Path to the saved Keras model file; forwarded to GeneticAlgorithm(cnn_model_path=...).
cnn_model_path = '../../Models/CNN_6_1_2.keras'
# Template forwarded to GeneticAlgorithm(masked_sequence=...).
# NOTE(review): the run of 'N' presumably marks the positions the GA is free to
# fill in, with the flanks held fixed — confirm against GeneticAlgorithm.
masked_sequence = 'AATACTAGAGGTCTTCCGACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTGTGGGCGGGAAGACAACTAGGGG'

In [3]:
def get_sequences(target_expression):
    """Run the genetic algorithm for one target and rank its best candidates.

    Builds a ``GeneticAlgorithm`` from the notebook-level ``cnn_model_path``
    and ``masked_sequence`` (population 300, 100 generations, seed 0), calls
    ``run(3)``, then pairs ``best_sequences`` with ``best_predictions``.

    Args:
        target_expression: Value handed to the GA as its target; also the
            reference the returned candidates are ranked against.

    Returns:
        list of ``(sequence, prediction)`` tuples, ordered by ascending
        ``abs(prediction - target_expression)`` (closest candidate first).
    """
    def distance_to_target(candidate):
        # candidate is a (sequence, prediction) pair; rank on the prediction.
        return abs(candidate[1] - target_expression)

    optimizer = GeneticAlgorithm(
        cnn_model_path=cnn_model_path,
        masked_sequence=masked_sequence,
        target_expression=target_expression,
        population_size=300,
        generations=100,
        seed=0,
    )
    # NOTE(review): the meaning of the argument 3 is defined by
    # GeneticAlgorithm.run and is not visible from this notebook — confirm.
    optimizer.run(3)

    candidates = list(zip(optimizer.best_sequences, optimizer.best_predictions))
    candidates.sort(key=distance_to_target)
    return candidates

def complement(seq):
    """Return the base-wise complement of an uppercase DNA string.

    Each base is swapped for its pairing partner (A<->T, C<->G); the order of
    the bases is kept, so this is the complement, not the reverse complement.

    Args:
        seq: Iterable of single-character bases drawn from 'A', 'C', 'G', 'T'.

    Returns:
        str: The complemented sequence, same length as ``seq``.

    Raises:
        KeyError: If ``seq`` contains any symbol other than A, C, G or T
            (including lowercase letters and 'N').
    """
    pairing = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    swapped = [pairing[nucleotide] for nucleotide in seq]
    return ''.join(swapped)

In [10]:
# Build the order table: for each target level, take the GA candidate whose
# prediction is closest to the target and emit a top/bottom oligo pair for it.
data = {'Name': [], 'Sequence': [], 'Prediction': []}

for target_expression in np.linspace(0.6, 0.8, 5):
    print(f'Target expression: {target_expression}')

    # get_sequences() returns candidates sorted by distance to the target,
    # so the first entry is the best match.
    sorted_results = get_sequences(target_expression)
    sequence, best_prediction = sorted_results[0]

    # Top strand: the designed sequence trimmed by 16 characters on the left
    # and 19 on the right.
    top_strand = sequence[16:-19]
    # Bottom strand: complement of a window starting 4 characters later, with
    # 'CGCC' appended, then reversed (so the result starts with 'CCGC').
    # NOTE(review): presumably this leaves 4-nt overhangs for cloning — confirm.
    bottom_strand = (complement(sequence[20:-19]) + 'CGCC')[::-1]

    # Both strands of a pair carry the same rounded prediction.
    rounded_prediction = round(best_prediction, 3)

    data['Name'] += [
        f'Cal_p{target_expression*1000:.0f}_top',
        f'Cal_p{target_expression*1000:.0f}_bottom',
    ]
    data['Sequence'] += [top_strand, bottom_strand]
    data['Prediction'] += [rounded_prediction, rounded_prediction]

df = pd.DataFrame(data)

Target expression: 0.6


  saveable.load_own_variables(weights_store.get(inner_path))


Target expression: 0.65
Target expression: 0.7
Target expression: 0.75
Target expression: 0.8


In [11]:
# Show the whole order table (10 rows). `head` documents `n` as an int, so
# display the frame directly instead of relying on `head(None)`.
df

Unnamed: 0,Name,Sequence,Prediction
0,Cal_p600_top,CGACTTGAAATAAGGAATATATTATTTTAACATGGTGTGG,0.6
1,Cal_p600_bottom,CCGCCCACACCATGTTAAAATAATATATTCCTTATTTCAA,0.6
2,Cal_p650_top,CGACTTTGAAAATATATTATTGTATATATATTATGTGTGG,0.65
3,Cal_p650_bottom,CCGCCCACACATAATATATATACAATAATATATTTTCAAA,0.65
4,Cal_p700_top,CGACTTGACTTATAAAATATATTGCATATAAACTGTGTGG,0.7
5,Cal_p700_bottom,CCGCCCACACAGTTTATATGCAATATATTTTATAAGTCAA,0.7
6,Cal_p750_top,CGACTTGACTTCTATATTATATTAGATAAACTATGTGTGG,0.75
7,Cal_p750_bottom,CCGCCCACACATAGTTTATCTAATATAATATAGAAGTCAA,0.75
8,Cal_p800_top,CGACTTGACGGGTATATTATACTGTATACTATCAGTGTGG,0.8
9,Cal_p800_bottom,CCGCCCACACTGATAGTATACAGTATAATATACCCGTCAA,0.8


In [12]:
# Write the order table to disk without the DataFrame's integer index column.
order_csv_path = '../../Data/RFP/2025_04_04_order.csv'
df.to_csv(order_csv_path, index=False)