# Testing `in_silico_evolution` function

**Authorship:**
Adam Klie, *08/08/2022*
***
**Description:**
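Notebook for testing the `in_silico_evolution` functionality, exercised here through `eu.interpret.best_k_muts`, `eu.interpret.best_mut_seqs`, `eu.interpret.evolution`, and `eu.interpret.evolve_seqs_sdata` on the `random1000` dataset with a freshly initialized DeepBind model.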

In [1]:
import numpy as np
import pandas as pd

# Autoreload extension: with mode 2, imported modules are reloaded before code is executed,
# so edits to the library under test are picked up without restarting the kernel.
# The guard skips `%load_ext` when the extension is already loaded, which keeps this
# cell safe to re-run.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

In [2]:
import eugene as eu

Global seed set to 13
2022-09-11 04:49:36.100187: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-11 04:49:38.973318: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-11 04:49:38.973348: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-09-11 04:49:39.227870: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-09-11 04:49:43.466497: W tensorfl

# Data extract-transform-load (ETL)

In [3]:
# Load the `random1000` dataset and prepare its sequences for modeling.
# NOTE: `prepare_seqs_sdata` modifies `sdata` in place (its return value is not used);
# per this cell's saved output it adds `ohe_seqs`, `ohe_rev_seqs`, and a `train_val`
# column in `seqs_annot`.
sdata = eu.datasets.random1000()
eu.pp.prepare_seqs_sdata(sdata)
sdata  # last expression: display the SeqData summary

  0%|          | 0/3 [00:00<?, ?it/s]

One-hot encoding sequences:   0%|          | 0/1000 [00:00<?, ?it/s]

SeqData object modified:
	ohe_seqs: None -> 1000 ohe_seqs added
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added
    seqs_annot:
        + train_val


SeqData object with = 1000 seqs
seqs = (1000,)
names = (1000,)
rev_seqs = None
ohe_seqs = (1000, 4, 66)
ohe_rev_seqs = (1000, 4, 66)
seqs_annot: 'target', 'train_val'
pos_annot: PyRanges object with 1400 features
seqsm: None
uns: None

# Model instantiation and initialization (I&I)

In [4]:
# Instantiate a single-output DeepBind model and initialize its weights.
# The input length is read from the last axis of the one-hot encoded sequences
# (66 for this dataset, per the `ohe_seqs = (1000, 4, 66)` summary) instead of being
# hard-coded, so the model cannot drift out of sync with the data.
model = eu.models.DeepBind(input_len=sdata.ohe_seqs.shape[-1], output_dim=1)
# Return value is unused; presumably this initializes `model` in place — confirm.
eu.models.init_weights(model)

# Test API

In [5]:
# Set-up of some sequences to test: take the first N_TEST_SEQS sequences and pick one
# of them at random for the single-sequence tests.
N_TEST_SEQS = 32  # single source of truth for the size of the test slice

# Random index into the test slice; `.squeeze()` turns the length-1 result into a 0-d array.
# No seed is set in this cell, so the draw depends on NumPy's global random state at the
# time the cell runs.
seq_num = np.random.choice(N_TEST_SEQS, size=1, replace=False).squeeze()

# Raw and one-hot encoded views of the same slice, plus the randomly chosen entry of each.
seqs = sdata.seqs[:N_TEST_SEQS]
seq = seqs[seq_num]
ohe_seqs = sdata.ohe_seqs[:N_TEST_SEQS]
ohe_seq = ohe_seqs[seq_num]
seq, ohe_seq.shape, ohe_seqs.shape

('TCATTAGATTGGGTTGCTGTTTAGCAGGACCATATCCGGAGGCTTTAATGTTACCCGGCAGTGCTT',
 (4, 66),
 (32, 4, 66))

## Test `in_silico_best_k_muts` (called here as `eu.interpret.best_k_muts`)

In [9]:
# Run `best_k_muts` with k=1 on the chosen sequence and decode the result to a string.
# The leading axis of the returned one-hot array is squeezed away before decoding.
mut_ohe_seq, delta, delta_ind = eu.interpret.best_k_muts(model, ohe_seq, k=1)
mut_seq = eu.pp.decode_seq(mut_ohe_seq.squeeze(axis=0))

# Display: original base, mutated base, both full sequences, and their Hamming distance.
mut_pos = delta_ind.squeeze()
hamming = eu.pp._utils._hamming_distance(seq, mut_seq)
(seq[mut_pos], mut_seq[mut_pos], seq, mut_seq, hamming)

[[ 2 61]] [61] [0.00944884]
(4, 66)


('T',
 'G',
 'TCATTAGATTGGGTTGCTGTTTAGCAGGACCATATCCGGAGGCTTTAATGTTACCCGGCAGTGCTT',
 'TCATTAGATTGGGTTGCTGTTTAGCAGGACCATATCCGGAGGCTTTAATGTTACCCGGCAGGGCTT',
 1)

## Test `in_silico_best_mut_seqs` (called here as `eu.interpret.best_mut_seqs`)

In [10]:
# Run `best_mut_seqs` over the whole test slice (batch_size=32), then check every result.
mut_ohe_seqs, deltas, delta_inds = eu.interpret.best_mut_seqs(model, ohe_seqs, batch_size=32)
for i, mut_ohe in enumerate(mut_ohe_seqs):
    mut_seq = eu.pp.decode_seq(mut_ohe)
    dist = eu.pp._utils._hamming_distance(seqs[i], mut_seq)
    # Every returned sequence must differ from its original at exactly one position.
    assert dist == 1
    if i >= 3:
        continue
    # Details are printed for the first three sequences only: delta and position,
    # original vs. mutated base, both sequences, the distance, and a blank separator line.
    pos = delta_inds[i]
    print(deltas[i], pos)
    print(seqs[i][pos], mut_seq[pos])
    print(seqs[i], mut_seq, dist, "", sep="\n")

0.015379578 55
A C
AGGACAGATTTTCGCGTGTTGGGCCCAACGGATCAGCCTCTATAAACCGTATCCGACAATATAAGG
AGGACAGATTTTCGCGTGTTGGGCCCAACGGATCAGCCTCTATAAACCGTATCCGCCAATATAAGG
1

0.025349513 50
G C
TGACTCCAGGAAGGACGTGTTGTTCGAGAAGAGCAGCCGAAAAAGTTAGCGATCGGCTCGACTTTC
TGACTCCAGGAAGGACGTGTTGTTCGAGAAGAGCAGCCGAAAAAGTTAGCCATCGGCTCGACTTTC
1

0.024218425 13
C A
TATACTAGGAAGTCGACTTAAGAGATGCAAACAAAGAGTGGGGCATATTATCTAGCCAGACGTCAC
TATACTAGGAAGTAGACTTAAGAGATGCAAACAAAGAGTGGGGCATATTATCTAGCCAGACGTCAC
1



## Test `in_silico_evolution` (called here as `eu.interpret.evolution` and `eu.interpret.evolve_seqs_sdata`)

In [11]:
# Run `evolution` on the single test sequence and decode the evolved one-hot array.
# NOTE: this rebinds `deltas`, which the `best_mut_seqs` cell above also assigns.
evolved_ohe_seq, deltas, delta_pos = eu.interpret.evolution(model, ohe_seq, force_different=True)
evolved_seq = eu.pp.decode_seq(evolved_ohe_seq)

# For each reported entry: its delta and position, then the original vs. evolved base there.
for step, delta in enumerate(deltas):
    pos = delta_pos[step]
    print(delta, pos)
    print(seq[pos], evolved_seq[pos])

# Finally the two full sequences and the Hamming distance between them, one per line.
print(seq, evolved_seq, eu.pp._utils._hamming_distance(seq, evolved_seq), sep="\n")

[[ 2 61]
 [ 0 40]
 [ 0 61]
 [ 3 56]
 [ 3 57]
 [ 0  6]
 [ 3 23]
 [ 2  5]
 [ 1 39]
 [ 0  3]] [61 40 61 56 57  6 23  5 39  3] [0.00944884 0.00889377 0.00944884 0.00746579 0.00596726 0.00487195
 0.00419958 0.00401482 0.00401039 0.00396453]
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
[[ 0 40]
 [ 3 56]
 [ 0  6]
 [ 3 57]
 [ 2  5]
 [ 0  3]
 [ 1 32]
 [ 2  1]
 [ 1 62]
 [ 1 43]] [40 56  6 57  5  3 32  1 62 43] [0.01000783 0.00768696 0.00515807 0.0044408  0.00430094 0.00425066
 0.00414926 0.0038837  0.00353384 0.0033012 ]
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
[[ 3 56]
 [ 2 35]
 [ 0 35]
 [ 3 23]
 [ 1 32]
 [ 3 27]
 [ 2  1]
 [ 1 27]
 [ 0 38]
 [ 0  6]] [56 35 35 23 32 27  1 27 38  6] [0.00799286 0.00742814 0.00742814 0.00555712 0.00519584 0.00423892
 0.00360246 0.00423892 0.00344777 0.00329013]
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
(4, 66)
[[ 2 35]
 [ 3 23]
 [ 1 32]
 [ 0 35]
 [ 2  1]
 [ 3 11

In [12]:
# Subset of `sdata` holding its first 32 entries (the same slice size used for
# `seqs` / `ohe_seqs` in the set-up cell); used as input to `evolve_seqs_sdata`.
sdata_subset = sdata[:32]

In [20]:
# Evolve every sequence in the subset for N_ROUNDS rounds and keep the returned sequences.
# Per this cell's saved output, the call also modifies `sdata_subset` by adding an
# `evolved_<rounds>_scores` column to `seqs_annot` (`evolved_5_scores` for 5 rounds).
N_ROUNDS = 5  # number of evolution rounds; also determines the name of the added column
evolved_seqs = eu.interpret.evolve_seqs_sdata(
    model,
    sdata_subset,
    rounds=N_ROUNDS,
    force_different=True,
    return_seqs=True,
)

# Turn the visual check into an actual test: the score column for this round count must exist.
assert f"evolved_{N_ROUNDS}_scores" in sdata_subset.seqs_annot.columns, (
    f"evolve_seqs_sdata did not add 'evolved_{N_ROUNDS}_scores' to seqs_annot"
)

Evolving seqs:   0%|          | 0/32 [00:00<?, ?it/s]

SeqData object modified:
    seqs_annot:
        + evolved_5_scores


In [21]:
# Display the per-sequence annotation table of the subset after evolution.
# NOTE(review): the saved output also lists `original_scores` and `evolved_3_scores`, which
# no cell visible here is shown to create (the evolve cell's log reports only
# `evolved_5_scores`, and execution counts jump from 12 to 20) — presumably left over from
# earlier runs in the same kernel; confirm this table reproduces after Restart & Run All.
sdata_subset.seqs_annot

Unnamed: 0,target,train_val,original_scores,evolved_3_scores,evolved_5_scores
seq000,0.866168,True,0.174413,0.214282,0.235417
seq001,0.800737,True,0.147729,0.203262,0.223784
seq002,0.703108,True,0.155737,0.2027,0.219492
seq003,0.465782,False,0.154625,0.197601,0.215584
seq004,0.676781,False,0.158724,0.207494,0.225771
seq005,0.310314,True,0.135241,0.197334,0.227165
seq006,0.954574,True,0.151677,0.20496,0.224981
seq007,0.004132,True,0.157992,0.20527,0.223052
seq008,0.030239,True,0.214875,0.239488,0.253772
seq009,0.319772,True,0.183135,0.223346,0.236756


---

# Scratch