# Testing `in_silico_evolution` function

**Authorship:**
Adam Klie, *08/08/2022*
***
**Description:**
Notebook for testing the `in_silico_evolution` function, along with the related `in_silico_best_k_muts` and `in_silico_best_mut_seqs` functions

In [1]:
import numpy as np
import pandas as pd

# Autoreload extension
# Only load the extension if this kernel has not loaded it yet, so re-running the
# cell does not try to load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before each cell runs,
# so edits to the package under test are picked up without restarting the kernel.
%autoreload 2

In [2]:
import eugene as eu

Global seed set to 13
2022-08-10 01:03:09.016627: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-10 01:03:09.016664: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Data extract-transform-load (ETL)

In [3]:
# Load the `random1000` dataset; the cell output shows it is a SeqData object with 1000 seqs
sdata = eu.datasets.random1000()
# Called for its effect on `sdata` (the return value is not used): the log printed by this
# cell reports rev_seqs, ohe_seqs and ohe_rev_seqs being added, plus a 'train' annotation
eu.pp.prepare_data(sdata)
# Bare last expression so the notebook displays the SeqData summary
sdata

  0%|          | 0/3 [00:00<?, ?it/s]

SeqData object modified:
	rev_seqs: None -> 1000 rev_seqs added
	ohe_seqs: None -> 1000 ohe_seqs added
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added
    seqs_annot:
        + train


SeqData object with = 1000 seqs
seqs = (1000,)
names = (1000,)
rev_seqs = (1000,)
ohe_seqs = (1000, 66, 4)
ohe_rev_seqs = (1000, 66, 4)
seqs_annot: 'target', 'train'
pos_annot: PyRanges object with 1400 features
seqsm: None
uns: None

# Model instantiation and initialization

In [35]:
# Instantiate a DeepBind model with a single output. The input length is read from the
# prepared data (axis 1 of `sdata.ohe_seqs`, which is (n_seqs, seq_len, 4) in this notebook)
# rather than hard-coded as 66, so the cell keeps working if the dataset changes.
model = eu.models.DeepBind(input_len=int(sdata.ohe_seqs.shape[1]), output_dim=1)
# Initialize the model's weights via eugene's helper; the return value is not used
eu.models.base.init_weights(model)

# Test API

In [40]:
# Set-up of some sequences to test:
# take the first `n_test_seqs` sequences (string and one-hot forms) for the batched tests,
# and draw one of them at random for the single-sequence tests.
n_test_seqs = 32
seqs, ohe_seqs = sdata.seqs[:n_test_seqs], sdata.ohe_seqs[:n_test_seqs]

# `.squeeze()` turns the length-1 draw into a 0-d value usable as an index
seq_num = np.random.choice(n_test_seqs, size=1, replace=False).squeeze()
seq, ohe_seq = seqs[seq_num], ohe_seqs[seq_num]

# Display the chosen sequence and the shapes of the one-hot arrays
seq, ohe_seq.shape, ohe_seqs.shape

('TATAACCTTTCTTGGGGCGACGATAAGCGCAATAACAAGTCCGTCTGCTTCCAAAAGCGATACTGA',
 (66, 4),
 (32, 66, 4))

## Test `in_silico_best_k_muts`

In [44]:
# Ask for the single best mutation (k=1) of the chosen test sequence
mut_ohe_seq, delta, delta_ind = eu.interpret.in_silico_best_k_muts(model, ohe_seq, k=1)

# The returned one-hot array has a leading length-1 axis; drop it before decoding to a string
mut_seq = eu.pp.decode_DNA_seq(mut_ohe_seq.squeeze(axis=0))

# Position reported for the mutation, squeezed once and reused for both lookups
mut_pos = delta_ind.squeeze()

# Display: original base, mutated base, both full sequences and their Hamming distance.
# `eu.pp` is used here for consistency with the other cells of this notebook.
seq[mut_pos], mut_seq[mut_pos], seq, mut_seq, eu.pp._utils._hamming_distance(seq, mut_seq)

('A',
 'T',
 'TATAACCTTTCTTGGGGCGACGATAAGCGCAATAACAAGTCCGTCTGCTTCCAAAAGCGATACTGA',
 'TATTACCTTTCTTGGGGCGACGATAAGCGCAATAACAAGTCCGTCTGCTTCCAAAAGCGATACTGA',
 1)

## Test `in_silico_best_mut_seqs`

In [45]:
# Run the batched best-mutation search over all test sequences at once
mut_ohe_seqs, deltas, delta_inds = eu.interpret.in_silico_best_mut_seqs(model, ohe_seqs, batch_size=32)

for i, mut_ohe in enumerate(mut_ohe_seqs):
    mut_seq = eu.pp.decode_DNA_seq(mut_ohe)
    n_diffs = eu.pp._utils._hamming_distance(seqs[i], mut_seq)

    # Every returned sequence must differ from its original at exactly one position
    assert n_diffs == 1

    # Only print the details for the first three sequences
    if i >= 3:
        continue
    print(deltas[i], delta_inds[i])
    print(seqs[i][delta_inds[i]], mut_seq[delta_inds[i]])
    print(seqs[i])
    print(mut_seq)
    print(n_diffs)
    print()

0.00091798604 32
T A
AGGACAGATTTTCGCGTGTTGGGCCCAACGGATCAGCCTCTATAAACCGTATCCGACAATATAAGG
AGGACAGATTTTCGCGTGTTGGGCCCAACGGAACAGCCTCTATAAACCGTATCCGACAATATAAGG
1

0.00077727437 42
A C
TGACTCCAGGAAGGACGTGTTGTTCGAGAAGAGCAGCCGAAAAAGTTAGCGATCGGCTCGACTTTC
TGACTCCAGGAAGGACGTGTTGTTCGAGAAGAGCAGCCGAAACAGTTAGCGATCGGCTCGACTTTC
1

0.0009640306 42
G C
TATACTAGGAAGTCGACTTAAGAGATGCAAACAAAGAGTGGGGCATATTATCTAGCCAGACGTCAC
TATACTAGGAAGTCGACTTAAGAGATGCAAACAAAGAGTGGGCCATATTATCTAGCCAGACGTCAC
1



## Test `in_silico_evolution`

In [46]:
# Evolve the chosen test sequence; `force_different=True` is passed through to eugene
# (in the recorded run every round reported a distinct position).
evolved_ohe_seq, deltas, delta_pos = eu.interpret.in_silico_evolution(
    model,
    ohe_seq,
    force_different=True,
)

evolved_seq = eu.pp.decode_DNA_seq(evolved_ohe_seq)

# One (delta, position) pair per evolution round: print the score change and position,
# then the base before and after at that position.
for round_delta, pos in zip(deltas, delta_pos):
    print(round_delta, pos)
    print(seq[pos], evolved_seq[pos])

# Full sequences before/after and the number of positions that changed.
# `eu.pp` is used here for consistency with the other cells of this notebook.
print(seq)
print(evolved_seq)
print(eu.pp._utils._hamming_distance(seq, evolved_seq))

0.0009134412 3
A T
0.0007402301 54
A C
0.0006982386 40
C T
0.00059239566 56
G A
0.00069119036 9
T A
0.00069236755 41
C G
0.0007134974 42
G C
0.0006405711 57
C A
0.0006160885 11
T A
0.00067421794 52
A T
TATAACCTTTCTTGGGGCGACGATAAGCGCAATAACAAGTCCGTCTGCTTCCAAAAGCGATACTGA
TATTACCTTACATGGGGCGACGATAAGCGCAATAACAAGTTGCTCTGCTTCCTACAAAGATACTGA
10


---

# Scratch