In [1]:
import torch

from aptatrans_pipeline import AptaTransPipeline

# Initialize the Pipeline

In [2]:
# Build the AptaTrans pipeline object that every later cell calls into.
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook is not tied to CUDA-equipped machines (CPU runs will be slower).
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): d_model / d_ff / n_layers / n_heads / dropout look like the
# transformer-encoder hyperparameters -- confirm against AptaTransPipeline.
pipeline = AptaTransPipeline(
    d_model=128,
    d_ff=512,
    n_layers=6,
    n_heads=8,
    dropout=0.1,
    load_best_pt=True,  # recorded run printed "Best pre-trained models are loaded!"
    device=device,
    seed=1004,  # fixed seed passed to the pipeline
)

Best pre-trained models are loaded!


# Pretrain the Aptamer Encoder

In [3]:
# Settings for pretraining the aptamer (RNA) encoder, named so they can be
# tuned in one place.
RNA_PT_BATCH_SIZE = 68
RNA_PT_EPOCHS = 1000
RNA_PT_LR = 1e-5

# Prepare the RNA pretraining data first, then run the pretraining itself.
pipeline.set_data_rna_pt(batch_size=RNA_PT_BATCH_SIZE)  # dataset from bpRNA
pipeline.pretrain_aptamer(epochs=RNA_PT_EPOCHS, lr=RNA_PT_LR)

# Pretrain the Protein Encoder

In [4]:
# Settings for pretraining the protein encoder, named so they can be tuned
# in one place.
PROTEIN_PT_BATCH_SIZE = 68
PROTEIN_PT_EPOCHS = 1000
PROTEIN_PT_LR = 1e-5

# Prepare the protein pretraining data first, then run the pretraining itself.
pipeline.set_data_protein_pt(batch_size=PROTEIN_PT_BATCH_SIZE)  # dataset from PDB
pipeline.pretrain_protein(epochs=PROTEIN_PT_EPOCHS, lr=PROTEIN_PT_LR)

# Fine-tune AptaTrans for Aptamer-Protein Interaction (API) Prediction

In [5]:
# Settings for fine-tuning, named so they can be tuned in one place.
TRAIN_BATCH_SIZE = 16
TRAIN_EPOCHS = 200
TRAIN_LR = 1e-5

# Prepare the training data first, then run the fine-tuning itself.
pipeline.set_data_for_training(batch_size=TRAIN_BATCH_SIZE)
pipeline.train(epochs=TRAIN_EPOCHS, lr=TRAIN_LR)

# Predict API Scores

In [6]:
# Query aptamer sequence -- replace with your own.
aptamer = 'AACGCCGCGCGUUUAACUUCC'

# Target protein sequence (one-letter amino-acid codes). Adjacent string
# literals are concatenated by Python, so this is a single 172-character string.
target = (
    'STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG'
    'QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDL'
    'PSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSK'
)

In [7]:
# Score the aptamer/target pair. In the recorded run this printed both
# sequences and the score, and returned a float32 array holding one value.
api_score = pipeline.inference(aptamer, target)
api_score

Predict the Aptamer-Protein Interaction
load the best model for api!
Aptamer :  AACGCCGCGCGUUUAACUUCC
Target Protein :  STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSK
Score :  [[0.91837984]]


array([[0.91837984]], dtype=float32)

# Recommend Candidate Aptamers

In [8]:
# Target protein to generate candidate aptamers for (one-letter amino-acid
# codes). Adjacent string literals are concatenated by Python into one string.
target = (
    'STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG'
    'QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDL'
    'PSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSK'
)

In [None]:
# Settings for candidate-aptamer recommendation, named so they can be tuned
# in one place.
N_APTAMERS = 5
SEARCH_DEPTH = 40
N_ITERATIONS = 1000

# Long-running: the recorded run took roughly 8 seconds per round and printed
# the best subsequence, its depth and the elapsed time after each round.
pipeline.recommend(
    target,
    n_aptamers=N_APTAMERS,
    depth=SEARCH_DEPTH,
    iteration=N_ITERATIONS,
)

load the best model for api!
1 round start!!!


  return torch._native_multi_head_attention(


best subsequence: C__G_U_G
Depth: 4
00:00:8.107326
2 round start!!!
best subsequence: C__G_U_GA_U__U
Depth: 7
00:00:16.187382
3 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_
Depth: 10
00:00:24.268504
4 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG_
Depth: 13
00:00:32.436020
5 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__C
Depth: 16
00:00:40.685504
6 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__CU_U__A
Depth: 19
00:00:48.925924
7 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__CU_U__AC__C_G
Depth: 22
00:00:56.993840
8 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__CU_U__AC__C_GU__GA_
Depth: 25
00:01:5.087333
9 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__CU_U__AC__C_GU__GA_U__AA__C
Depth: 29
00:01:13.221608
10 round start!!!
best subsequence: C__G_U_GA_U__UU_C_A_C__GG__AA__CU_U__AC__C_GU__GA_U__AA__CA_G__C_A
Depth: 33
00:01:21.251776
11 round start!!!
best subsequence: C__G_U_G