In [1]:
import torch

from aptatrans_pipeline import AptaTransPipeline

# Initialize the Pipeline

In [2]:
# Pick the compute device at run time instead of hard-coding 'cuda', so the
# notebook still runs (slowly) on machines without a visible CUDA device.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the pipeline object that every later cell in this notebook calls into.
# NOTE(review): the meaning of each keyword is defined by AptaTransPipeline,
# which is not visible here; the values are kept exactly as in the original.
pipeline = AptaTransPipeline(
    dim=128,
    mult_ff=2,
    n_layers=6,
    n_heads=8,
    dropout=0.1,
    load_best_pt=False,  # presumably: do not load saved pretrained weights -- TODO confirm
    device=DEVICE,
    seed=1004,  # fixed seed passed to the pipeline
)

# Pretrain the Aptamer Encoder

In [4]:
# Prepare the data for the aptamer-encoder pretraining step (batch size 32).
# Must run before `pretrain_encoder_aptamer` in the next cell.
pipeline.set_data_rna_pt(batch_size=32) # dataset from bpRNA

In [None]:
# Pretrain the aptamer encoder for 10 epochs with learning rate 1e-5.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run in the recorded session.
pipeline.pretrain_encoder_aptamer(epochs=10, lr=1e-5)

# Pretrain the Protein Encoder

In [3]:
# Prepare the data for the protein-encoder pretraining step (batch size 32).
# Must run before `pretrain_encoder_protein` in the next cell.
pipeline.set_data_protein_pt(batch_size=32) # dataset from PDB

In [None]:
# Pretrain the protein encoder with learning rate 1e-5.
# NOTE(review): 1000 epochs here versus 10 for the aptamer encoder -- confirm
# the difference is intentional before launching a long run.
pipeline.pretrain_encoder_protein(epochs=1000, lr=1e-5)

# Fine-tune AptaTrans for Aptamer-Protein Interaction (API) Prediction

In [4]:
# Prepare the data for the fine-tuning step (batch size 16, versus 32 in the
# pretraining cells). Must run before `train` in the next cell.
pipeline.set_data_for_training(batch_size=16)

In [None]:
# Fine-tune the model for 200 epochs with learning rate 1e-5.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run in the recorded session.
pipeline.train(epochs=200, lr=1e-5)

# Predict API Scores

In [3]:
# Query aptamer (RNA alphabet) whose interaction score we want.
aptamer = "AACGCCGCGCGUUUAACUUCC"

# Amino-acid sequence of the target protein, split across lines purely for
# readability; adjacent literals concatenate into one string.
target = (
    "STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSY"
    "RKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLC"
    "VFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDL"
    "PSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLV"
    "REIRKHKEKMSK"
)

In [4]:
# Score the aptamer/target pair. Per the output below, this loads the best
# saved interaction model, prints both sequences and the score, and returns
# the score as a 1x1 float32 array.
pipeline.inference(aptamer, target)

Predicting the Aptamer-Protein Interaction
Loading the best model for API!
Aptamer: AACGCCGCGCGUUUAACUUCC
Target Protein: STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSK
Score: [[0.4737131]]


array([[0.4737131]], dtype=float32)

# Recommend Candidate Aptamers

In [5]:
# Amino-acid sequence of the protein to recommend aptamers for, split across
# lines purely for readability; adjacent literals concatenate into one string.
target = (
    "STEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSY"
    "RKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLC"
    "VFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDL"
    "PSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLV"
    "REIRKHKEKMSK"
)

In [6]:
# Search for candidate aptamers for `target`. Per the log below, this loads the
# best saved interaction model and proceeds in rounds; each round extends the
# best subsequence and raises the reported depth by 3, taking about 8 seconds
# per round in the recorded run.
# NOTE(review): presumably `n_aptamers` is the number of candidates returned,
# `depth` the final search depth and `iteration` the per-round search budget
# -- confirm against AptaTransPipeline.recommend.
pipeline.recommend(target, n_aptamers=5, depth=40, iteration=1000)

Loading the best model for API!
1 round start!!!
best subsequence: C__CA_
Depth: 3
00:00:7.853315
2 round start!!!
best subsequence: C__CA__CG__C
Depth: 6
00:00:15.698875
3 round start!!!
best subsequence: C__CA__CG__C_UG__C
Depth: 9
00:00:23.564412
4 round start!!!
best subsequence: C__CA__CG__C_UG__CC_G__C
Depth: 12
00:00:31.391053
5 round start!!!
best subsequence: C__CA__CG__C_UG__CC_G__CA__U_G
Depth: 15
00:00:39.248983
6 round start!!!
best subsequence: C__CA__CG__C_UG__CC_G__CA__U_G_A_UC_
Depth: 18
00:00:47.136729
7 round start!!!
best subsequence: C__CA__CG__C_UG__CC_G__CA__U_G_A_UC__U_GC_
Depth: 21
00:00:55.095853
8 round start!!!
best subsequence: C__CA__CG__C_UG__CC_G__CA__U_G_A_UC__U_GC__UA__A
Depth: 24
00:01:2.970259
9 round start!!!
