# Example of running the MCTS algorithm to generate peptides

## Import packages

In [1]:
import numpy as np
import pandas as pd
from tensorflow import keras
import sys
import timeit
sys.path.append('../peptide_generators/')
from mcts_camsol import Node, mcts
# to generate peptides with the "three-tryptophan constraint"
# from mcts_tryptophan_limits import Node, mcts
# to generate peptides that preferentially bind to one plastic over another
# from mcts_competing_design import Node, mcts

## Define a sequence generation function

In [2]:
def generate_seqs(num_seqs, surrogate_model, num_iterations=2000, exploration_param=1.0, sf=1.0):
    """Run `num_seqs` independent MCTS searches and gather their results.

    Each run builds a fresh root with `Node([])`, calls `mcts` once, and
    prints the elapsed wall-clock time together with the returned sequence
    and score.

    Parameters
    ----------
    num_seqs : int
        Number of searches to run; one result is collected per search.
    surrogate_model
        Score predictor, passed to `mcts` unchanged.
    num_iterations : int, default 2000
        Passed to `mcts` unchanged.
    exploration_param : float, default 1.0
        Passed to `mcts` unchanged.
    sf : float, default 1.0
        Scaling factor, passed to `mcts` unchanged.

    Returns
    -------
    tuple of (list, list)
        The sequences and the scores returned by `mcts`, in run order.
    """
    found_seqs = []
    found_scores = []
    for run_number in range(1, num_seqs + 1):
        # The root is created before the clock starts, so its construction
        # is not part of the reported time.
        search_root = Node([])
        t_begin = timeit.default_timer()
        run_seq, run_score = mcts(
            search_root, surrogate_model, num_iterations, exploration_param, sf
        )
        found_seqs.append(run_seq)
        found_scores.append(run_score)
        elapsed = timeit.default_timer() - t_begin
        print('Run', run_number, 'done...', 'Time:', elapsed)
        print('Sequence:', run_seq, 'Score:', run_score)
    return found_seqs, found_scores

# This function needs slight modification to generate peptides with the "three-tryptophan constraint" 
# or to generate peptides that preferentially bind to one plastic over another

## Load the trained score predictor

In [3]:
# Load the trained PE score predictor as an example.
# For the competing design strategy, both the PE and the PS models need to be loaded.
pe_model_dir = '../score_predictors/pe/trained_model/'
surrogate_model = keras.models.load_model(pe_model_dir)



## Run the algorithm

In [4]:
# Generate 10 peptides with a scaling factor of 2.0.
# To generate peptides without any constraint, set sf = 0.0.
sf = 2.0
seqs, scores = generate_seqs(
    num_seqs=10,
    surrogate_model=surrogate_model,
    num_iterations=2000,
    exploration_param=1.0,
    sf=sf,
)

Run 1 done... Time: 87.587777
Sequence: [13, 3, 12, 16, 12, 12, 7, 5, 16, 9, 3, 12] Score: -51.66753788930777
Run 2 done... Time: 80.00834510000001
Sequence: [12, 16, 8, 16, 16, 13, 16, 16, 17, 6, 0, 3] Score: -53.5555723495745
Run 3 done... Time: 79.14935940000004
Sequence: [12, 16, 8, 16, 16, 9, 17, 6, 11, 17, 3, 16] Score: -48.497731394566124
Run 4 done... Time: 81.4181653
Sequence: [16, 3, 16, 7, 12, 5, 9, 16, 9, 9, 12, 3] Score: -51.55953598740664
Run 5 done... Time: 80.92469890000001
Sequence: [16, 16, 16, 8, 8, 12, 12, 16, 16, 12, 9, 12] Score: -53.35455507353723
Run 6 done... Time: 79.94938299999995
Sequence: [16, 7, 9, 16, 16, 13, 16, 17, 1, 8, 15, 5] Score: -51.749561359720424
Run 7 done... Time: 79.79710470000009
Sequence: [5, 16, 16, 16, 12, 9, 9, 3, 16, 16, 5, 5] Score: -54.67891520502555
Run 8 done... Time: 80.1347151
Sequence: [9, 5, 16, 16, 7, 8, 16, 16, 16, 12, 5, 3] Score: -54.46932668393645
Run 9 done... Time: 78.65486169999997
Sequence: [8, 16, 7, 7, 12, 9, 3, 6, 16

In [5]:
# One-letter codes, ordered so that list position == integer index used in `seqs`.
amino_acid_alphabet = list('ADEFGHIKLMNQRSTVWY')
# Integer index -> one-letter code lookup.
amino_acid_dict = dict(enumerate(amino_acid_alphabet))
# Decode every generated index sequence into its letter string.
sequences = [
    "".join([amino_acid_dict[index] for index in encoded])
    for encoded in seqs
]

In [6]:
# Wrap the decoded sequences and their scores as named Series ...
df_seq = pd.Series(sequences, name='Sequences')
df_score = pd.Series(scores, name='Scores')
# ... and place them side by side, one column per Series (aligned on index).
df = pd.DataFrame({column.name: column for column in (df_seq, df_score)})
df

Unnamed: 0,Sequences,Scores
0,SFRWRRKHWMFR,-51.667538
1,RWLWWSWWYIAF,-53.555572
2,RWLWWMYIQYFW,-48.497731
3,WFWKRHMWMMRF,-51.559536
4,WWWLLRRWWRMR,-53.354555
5,WKMWWSWYDLVH,-51.749561
6,HWWWRMMFWWHH,-54.678915
7,MHWWKLWWWRHF,-54.469327
8,LWKKRMFIWMFY,-51.117883
9,WFMWRMWWRRRQ,-52.958619
