In [1]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Uncomment when the kernel was not started in the repository root; the
# './data' and './checkpoint' paths used below are relative to it.
# %cd 'python/AggNet'
# Quick look at the working directory to confirm the repository layout.
%ls

/home/hew/python/AggNet
APNet_CPAD2_results.csv  [0m[01;34mcheckpoint[0m/  [01;34mdataset[0m/       install.sh  [01;34mscript[0m/
APNet_results.csv        CPAD2.ipynb  example.ipynb  [01;34mmodel[0m/      [01;34mtemp[0m/
APRNet_results.csv       [01;34mdata[0m/        [01;34mfigure[0m/        README.md   [01;34mutils[0m/


In [2]:
import pandas as pd

from model.APNet.data_module import DataModule
from model.APNet.lightning_module import LightningModule
from model.APRNet import APRNet
from utils.file import read_fasta
from utils.lightning import LitModelInference, merge_batch_prediction

root_path: /home/hew/python/AggNet


# Amyloid Peptide Prediction

## load data

In [3]:
# FASTA file with the peptides to classify, relative to the repository root.
fasta_file = './data/AmyHex/Hex142.fasta'
# read_fasta's result is unpacked into two values; only the first (the peptide
# sequences) is used in this notebook.
# NOTE(review): the discarded second value is presumably the FASTA record
# headers — confirm in utils.file.
peptides, _ = read_fasta(fasta_file)

## load model

In [4]:
# APNet checkpoint file, relative to the repository root.
checkpoint = './checkpoint/APNet.ckpt'
# Larger than the 142 peptides loaded above, so prediction runs as one batch.
batch_size = 256
# Hand the Lightning module class, the data module class and the checkpoint
# path to the project's inference helper.
APNet = LitModelInference(LightningModule, DataModule, checkpoint)
# Configure batching for prediction; a single data-loading worker is used.
APNet.set_batch_size(batch_size=batch_size, num_workers=1)

[loading checkpoint]: ./checkpoint/APNet.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


## run prediction

In [5]:
# Run APNet on the peptide list.
# NOTE(review): `predictions` is presumably one entry per batch — confirm in
# utils.lightning.
predictions = APNet.predict(peptides)
# Merge the prediction outputs into a single mapping; the display below shows
# 'loss' and 'preds' keys.
results = merge_batch_prediction(predictions)
# NOTE(review): this echoes the whole logits tensor; showing
# results['preds'].shape would keep the saved notebook smaller.
results

[prepare custom predict dataset] 142
[self.predict_dataset] 142


  0%|          | 0/1 [00:00<?, ?it/s]



defaultdict(<function utils.lightning.predict_utils.merge_batch_prediction.<locals>.<lambda>()>,
            {'loss': {'values': [tensor(0.8899, device='cuda:0')],
              'contains_none': False,
              'dtype': torch.Tensor},
             'preds': tensor([[ 0.6034, -0.3299],
                     [-1.8226,  1.5184],
                     [-2.0010,  1.7504],
                     [-1.6670,  1.5499],
                     [-0.4452,  0.3912],
                     [-0.4491,  0.4443],
                     [ 1.4055, -0.9238],
                     [ 0.1318, -0.1484],
                     [-2.1993,  2.0481],
                     [-1.4407,  1.1843],
                     [-1.1774,  0.8891],
                     [-1.3242,  1.1815],
                     [-1.9393,  1.9351],
                     [-1.8672,  1.5460],
                     [-3.0202,  2.5418],
                     [-1.0115,  0.8029],
                     [-1.5662,  1.2414],
                     [-2.5067,  2.0936],
             

## merge results

In [6]:
# Turn the raw two-column model outputs into probabilities: move to CPU,
# softmax over the last axis, then convert to a NumPy array.
probs = results['preds'].cpu().softmax(dim=-1).numpy()
# Column 1 is treated as the 'amyloid' class: a peptide is labelled amyloid
# only when that probability is strictly greater than 0.5.
pred_labels = ['amyloid' if is_amyloid else 'non-amyloid'
               for is_amyloid in probs[:, 1] > 0.5]
# Assemble one row per input peptide, in input order.
peptide_df = pd.DataFrame(dict(peptide=peptides,
                               probability=probs[:, 1],
                               label=pred_labels))
peptide_df

Unnamed: 0,peptide,probability,label
0,YVEYIG,0.282254,non-amyloid
1,IQIVYK,0.965807,amyloid
2,GYVIIK,0.977053,amyloid
3,STVIIL,0.961463,amyloid
4,SGVIIE,0.697697,amyloid
...,...,...,...
137,STVDIE,0.145791,non-amyloid
138,YQLENY,0.425880,non-amyloid
139,IIPFEQ,0.002803,non-amyloid
140,FQKQQK,0.074111,non-amyloid


# Protein Aggregation Profile

In [7]:
# Keyword arguments forwarded to the APRNet call when a structure is supplied
# (chosen later via `structure is not None`).
APRNet_struct_params = dict(
    beta=3.36,
    delta=0.4,
    t_start=0.51,
    t_expand=0.37,
    t_patience=9,
)
# Keyword arguments for the sequence-only case; this set has no
# 'beta'/'delta' entries.
APRNet_seq_params = dict(
    t_start=0.46,
    t_expand=0.37,
    t_patience=7,
)

## load data

In [8]:
# Same APNet checkpoint path as in the peptide section; it is loaded again
# for the aggregation profile.
checkpoint = './checkpoint/APNet.ckpt'
# No structure is provided, so the sequence-only parameter set is selected
# in the model-loading cell.
structure = None
# WFL VH
sequence = 'QVQLVQSGAEVKKPGSSVKVSCKASGGTFWFGAFTWVRQAPGQGLEWMGGIIPIFGLTNLAQNFQGRVTITADESTSTVYMELSSLRSEDTAVYYCARSSRIYDLNPSLTAYYDMDVWGQGTMVTVSS'

## load model

In [9]:
# Pick the parameter set that matches the available input: the structure
# parameters when a structure is given, the sequence-only ones otherwise.
params = APRNet_struct_params if structure is not None else APRNet_seq_params
# The predictor is later called with a list of sequences, so a single
# structure is wrapped in a list as well.
# NOTE(review): this rebinds `structure`, so re-running this cell with a
# non-None structure nests it one list level deeper on every run.
structure = None if structure is None else [structure]

# load model
# NOTE(review): this rebinds `APNet` from the same checkpoint path used in the
# peptide section, without calling set_batch_size on the new object.
APNet = LitModelInference(LightningModule, DataModule, checkpoint)
# `APRNet` is the imported module; the class of the same name inside it is
# constructed around the APNet inference object.
aprnet = APRNet.APRNet(APNet)
aprnet

[loading checkpoint]: ./checkpoint/APNet.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


<model.APRNet.APRNet.APRNet at 0x1512c9bd2f90>

## run prediction

In [10]:
# Call the predictor on a one-element list of sequences; both results are
# indexed by input sequence in the next cell.
# NOTE(review): this rebinds `pred_labels`, which held the per-peptide
# 'amyloid'/'non-amyloid' strings in the peptide section.
pred_labels, pred_scores = aprnet([sequence], structure, **params)

[prepare custom predict dataset] 123
[self.predict_dataset] 123


  0%|          | 0/1 [00:00<?, ?it/s]



## merge results

In [11]:
# Only one sequence was submitted, so take the first entry of each result.
labels, scores = pred_labels[0], pred_scores[0]
# One row per residue of the input sequence, alongside its score and APR label.
profile_df = pd.DataFrame(dict(residue=[*sequence],
                               scores=scores,
                               APR=labels))
profile_df

Unnamed: 0,residue,scores,APR
0,Q,0.078544,0
1,V,0.149857,0
2,Q,0.154817,0
3,L,0.319754,0
4,V,0.322026,0
...,...,...,...
123,V,0.432922,0
124,T,0.337510,0
125,V,0.149308,0
126,S,0.094948,0
