In [1]:
import pandas as pd
import numpy as np

In [2]:
from model_utils.utils_s2 import Predictor

In [3]:
import dgutils.pandas as dgp

In [4]:
# Build the Predictor from a saved checkpoint file. The path is relative, so it
# resolves against the notebook's current working directory.
predictor = Predictor('result/synthetic_s2_training_2/model_ckpt_ep_12.pth')

In [5]:
# Toy example: three small hand-written tables (stems, iloops, hloops) that are
# fed to predictor.predict() as a quick check.
#
# The string below is a reference listing of the boxes the tables are based on.
# It is a bare expression statement and is not assigned to anything.
# NOTE(review): prob_median values in the tables are rounded relative to the
# listing, and the hloop n_proposal_norm values (0.83, 0.9) are roughly twice
# the listed ones -- presumably the same doubling summarize_df applies when
# hloop=True; confirm.
'''
   bb_x  bb_y  siz_x  siz_y bb_type  n_proposal  prob_median  n_proposal_norm
0     4     9      6      6   hloop          15     0.930053         0.416667
1     1    12      4      4    stem          16     0.925148         1.000000
2    41    50     10     10   hloop          45     0.992122         0.450000
3    38    53      4      4    stem          16     0.994010         1.000000
4    36    55      3      3   iloop           9     0.995822         1.000000
5    31    60      6      6    stem          36     0.978096         1.000000
'''

stems = pd.DataFrame([
    dict(bb_x=1, bb_y=12, siz_x=4, siz_y=4, prob_median=0.93, n_proposal_norm=1.0),
    dict(bb_x=38, bb_y=53, siz_x=4, siz_y=4, prob_median=0.99, n_proposal_norm=1.0),
    dict(bb_x=31, bb_y=60, siz_x=6, siz_y=6, prob_median=0.98, n_proposal_norm=1.0),
])

iloops = pd.DataFrame([
    dict(bb_x=36, bb_y=55, siz_x=3, siz_y=3, prob_median=0.99, n_proposal_norm=1.0),
])

hloops = pd.DataFrame([
    dict(bb_x=4, bb_y=9, siz_x=6, siz_y=6, prob_median=0.93, n_proposal_norm=0.83),
    dict(bb_x=41, bb_y=50, siz_x=10, siz_y=10, prob_median=0.99, n_proposal_norm=0.9),
])



In [6]:
# Run the model on the toy tables; as the cell's last expression, the returned
# tensor is displayed. The recorded output holds six values, matching the
# 3 stem + 1 iloop + 2 hloop input rows.
predictor.predict(stems, iloops, hloops)

tensor([[[0.8489],
         [0.9907],
         [0.9960],
         [0.9910],
         [0.9029],
         [0.9904]]], grad_fn=<SigmoidBackward>)

In [7]:
def summarize_df(df, hloop=False):
    """Collapse per-box proposal data into two summary columns.

    Two columns are derived from ``siz_x``, ``siz_y`` and ``prob`` through
    ``dgp.add_columns``:

    * ``prob_median``: ``np.median`` of the row's ``prob`` values.
    * ``n_proposal_norm``: ``len(prob)`` divided by ``siz_x * siz_y``,
      multiplied by 2 when ``hloop`` is true.

    Args:
        df: DataFrame providing at least the columns ``bb_x``, ``bb_y``,
            ``siz_x``, ``siz_y`` and ``prob``.
        hloop: when true, ``n_proposal_norm`` is doubled.
            NOTE(review): presumably because only half of an hloop box can
            hold proposals -- confirm.

    Returns:
        DataFrame with the columns ``bb_x``, ``bb_y``, ``siz_x``, ``siz_y``,
        ``prob_median`` and ``n_proposal_norm``, in that order.
    """

    def _row_summary(siz_x, siz_y, prob):
        # Parameter names are kept as-is in case dgp.add_columns passes the
        # input columns by keyword.
        median_prob = np.median(prob)
        density = len(prob) / float(siz_x * siz_y)
        return median_prob, (2 * density if hloop else density)

    with_summary = dgp.add_columns(
        df,
        ['prob_median', 'n_proposal_norm'],
        ['siz_x', 'siz_y', 'prob'],
        _row_summary,
    )
    # Keep only the box geometry and the two derived columns.
    kept_columns = ['bb_x', 'bb_y', 'siz_x', 'siz_y', 'prob_median', 'n_proposal_norm']
    return with_summary[kept_columns]

In [8]:
# Load the pickled dataset. The path is relative to the notebook's working
# directory. NOTE: pd.read_pickle can execute arbitrary code while unpickling,
# so only point this at trusted files.
df = pd.read_pickle('../2020_11_24/data/rfam151_s1_pruned.pkl.gz')

In [9]:
# Pick the example to inspect by row position. The earlier `x = df.iloc[4]`
# was overwritten immediately and has been dropped; change the position here
# to look at a different example.
x = df.iloc[6]

In [10]:
x

len                                                              54
one_idx           ([1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 28, 29, ...
seq               CAAAAGUCUGGGCUAAGCCCACUGAUGAGCCGCUGAAAUGCGGCGA...
seq_id                                                    RF00008_B
bounding_boxes    [((1, 47), (6, 6), stem), ((8, 16), (5, 5), st...
bb_stem           [{'bb_x': 0, 'bb_y': 26, 'siz_x': 2, 'siz_y': ...
bb_iloop          [{'bb_x': 1, 'bb_y': 25, 'siz_x': 4, 'siz_y': ...
bb_hloop          [{'bb_x': 12, 'bb_y': 16, 'siz_x': 5, 'siz_y':...
df_target         [{'bb_x': 1, 'bb_y': 52, 'siz_x': 6, 'siz_y': ...
n_bb_found                                                        2
Name: 6, dtype: object

In [11]:
# Summarize the three box lists of the selected example. Only the 'bb_hloop'
# list is summarized with hloop=True; the other two use hloop=False.
stems, iloops, hloops = (
    summarize_df(pd.DataFrame(x[column]), hloop=(column == 'bb_hloop'))
    for column in ('bb_stem', 'bb_iloop', 'bb_hloop')
)

In [12]:
# Show the example's 'df_target' records as a table.
pd.DataFrame(x['df_target'])

Unnamed: 0,bb_x,bb_y,siz_x,siz_y,bb_type
0,1,52,6,6,stem
1,8,20,5,5,stem
2,28,43,4,4,stem
3,12,16,5,5,hloop
4,31,40,10,10,hloop


In [13]:
# Score all boxes, take index 0 on the first and last axes, and convert the
# result to a NumPy array. detach() is needed because the model output
# carries autograd history.
pred = predictor.predict(stems, iloops, hloops)[0, :, 0].detach().numpy()

In [14]:
# Sanity check: there must be exactly one prediction per row across the three tables.
assert len(pred) == len(stems) + len(iloops) + len(hloops)

In [15]:
# Split the flat prediction vector back onto the three tables.
# Assumes predictions are ordered stems, then iloops, then hloops
# -- TODO confirm against Predictor.predict.
# The hloop slice uses an explicit start offset: the previous
# pred[-len(hloops):] returns the whole array when hloops is empty (-0 == 0).
stems['pred'] = pred[:len(stems)]
iloops['pred'] = pred[len(stems):len(stems) + len(iloops)]
hloops['pred'] = pred[len(stems) + len(iloops):]

In [16]:
stems

Unnamed: 0,bb_x,bb_y,siz_x,siz_y,prob_median,n_proposal_norm,pred
0,0,26,2,2,0.114655,1.0,0.571613
1,0,53,7,7,0.721856,1.0,0.373738
2,4,22,3,3,0.135244,0.777778,0.460573
3,5,25,3,3,0.181055,1.0,0.489539
4,8,20,5,5,0.648889,0.4,0.691276
5,9,19,4,4,0.127608,0.375,0.148285
6,11,43,3,3,0.07163,0.555556,0.001648
7,28,43,5,5,0.874696,1.0,0.990429


In [17]:
iloops

Unnamed: 0,bb_x,bb_y,siz_x,siz_y,prob_median,n_proposal_norm,pred
0,1,25,4,4,0.113429,0.375,0.220247
1,6,20,4,2,0.16367,0.625,0.327625
2,6,47,3,3,0.263365,0.888889,0.005395
3,6,47,6,5,0.100151,0.2,0.001614
4,7,23,2,4,0.220219,0.75,0.389061


In [18]:
hloops

Unnamed: 0,bb_x,bb_y,siz_x,siz_y,prob_median,n_proposal_norm,pred
0,12,16,5,5,0.917434,0.8,0.971985
1,32,39,8,8,0.899188,0.875,0.994422


In [19]:
pred

array([0.5716126 , 0.37373847, 0.4605726 , 0.48953927, 0.6912757 ,
       0.14828518, 0.00164808, 0.9904288 , 0.2202472 , 0.32762548,
       0.00539522, 0.00161387, 0.38906115, 0.9719848 , 0.99442166],
      dtype=float32)