In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff

In [None]:
from functools import reduce

In [None]:
from model_utils.utils_model import Evaluator

In [None]:
# Load the stage-1 predictions on the synthetic set, one table per inference
# setting (the setting is encoded in the file name: t / k / c).
# NOTE(review): read_pickle can execute arbitrary code while loading; only
# point these paths at files you trust.
def _load_run(path, run_id, keep=('seq',)):
    """Read one pickled prediction table and tag its bounding-box columns.

    Keeps the columns in `keep` plus the three `bb_*` prediction columns, then
    renames `bb_stem`/`bb_iloop`/`bb_hloop` to `stem_<run_id>`/`iloop_<run_id>`/
    `hloop_<run_id>` so that several runs can be merged side by side.
    """
    bb_columns = ['bb_stem', 'bb_iloop', 'bb_hloop']
    table = pd.read_pickle(path)
    table = table[list(keep) + bb_columns]
    renamed = {col: '{}_{}'.format(col[len('bb_'):], run_id) for col in bb_columns}
    return table.rename(columns=renamed)


# Run 1 additionally carries the per-sequence metadata used downstream.
df1 = _load_run('../2021_01_12/data/synthetic_s1_pred_1000_t0p1_k1.pkl.gz', 1,
                keep=('seq', 'len', 'bounding_boxes'))
df2 = _load_run('../2021_01_12/data/synthetic_s1_pred_1000_t0p1_k10_c0p9.pkl.gz', 2)
df3 = _load_run('../2021_01_12/data/synthetic_s1_pred_1000_t0p1_k10_c0p5.pkl.gz', 3)
df4 = _load_run('../2021_01_12/data/synthetic_s1_pred_1000_t0p1_k50_c0p9.pkl.gz', 4)
df5 = _load_run('../2021_01_12/data/synthetic_s1_pred_1000_t0p02_k10_c0p9.pkl.gz', 5)

In [None]:
# No predictor is attached; the evaluator is used below for its bounding-box
# helpers (make_target_bb_df and calculate_bb_metrics).
evaluator = Evaluator(predictor=None) 

In [None]:
# Row count of the first run before merging; compare with len(df) after the merge.
len(df1)

In [None]:
# Join the five runs on the sequence string, left to right (default inner
# join), so each remaining row carries the predictions of every run.
df = df1
for run_table in (df2, df3, df4, df5):
    df = df.merge(run_table, on='seq')



In [None]:
# Row count after merging the five runs on 'seq'.
len(df)

In [None]:
# Add one count column per (structure type, run): how many bounding boxes of
# that type the run predicted for the sequence.
def _count_boxes(boxes):
    """Number of predicted boxes; a missing prediction (None) counts as zero."""
    if boxes is None:
        return 0
    return len(boxes)


for run_id in range(1, 6):
    for bb_type in ('stem', 'iloop', 'hloop'):
        source_col = '{}_{}'.format(bb_type, run_id)
        df['n_{}'.format(source_col)] = df[source_col].apply(_count_boxes)

In [None]:
# Show a single merged row to check the available columns.
df.head(1)

In [None]:
# Pairwise comparison of the number of predicted stems per sequence across
# the five inference settings (diagonal panels hidden).
n_stem_labels = {
    'n_stem_{}'.format(run_id): setting
    for run_id, setting in enumerate(
        ['t=0.1,k=1',
         't=0.1,k=10,c=0.9',
         't=0.1,k=10,c=0.5',
         't=0.1,k=50,c=0.9',
         't=0.02,k=10,c=0.9'],
        start=1)
}

fig = px.scatter_matrix(df[list(n_stem_labels)], labels=n_stem_labels)
fig.update_traces(diagonal_visible=False)
fig.update_layout(title='Number of predicted stems', width=1000, height=1000)
fig.show()

In [None]:
# Per-sequence stem sensitivity for each of the five runs.
#   s_identical_<i>: fraction of target stems with an identical predicted box
#   s_overlap_<i>:   fraction of target stems with an identical OR overlapping box
# Sensitivity is NaN when a sequence has no target stems (ratio undefined), and
# 0 when a run predicted no stems at all for a sequence that has targets.
BB_COLUMNS = ['bb_x', 'bb_y', 'siz_x', 'siz_y']


def _safe_ratio(n_hit, n_total):
    """Return n_hit / n_total as a float, or NaN when n_total is zero."""
    if not n_total:
        return np.nan
    return float(n_hit) / n_total


stem_sensitivity_rows = []
for _, row in df.iterrows():
    data = {'seq': row['seq'], 'len': row['len']}
    # convert old data format to new
    df_target_stem, df_target_iloop, df_target_hloop = evaluator.make_target_bb_df(
        row['bounding_boxes'], convert_tl_to_tr=True)
    for i in range(1, 5 + 1):
        pred_stems = row['stem_{}'.format(i)]
        if pred_stems is None or len(pred_stems) == 0:
            # A DataFrame built from no predictions has no bb_x/bb_y/siz_x/siz_y
            # columns, so selecting them would raise KeyError. With nothing
            # predicted no target can be recovered, so the numerator is 0.
            # Assumes len(df_target_stem) is zero exactly when the evaluator's
            # n_target_total would be - TODO confirm against Evaluator.
            n_target = len(df_target_stem)
            data['s_identical_{}'.format(i)] = _safe_ratio(0, n_target)
            data['s_overlap_{}'.format(i)] = _safe_ratio(0, n_target)
            continue
        df_stem = pd.DataFrame(pred_stems)
        m = evaluator.calculate_bb_metrics(df_target=df_target_stem,
                                           df_pred=df_stem[BB_COLUMNS])
        data['s_identical_{}'.format(i)] = _safe_ratio(m['n_target_identical'],
                                                       m['n_target_total'])
        # union
        data['s_overlap_{}'.format(i)] = _safe_ratio(
            m['n_target_identical'] + m['n_target_overlap'], m['n_target_total'])
    stem_sensitivity_rows.append(data)
df_stem_sensitivity = pd.DataFrame(stem_sensitivity_rows)
        

In [None]:
# Marginal distribution of the identical-box stem sensitivity, one grouped
# histogram series (with rug plot) per inference setting.
identical_setting_names = {
    's_identical_1': 't=0.1,k=1',
    's_identical_2': 't=0.1,k=10,c=0.9',
    's_identical_3': 't=0.1,k=10,c=0.5',
    's_identical_4': 't=0.1,k=50,c=0.9',
    's_identical_5': 't=0.02,k=10,c=0.9',
}

# Long format: one row per (setting, sequence), stacked run after run.
df_plot = (
    df_stem_sensitivity[list(identical_setting_names)]
    .rename(columns=identical_setting_names)
    .melt(var_name='parameter', value_name='sensitivity')
)

px.histogram(
    df_plot,
    x="sensitivity",
    color="parameter",
    marginal="rug",
    histnorm='percent',
    barmode="group",
    nbins=50,
)

In [None]:
# Pairwise comparison of identical-box stem sensitivity across the five
# inference settings (diagonal panels hidden).
s_identical_labels = {
    's_identical_{}'.format(run_id): setting
    for run_id, setting in enumerate(
        ['t=0.1,k=1',
         't=0.1,k=10,c=0.9',
         't=0.1,k=10,c=0.5',
         't=0.1,k=50,c=0.9',
         't=0.02,k=10,c=0.9'],
        start=1)
}

fig = px.scatter_matrix(df_stem_sensitivity[list(s_identical_labels)],
                        labels=s_identical_labels)
fig.update_traces(diagonal_visible=False)
fig.update_layout(title='Sensitivity (identical stem bb)', width=1000, height=1000)
# Sensitivity is a fraction: pin the axes to [0, 1] through update_xaxes /
# update_yaxes rather than xaxis=/yaxis= entries in update_layout.
fig.update_xaxes(range=[0, 1])
fig.update_yaxes(range=[0, 1])
fig.show()

In [None]:
# Pairwise comparison of overlap stem sensitivity (target stems matched by an
# identical or overlapping predicted box) across the five inference settings.
s_overlap_labels = {
    's_overlap_1': 't=0.1,k=1',
    's_overlap_2': 't=0.1,k=10,c=0.9',
    's_overlap_3': 't=0.1,k=10,c=0.5',
    's_overlap_4': 't=0.1,k=50,c=0.9',
    's_overlap_5': 't=0.02,k=10,c=0.9',
}

fig = px.scatter_matrix(df_stem_sensitivity[list(s_overlap_labels)],
                        labels=s_overlap_labels)
fig.update_traces(diagonal_visible=False)
fig.update_layout(
    title='Sensitivity (overlap stem bb)',
    width=1000,
    height=1000,
)
# Sensitivity is a fraction, so every panel should span [0, 1]. Passing
# xaxis=/yaxis= to update_layout only configures the first axis pair of the
# matrix; update_xaxes/update_yaxes apply the range to all of them.
fig.update_xaxes(range=[0, 1])
fig.update_yaxes(range=[0, 1])
fig.show()