In [67]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import roc_curve, auc

In [68]:
exp_id = 'exp0'
exp_path = os.path.join('experiments', exp_id)
df_summ = pd.read_hdf(os.path.join(exp_path, 'summary_data.hdf5'), 'summary')
metrics = pd.read_hdf(os.path.join(exp_path, 'metrics.hdf5'), 'metrics')
print(df_summ.shape)
print(metrics.shape)

(50, 4)
(2350, 6)


In [69]:
df = pd.merge(metrics, df_summ[['cluster', 'entry']], on="entry", how="outer")
df.sort_values(by=['epoch'], inplace = True)
print(df.shape)
print(df.cluster.notna().all())
df.head()

(2350, 7)
True


Unnamed: 0,phase,epoch,entry,output,target,loss,cluster
0,training,0.0,residue-ppi-BA_55312.BL00080001:M-P,"[2.8488917891422716e-08, 1.0]",0.0,2.596328,3.0
1173,training,0.0,residue-ppi-BA_55234.BL00040001:M-P,"[4.3718620190702495e-07, 0.9999995231628418]",1.0,2.596328,4.0
1122,training,0.0,residue-ppi-BA_55233.BL00040001:M-P,"[4.371857755813835e-07, 0.9999995231628418]",1.0,2.596328,4.0
1071,training,0.0,residue-ppi-BA_55309.BL00160001:M-P,"[0.48398077487945557, 0.5160191655158997]",0.0,2.596328,2.0
1020,training,0.0,residue-ppi-BA_55382.BL00010001:M-P,"[1.0296871266746166e-07, 0.9999998807907104]",0.0,2.596328,4.0


In [80]:
fig = px.line(
    df[(df.phase =='training') | (df.phase =='validation')],
    x='epoch',
    y='loss',
    color='phase',
    markers=True)

fig.update_layout(
    xaxis_title='Epoch #',
    yaxis_title='Loss',
    width=800, height=500,
    title='Loss vs epochs' 
)

fig.show()
fig.write_html(os.path.join(exp_path, 'loss_epoch.html'))

In [95]:
# Are we interested in 0's or 1's predictions?
epoch = 50
fig = go.Figure()

for set in ['training', 'validation', 'testing']:
    if set == 'testing':
        df_plot = df[(df.phase == set)]
    else:
        df_plot = df[(df.epoch == epoch) & (df.phase == set)]
    y_true = df_plot.target
    y_score = np.array(df_plot.output.values.tolist())[:, 1]
    fpr, tpr, thr = roc_curve(y_true, y_score)
    name = f'{set}, AUC={auc(fpr, tpr):.4f}'

    fig.add_trace(go.Scatter(x=fpr, y=tpr, name=name, mode='markers+lines'))

fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

fig.update_layout(
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    yaxis=dict(scaleanchor="x", scaleratio=1),
    xaxis=dict(constrain='domain'),
    width=800, height=500,
    showlegend=True,
    title='AUC curves' 
)

fig.show()
fig.write_html(os.path.join(exp_path, 'auc.html'))



No positive samples in y_true, true positive value should be meaningless


No positive samples in y_true, true positive value should be meaningless



In [None]:
# pr curve
# complete metrics