In [1]:
import pandas as pd
import numpy as np
import scipy.stats as sps
import seaborn as sns
import matplotlib.pyplot as plt
import os
import random
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import multiprocessing

In [2]:
def read_sota(path):
    casp13 = pd.DataFrame()
    casp12 = pd.DataFrame()
    for file in os.listdir(path):
        if not file.endswith('.csv'):
            continue
        target = file[:-4]
        df = pd.read_csv(os.path.join(path, file), index_col=0, sep='\t')
        df['model'] = df['model'].transform(lambda x: f'{target}_{x}')
        if file >= 'T0949.csv':
            casp13 = pd.concat([casp13, df])
        else:
            casp12 = pd.concat([casp12, df])
    return (casp12.set_index('model'), casp13.set_index('model'))

In [4]:
sota12, sota13 = read_sota('../sota_from_casp')
sota13

Unnamed: 0_level_0,sbrod,sbrod_server,proq3,proq3d_lddt,voromqa_b
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
T0986s2_BAKER-ROSETTASERVER_TS4,0.737451,0.387212,0.5111,0.3954,0.438
T0986s2_Delta-Gelly-Server_TS1,0.727368,0.372066,0.6292,0.4802,0.473
T0986s2_RaptorX-DeepModeller_TS2,0.722686,0.365234,0.5878,0.5049,0.456
T0986s2_Zhang-Server_TS2,0.720555,0.362166,0.5748,0.5090,0.521
T0986s2_RaptorX-DeepModeller_TS1,0.714794,0.353996,0.5385,0.5037,0.450
...,...,...,...,...,...
T1009_HMSCasper-Refiner_TS5,,0.126108,0.1398,0.2324,0.197
T1009_HMSCasper-Refiner_TS3,,0.098133,0.1037,0.2814,0.154
T1009_HMSCasper-Refiner_TS4,,0.092679,0.0803,0.2697,0.157
T1009_HMSCasper-Refiner_TS2,,0.086762,0.0642,0.2694,0.159


In [5]:
sota_sota12, sota_sota13 = read_sota('../sota')
sota_sota12

Unnamed: 0_level_0,proq3,sbrod,voromqa
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
T0928_MULTICOM-CONSTRUCT_TS4,0.502581,1.625210,0.296682
T0928_GOAL_TS1,0.647628,1.684738,0.420234
T0928_MUfold1_TS5,0.318427,1.515931,0.227225
T0928_FALCON_TOPOX_TS5,0.571618,1.628121,0.328569
T0928_myprotein-me_TS4,0.450548,1.595801,0.280673
...,...,...,...
T0862_IntFOLD4_TS1,0.318637,1.464842,0.181786
T0862_Pcons-net_TS2,0.209801,1.463733,0.238456
T0862_QUARK_TS2,0.389019,1.595531,0.313818
T0862_HHGG_TS5,0.098488,1.411283,0.137083


In [6]:
proq3_targets13 = set(sota13['proq3'].dropna().index)
proq3_targets12 = set(sota12['proq3'].dropna().index)

In [7]:
np.sum(np.isnan(sota['proq3'].dropna().values))

NameError: name 'sota' is not defined

In [8]:
voromqa_b_targets12 = set(sota12['voromqa'].dropna().index)
voromqa_b_targets13 = set(sota13['voromqa_b'].dropna().index)

In [9]:
sbrod_targets13 = set(sota13['sbrod_server'].dropna().index)
sbrod_targets12 = set(sota_sota12['sbrod'].dropna().index)

In [10]:
targets_all12 = proq3_targets12 & voromqa_b_targets12 & sbrod_targets12
len(targets_all12)

5471

In [11]:
targets_all13 = proq3_targets13 & voromqa_b_targets13 & sbrod_targets13
len(targets_all13)

11798

In [12]:
def network_results(results, filter_targets=None):
    glob = pd.DataFrame()
    for file in os.listdir(results):
        target = file[:-4]
        df = pd.read_csv(os.path.join(results, file))
        df['model'] = df['model'].transform(lambda x: f'{target}_{x}')
        if filter_targets is not None:
            df = df[df['model'].isin(filter_targets)]
        glob = pd.concat([glob, df])
    return glob

In [13]:
def one_thread(args):
        pearsons = []
        spearmans = []
        mses = []
        r2s = []
        results = args[0]
        K = args[1]
        sz = args[2]
        gen = tqdm(range(K)) if args[3] else range(K)
        for i in gen:
            sample = np.array(random.choices(results, k=sz))
            pred = sample[:, 1]
            gt = sample[:, 2]
            pearsons.append(sps.pearsonr(gt, pred)[0])
            spearmans.append(sps.spearmanr(gt, pred)[0])
            mses.append(mean_squared_error(gt, pred))
            r2s.append(r2_score(gt, pred))
        return (np.array(pearsons), np.array(spearmans), np.array(mses), np.array(r2s))

def bootstrap(results, K, sz=None, n_jobs=10):   
    if sz is None:
        sz = len(results)
    
    pearsons = []
    spearmans = []
    mses = []
    r2s = []
    
    args = []
    for i in range(n_jobs):
        args.append([results, K // n_jobs, sz, i == 0])
    
    p = multiprocessing.Pool(n_jobs)
    result = p.map(one_thread, args)
    for res in result:
        pearsons.append(res[0])
        spearmans.append(res[1])
        mses.append(res[2])
        r2s.append(res[3])
    return (np.concatenate(pearsons), np.concatenate(spearmans), np.concatenate(mses), np.concatenate(r2s))

In [14]:
def make_intervals(pearsons, spearmans, mses, r2s):
    z = sps.norm.ppf(0.975)
    def interval(sample, round_to=2, to_z=None):
        if to_z is None:
            return f'{round(sample.mean(), round_to)} ± {round(z * sample.std(), round_to)}'
        else:
            return f'{round(sample.mean() * 100, round_to)} ± {round(z * sample.std() * 100, round_to)}'
    
    return pd.DataFrame({
        'pearson_glob_glob': [interval(pearsons, 3)],
        'spearman_glob_glob': [interval(spearmans, 3)],
        'mse_glob_glob': [interval(mses, 4)],
        'r2_glob_glob': [interval(r2s, 3)],
    })

In [56]:
def make_sota(sota, preds):
    sota_filtered = sota.reset_index()
    if 'proq3d_lddt' in sota_filtered.columns:
        sota_filtered = sota_filtered.drop(['proq3d_lddt'], axis=1)
    sota_filtered = sota_filtered.dropna()
    targets = set(preds.model)
    sota_filtered = sota_filtered[sota_filtered['model'].isin(set(preds.model))]

    #print(set(sota_filtered.model) == set(preds.model))
    #print(len(set(sota_filtered.model)))
    
    sota_filtered = sota_filtered.set_index('model')
    sota_filtered = sota_filtered.join(preds.set_index('model'))
    proq3 = sota_filtered[['proq3', 'true']]
    if 'voromqa_b' in sota.columns:   
        voromqa = sota_filtered[['voromqa_b', 'true']]
    else:
        voromqa = sota_filtered[['voromqa', 'true']]
    if 'sbrod_server' in sota.columns:
        sbrod = sota_filtered[['sbrod_server', 'true']]
    elif 'sbrod' in sota.columns:
        sbrod = sota_filtered[['sbrod', 'true']]
    else:
        sbrod = pd.DataFrame()
    proq3 = proq3.reset_index()
    voromqa = voromqa.reset_index()
    sbrod = sbrod.reset_index()
    return (proq3.values, voromqa.values, sbrod.values)

# Global metrics

## CASP13

In [16]:
s5_or5 = network_results('globs/nikita_best/CASP13_test/checkpoint_epoch_39',
                         targets_all13).values

In [17]:
len(s5_or5)

10882

In [18]:
pearsons, spearmans, mses, r2s = bootstrap(s5_or5, 1000000, 6000, 20)

100%|██████████| 50000/50000 [16:22<00:00, 50.89it/s]


In [19]:
s5_or5_global = make_intervals(pearsons, spearmans, mses, r2s)

In [20]:
s5_or5_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.806 ± 0.009,0.808 ± 0.01,0.0131 ± 0.0004,-0.118 ± 0.033


In [21]:
base = network_results('globs/nikita_base/CASP13_test/checkpoint_epoch_15',
                      targets_all13).values

In [22]:
pearsons_b, spearmans_b, mses_b, r2s_b = bootstrap(base, 1000000, 6000)

100%|██████████| 100000/100000 [26:20<00:00, 63.26it/s]


In [23]:
base_global = make_intervals(pearsons_b, spearmans_b, mses_b, r2s_b)

In [24]:
base_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.487 ± 0.022,0.495 ± 0.021,0.0253 ± 0.0008,-1.158 ± 0.064


In [25]:
s5_or10 = network_results('globs/s5_d0.1_reg0.001_o100_v1/CASP13_test/checkpoint_epoch_52',
                          targets_all13).values

In [26]:
pearsons_10, spearmans_10, mses_10, r2s_10 = bootstrap(s5_or10, 1000000, 6000)

100%|██████████| 100000/100000 [27:03<00:00, 61.58it/s]


In [27]:
s5_or10_global = make_intervals(pearsons_10, spearmans_10, mses_10, r2s_10)

In [28]:
s5_or10_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.774 ± 0.01,0.783 ± 0.011,0.0068 ± 0.0002,0.422 ± 0.017


In [40]:
res = network_results('globs/nikita_best/CASP13_test/checkpoint_epoch_39', targets_all13)
proq3, voromqa_b, sbrod = make_sota(sota13, res)

In [32]:
pearsons_p, spearmans_p, mses_p, r2s_p = bootstrap(proq3, 1000000, 6000, 20)

100%|██████████| 50000/50000 [15:38<00:00, 53.28it/s]


In [33]:
proq3_global = make_intervals(pearsons_p, spearmans_p, mses_p, r2s_p)

In [34]:
proq3_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.726 ± 0.013,0.728 ± 0.014,0.0353 ± 0.0012,-1.97 ± 0.132


In [35]:
pearsons_v, spearmans_v, mses_v, r2s_v = bootstrap(voromqa_b, 1000000, 6000, 20)

100%|██████████| 50000/50000 [15:33<00:00, 53.57it/s]


In [36]:
voro_global = make_intervals(pearsons_v, spearmans_v, mses_v, r2s_v)

In [37]:
voro_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.659 ± 0.019,0.688 ± 0.017,0.038 ± 0.001,-2.198 ± 0.104


In [41]:
pearsons_s, spearmans_s, mses_s, r2s_s = bootstrap(sbrod, 1000000, 6000, 20)

100%|██████████| 50000/50000 [17:11<00:00, 48.48it/s]


In [42]:
sbrod_global = make_intervals(pearsons_s, spearmans_s, mses_s, r2s_s)

In [43]:
sbrod_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.417 ± 0.021,0.433 ± 0.021,0.0503 ± 0.0015,-3.235 ± 0.149


In [44]:
res_global = pd.concat((s5_or5_global, s5_or10_global,
                        base_global, proq3_global, voro_global, sbrod_global))

In [45]:
res_global.index = ['s5_o5', 's5_o10', 'base', 'proq3', 'voromqa', 'sbrod']

In [46]:
res_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
s5_o5,0.806 ± 0.009,0.808 ± 0.01,0.0131 ± 0.0004,-0.118 ± 0.033
s5_o10,0.774 ± 0.01,0.783 ± 0.011,0.0068 ± 0.0002,0.422 ± 0.017
base,0.487 ± 0.022,0.495 ± 0.021,0.0253 ± 0.0008,-1.158 ± 0.064
proq3,0.726 ± 0.013,0.728 ± 0.014,0.0353 ± 0.0012,-1.97 ± 0.132
voromqa,0.659 ± 0.019,0.688 ± 0.017,0.038 ± 0.001,-2.198 ± 0.104
sbrod,0.417 ± 0.021,0.433 ± 0.021,0.0503 ± 0.0015,-3.235 ± 0.149


## CASP12

In [47]:
s5_or5_12 = network_results('globs/nikita_best/CASP12_test/checkpoint_epoch_39',
                            targets_all12).values

In [48]:
pearsons12, spearmans12, mses12, r2s12 = bootstrap(s5_or5_12, 1000000, 6000, 20)

100%|██████████| 50000/50000 [15:20<00:00, 54.31it/s]


In [49]:
s5_or5_12_global = make_intervals(pearsons12, spearmans12, mses12, r2s12)
s5_or5_12_global

Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.854 ± 0.007,0.831 ± 0.009,0.0098 ± 0.0003,0.157 ± 0.025


In [50]:
base_12 = network_results('globs/nikita_base/CASP12_test/checkpoint_epoch_15',
                          targets_all12).values
pearsons_b12, spearmans_b12, mses_b12, r2s_b12 = bootstrap(base_12, 1000000, 6000, 20)
base12_global = make_intervals(pearsons_b12, spearmans_b12, mses_b12, r2s_b12)
base12_global

100%|██████████| 50000/50000 [14:48<00:00, 56.24it/s]  


Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.6 ± 0.017,0.587 ± 0.018,0.0188 ± 0.0006,-0.62 ± 0.047


In [51]:
s5_or10_12 = network_results('globs/s5_d0.1_reg0.001_o100_v1/CASP12_test/checkpoint_epoch_52',
                          targets_all12).values
pearsons_10_12, spearmans_10_12, mses_10_12, r2s_10_12 = bootstrap(s5_or10_12, 1000000, 6000, 20)
s5_or10_12_global = make_intervals(pearsons_10_12, spearmans_10_12, mses_10_12, r2s_10_12)
s5_or10_12_global

100%|██████████| 50000/50000 [15:30<00:00, 53.73it/s]  


Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.812 ± 0.009,0.789 ± 0.011,0.0049 ± 0.0001,0.573 ± 0.013


In [57]:
res = network_results('globs/nikita_best/CASP12_test/checkpoint_epoch_39', targets_all12)
proq3_12, voromqa_b_12, sbrod_ = make_sota(sota12, res)
_1, _2, sbrod12 = make_sota(sota_sota12, res)

In [59]:
pearsons_p12, spearmans_p12, mses_p12, r2s_p12 = bootstrap(proq3_12, 1000000, 6000, 20)
proq3_12_global = make_intervals(pearsons_p12, spearmans_p12, mses_p12, r2s_p12)
proq3_12_global

100%|██████████| 50000/50000 [16:05<00:00, 51.78it/s]  


Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.795 ± 0.008,0.806 ± 0.009,0.0352 ± 0.001,-2.038 ± 0.133


In [60]:
pearsons_12_v, spearmans_12_v, mses_12_v, r2s_12_v = bootstrap(voromqa_b_12, 1000000, 6000, 20)
voro_12_global = make_intervals(pearsons_12_v, spearmans_12_v, mses_12_v, r2s_12_v)
voro_12_global

100%|██████████| 50000/50000 [14:27<00:00, 57.66it/s]


Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.675 ± 0.016,0.7 ± 0.016,0.0513 ± 0.0011,-3.428 ± 0.138


In [61]:
pearsons_s12, spearmans_s12, mses_s12, r2s_s12 = bootstrap(sbrod12, 1000000, 6000, 20)
sbrod12_global = make_intervals(pearsons_s12, spearmans_s12, mses_s12, r2s_s12)
sbrod12_global

100%|██████████| 50000/50000 [15:45<00:00, 52.87it/s]  


Unnamed: 0,pearson_glob_glob,spearman_glob_glob,mse_glob_glob,r2_glob_glob
0,0.552 ± 0.017,0.531 ± 0.019,0.9611 ± 0.0064,-81.935 ± 2.78


# Decoy

In [157]:
def calculate_z_scores(vector):
    threshold = -2
    raw_z_scores = (vector - np.mean(vector)) / np.std(vector)
    filtered_vector = [vector[i] for i, z in enumerate(raw_z_scores) if z > threshold]
    z_scores = (vector - np.mean(filtered_vector)) / np.std(filtered_vector)
    z_scores = np.clip(z_scores, -2, None)
    return z_scores


def fisher_mean(correlations):
    return np.tanh(np.arctanh(correlations).mean())


def calc_target(preds, true):
    choice = int(np.argmax(preds))
    return {
        'pearson': sps.pearsonr(preds, true)[0],
        'spearman': sps.spearmanr(preds, true)[0],
        'mse': mean_squared_error(true, preds),
        'r2': r2_score(true, preds),
        'z_score': calculate_z_scores(true)[choice],
        'rank': sps.rankdata(-np.array(true))[choice]
    }

def network_global_results(results, filter_targets=None):
    glob = []
    for file in os.listdir(results):
        target = file[:-4]
        df = pd.read_csv(os.path.join(results, file))
        df['model'] = df['model'].transform(lambda x: f'{target}_{x}')
        if filter_targets is not None:
            df = df[df['model'].isin(filter_targets)]
        if len(df.pred.values) != 0:
            glob.append(calc_target(df.pred.values, df.true.values))
    return pd.DataFrame(glob)

def sota_result(sota_path, results_path, models):
    glob_proq3 = []
    glob_voromqa = []
    glob_sbrod = []
    for file in os.listdir(sota_path):
        target = file[:-4]
        if not file.endswith('.csv'):
            continue
        if not os.path.exists(os.path.join(results_path, file)):
            continue
        df = pd.read_csv(os.path.join(sota_path, file), sep='\t', index_col=0)
        #print(df)
        df['model'] = df['model'].transform(lambda x: f'{target}_{x}')
        df = df[df['model'].isin(models)]
        
        res = pd.read_csv(os.path.join(results_path, file))
        #print(res)
        res['model'] = res['model'].transform(lambda x: f'{target}_{x}')
        res = res[res['model'].isin(models)]
        
        df = df.set_index('model').join(res.set_index('model')).reset_index()
        
        if len(df.true.values) == 0:
            continue
        
        glob_proq3.append(calc_target(df.proq3, df.true))
        if 'voromqa_b' in df.columns:
            glob_voromqa.append(calc_target(df.voromqa_b, df.true))
        else:
            glob_voromqa.append(calc_target(df.voromqa, df.true))
        if 'sbrod_server' in df.columns:
            glob_sbrod.append(calc_target(df.sbrod_server, df.true))
        elif 'sbrod' in df.columns:
             glob_sbrod.append(calc_target(df.sbrod, df.true))
    return (pd.DataFrame(glob_proq3), pd.DataFrame(glob_voromqa), pd.DataFrame(glob_sbrod))


def one_thread(args):
        pearsons = []
        spearmans = []
        mses = []
        r2s = []
        z_scores = []
        ranks = []
        results = args[0]
        K = args[1]
        sz = args[2]
        gen = tqdm(range(K)) if args[3] else range(K)
        for i in gen:
            sample = np.array(random.choices(results, k=sz))
            z_scores.append(np.mean(sample[:, 4]))
            ranks.append(np.mean(sample[:, 5]))
            pearsons.append(fisher_mean(sample[:, 0]))
            spearmans.append(fisher_mean(sample[:, 1]))
            mses.append(np.mean(sample[:, 2]))
            r2s.append(np.mean(sample[:, 3]))
        return (
            np.array(z_scores), np.array(ranks),
            np.array(mses), np.array(r2s),
            np.array(pearsons), np.array(spearmans))


def bootstrap_decoy(results, K, sz=None, n_jobs=10):   
    if sz is None:
        sz = len(results)
    
    z_scores = []
    ranks = []
    mses = []
    r2s = []
    pearsons = []
    spearmans = []
    
    args = []
    for i in range(n_jobs):
        args.append([results, K // n_jobs, sz, i == 0])
    
    p = multiprocessing.Pool(n_jobs)
    result = p.map(one_thread, args)
    for res in result:
        z_scores.append(res[0])
        ranks.append(res[1])
        mses.append(res[2])
        r2s.append(res[3])
        pearsons.append(res[4])
        spearmans.append(res[5])
    return (
        np.concatenate(z_scores), np.concatenate(ranks),
        np.concatenate(mses), np.concatenate(r2s),
        np.concatenate(pearsons), np.concatenate(spearmans),
    )

def make_intervals_decoy(metrics):
    z = sps.norm.ppf(0.975)
    def interval(sample, round_to=2, to_z=None):
        if to_z is None:
            return f'{round(sample.mean(), round_to)} ± {round(z * sample.std(), round_to)}'
        else:
            return f'{round(sample.mean() * 100, round_to)} ± {round(z * sample.std() * 100, round_to)}'
    
    return pd.DataFrame({
        'z_score': [interval(metrics[0], 3)],
        'rank': [interval(metrics[1], 3)],
        'mse_glob_decoy': [interval(metrics[2], 4)],
        'r2_glob_decoy': [interval(metrics[3], 3)],
        'pearson_glob_decoy': [interval(metrics[4], 3)],
        'spearman_glob_decoy': [interval(metrics[5], 3)]
    })

In [86]:
targets_all13 = proq3_targets13 & voromqa_b_targets13 & sbrod_targets13 & set(s5_or10[:, 0])
targets_all12 = proq3_targets12 & voromqa_b_targets12 & sbrod_targets12 & set(s5_or10_12[:, 0])

## CASP13

In [158]:
s5_o5_t = network_global_results('globs/nikita_best/CASP13_test/checkpoint_epoch_39',
                      targets_all13)
make_intervals_decoy(bootstrap_decoy(s5_o5_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:10<00:00, 4714.95it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.362 ± 0.202,20.543 ± 6.546,0.0131 ± 0.0028,-3.458 ± 2.11,0.789 ± 0.032,0.743 ± 0.032


In [159]:
s5_o10_t = network_global_results('globs/s5_d0.1_reg0.001_o100_v1/CASP13_test/checkpoint_epoch_52',
                      targets_all13)
make_intervals_decoy(bootstrap_decoy(s5_o10_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:08<00:00, 6183.95it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.247 ± 0.196,22.762 ± 6.605,0.0068 ± 0.0015,-1.348 ± 1.028,0.721 ± 0.038,0.694 ± 0.038


In [160]:
base_t = network_global_results('globs/nikita_base/CASP13_test/checkpoint_epoch_15',
                      targets_all13)
make_intervals_decoy(bootstrap_decoy(base_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:11<00:00, 4215.86it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,0.838 ± 0.179,37.949 ± 6.71,0.0253 ± 0.0059,-8.889 ± 5.497,0.622 ± 0.047,0.599 ± 0.047


In [161]:
proq3_t, voromqa_t, sbrod_t = sota_result('../sota_from_casp',
                                         'globs/nikita_best/CASP13_test/checkpoint_epoch_39',
                                         targets_all13)

In [162]:
make_intervals_decoy(bootstrap_decoy(proq3_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:07<00:00, 6588.44it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.459 ± 0.201,18.994 ± 6.157,0.0348 ± 0.0076,-17.519 ± 8.838,0.775 ± 0.037,0.737 ± 0.035


In [163]:
make_intervals_decoy(bootstrap_decoy(voromqa_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:07<00:00, 6772.94it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.369 ± 0.222,21.242 ± 6.985,0.0378 ± 0.0076,-15.931 ± 8.854,0.803 ± 0.034,0.767 ± 0.033


In [164]:
make_intervals_decoy(bootstrap_decoy(sbrod_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:08<00:00, 6067.60it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.453 ± 0.181,18.245 ± 4.996,0.0515 ± 0.0121,-22.454 ± 10.917,0.804 ± 0.032,0.761 ± 0.033


# CASP12

In [165]:
s5_o5_t = network_global_results('globs/nikita_best/CASP12_test/checkpoint_epoch_39',
                      targets_all12)
make_intervals_decoy(bootstrap_decoy(s5_o5_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:07<00:00, 6256.17it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.704 ± 0.203,13.947 ± 5.392,0.0097 ± 0.0023,-1.89 ± 0.621,0.796 ± 0.03,0.738 ± 0.033


In [166]:
s5_o10_t = network_global_results('globs/s5_d0.1_reg0.001_o100_v1/CASP12_test/checkpoint_epoch_52',
                      targets_all12)
make_intervals_decoy(bootstrap_decoy(s5_o10_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:07<00:00, 6378.54it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.665 ± 0.158,12.632 ± 3.853,0.0049 ± 0.0008,-0.831 ± 0.322,0.71 ± 0.035,0.68 ± 0.036


In [167]:
base_t = network_global_results('globs/nikita_base/CASP12_test/checkpoint_epoch_15',
                      targets_all12)
make_intervals_decoy(bootstrap_decoy(base_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:07<00:00, 6589.83it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,0.818 ± 0.229,38.436 ± 9.136,0.0187 ± 0.0046,-4.719 ± 1.272,0.617 ± 0.035,0.557 ± 0.031


In [168]:
proq3_t, voromqa_t, _ = sota_result('../sota_from_casp',
                                         'globs/nikita_best/CASP12_test/checkpoint_epoch_39',
                                         targets_all12)
_, _, sbrod_t = sota_result('../sota',
                                         'globs/nikita_best/CASP12_test/checkpoint_epoch_39',
                                         targets_all12)

In [169]:
make_intervals_decoy(bootstrap_decoy(proq3_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:13<00:00, 3579.26it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.67 ± 0.168,11.96 ± 3.165,0.0354 ± 0.0052,-16.575 ± 4.272,0.801 ± 0.034,0.75 ± 0.035


In [170]:
make_intervals_decoy(bootstrap_decoy(voromqa_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:12<00:00, 4039.99it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.41 ± 0.18,17.173 ± 5.095,0.0512 ± 0.0088,-19.764 ± 4.021,0.803 ± 0.031,0.766 ± 0.032


In [171]:
make_intervals_decoy(bootstrap_decoy(sbrod_t.values, 1000000, 73, 20))

100%|██████████| 50000/50000 [00:11<00:00, 4448.30it/s]


Unnamed: 0,z_score,rank,mse_glob_decoy,r2_glob_decoy,pearson_glob_decoy,spearman_glob_decoy
0,1.282 ± 0.214,23.577 ± 6.763,0.9611 ± 0.0499,-427.839 ± 71.492,0.761 ± 0.043,0.684 ± 0.042
