In [28]:
import pandas as pd
import os
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs
from sklearn.metrics import mean_squared_error, r2_score, balanced_accuracy_score, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import random
from collections import defaultdict
import pickle
import seaborn as sns
from scipy.stats import rankdata

In [25]:
# Load the pre-computed "perfect" selections for the regression and the
# classification models. Both objects are dicts keyed by dataset name.
# NOTE: pickle.load executes whatever the file tells it to, so only load
# files that were produced locally.
with open('perfect_regr_models.pickle', 'rb') as regr_file:
    perfect_model_regr = pickle.load(regr_file)
with open('perfect_class_models.pickle', 'rb') as class_file:
    perfect_model_class = pickle.load(class_file)
 

In [29]:
def search_of_max_pki_in_dataset_class(dataset, perfect_model, verbose=True):
    """Find the batch in which the highest-pKi training compound is selected.

    The selected points are consumed in batches of five, in the order they
    are stored in ``perfect_model[dataset][1]``. The answer depends only on
    the position of the highest-pKi row in that list, so it is computed
    directly; no train/test pool has to be simulated.

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Object 2 must be
        a DataFrame with a ``pKi`` column.
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels. It is not modified.
    verbose : bool, default True
        When true, print every batch up to and including the one that
        contains the highest-pKi row.

    Returns
    -------
    tuple
        ``(iteration, max_pki)``. ``iteration`` is the 1-based number of the
        batch holding the highest-pKi row, or the string
        ``'max pki is not in selected points'`` when that row was never
        selected. ``max_pki`` is the highest pKi value in object 2.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))

    pki_values = data[2]['pKi']
    max_pki_index = pki_values.idxmax()
    max_pki = pki_values.max()

    batch_size = 5
    selected_points = list(perfect_model[dataset][1])
    if max_pki_index not in selected_points:
        return 'max pki is not in selected points', max_pki

    # Position p (0-based) falls into batch p // batch_size, counted from 1.
    iteration_of_max_pki = selected_points.index(max_pki_index) // batch_size + 1

    if verbose:
        for start in range(0, iteration_of_max_pki * batch_size, batch_size):
            print(selected_points[start:start + batch_size])

    return iteration_of_max_pki, max_pki

In [30]:
# For every classification dataset, record in which batch the highest-pKi
# training compound is selected (together with that pKi value).
class_search = dict()
for dataset in tqdm(perfect_model_class):
    class_search[dataset] = search_of_max_pki_in_dataset_class(
        dataset, perfect_model_class)

  6%|▌         | 1/17 [00:02<00:37,  2.36s/it]

[88, 350, 10, 301, 236]
[184, 128, 43, 204, 134]
[212, 324, 16, 19, 335]
[152, 187, 313, 209, 122]


 12%|█▏        | 2/17 [00:03<00:27,  1.85s/it]

[274, 134, 433, 43, 58]
[308, 212, 191, 34, 257]
[306, 316, 50, 379, 190]


 18%|█▊        | 3/17 [00:03<00:21,  1.52s/it]

[233, 1, 20, 194, 460]
[550, 387, 437, 251, 362]
[45, 94, 435, 237, 333]
[350, 160, 260, 36, 459]
[53, 196, 175, 331, 397]
[371, 366, 217, 303, 393]
[344, 188, 14, 85, 120]


 24%|██▎       | 4/17 [00:04<00:14,  1.15s/it]

[150, 44, 56, 46, 417]
[272, 371, 313, 34, 127]
[213, 215, 13, 328, 5]
[147, 468, 437, 393, 457]
[387, 233, 236, 113, 316]
[325, 462, 440, 443, 433]
[314, 35, 469, 299, 279]
[12, 435, 27, 274, 214]
[432, 442, 395, 347, 455]
[217, 447, 198, 327, 282]
[216, 434, 304, 391, 301]
[456, 275, 470, 439, 290]
[309, 438, 365, 458, 332]


 29%|██▉       | 5/17 [00:04<00:11,  1.02it/s]

[330, 446, 390, 302, 427]
[448, 205, 300, 388, 203]
[396, 424, 291, 207, 412]
[7, 4, 441, 194, 479]
[6, 403, 378, 426, 417]
[381, 115, 197, 237, 335]


 47%|████▋     | 8/17 [00:06<00:07,  1.25it/s]

[40, 137, 512, 125, 196]
[344, 85, 479, 122, 595]
[369, 18, 221, 44, 8]
[240, 80, 430, 11, 93]


 82%|████████▏ | 14/17 [00:21<00:13,  4.52s/it]

[130, 37, 210, 111, 202]
[150, 108, 229, 353, 199]
[373, 49, 115, 404, 74]
[103, 116, 166, 139, 178]
[194, 102, 354, 359, 109]
[107, 394, 403, 355, 184]
[409, 253, 259, 158, 254]
[168, 248, 252, 347, 182]
[217, 385, 387, 344, 122]
[237, 106, 228, 245, 240]
[110, 257, 414, 402, 136]
[258, 351, 223, 249, 400]
[213, 241, 239, 212, 197]
[462, 381, 481, 196, 363]
[391, 378, 40, 189, 14]
[364, 13, 216, 421, 160]
[348, 5, 67, 235, 274]
[201, 151, 386, 205, 34]
[133, 246, 65, 399, 408]
[225, 345, 112, 236, 114]


 88%|████████▊ | 15/17 [00:22<00:06,  3.46s/it]

[388, 393, 177, 256, 187]
[119, 129, 38, 384, 147]
[138, 171, 361, 141, 154]


 94%|█████████▍| 16/17 [00:28<00:04,  4.35s/it]

[337, 185, 26, 307, 248]
[103, 232, 247, 324, 129]
[239, 65, 95, 56, 253]
[241, 206, 219, 204, 155]
[40, 308, 238, 169, 115]
[188, 176, 269, 201, 279]


100%|██████████| 17/17 [00:29<00:00,  1.71s/it]


In [46]:
def search_of_max_pki_in_dataset_regr(dataset, perfect_model, verbose=False):
    """Find the batch in which the highest-pKi training compound is selected.

    The selected points are consumed in batches of five, in the order they
    are stored in ``perfect_model[dataset][1]``. The answer depends only on
    the position of the highest-pKi row in that list, so it is computed
    directly; no train/test pool has to be simulated.

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Object 2 must be
        a DataFrame with a ``pKi`` column.
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels. It is not modified.
    verbose : bool, default False
        When true, print every batch up to and including the one that
        contains the highest-pKi row.

    Returns
    -------
    tuple
        ``(iteration, max_pki)``. ``iteration`` is the 1-based number of the
        batch holding the highest-pKi row, or the string
        ``'max pki is not in selected points'`` when that row was never
        selected. ``max_pki`` is the highest pKi value in object 2.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))

    pki_values = data[2]['pKi']
    max_pki_index = pki_values.idxmax()
    max_pki = pki_values.max()

    batch_size = 5
    selected_points = list(perfect_model[dataset][1])
    if max_pki_index not in selected_points:
        return 'max pki is not in selected points', max_pki

    # Position p (0-based) falls into batch p // batch_size, counted from 1.
    iteration_of_max_pki = selected_points.index(max_pki_index) // batch_size + 1

    if verbose:
        for start in range(0, iteration_of_max_pki * batch_size, batch_size):
            print(selected_points[start:start + batch_size])

    return iteration_of_max_pki, max_pki

In [95]:
# For every regression dataset, record in which batch the highest-pKi
# training compound is selected (together with that pKi value).
regr_search = dict()
for dataset in perfect_model_regr:
    if dataset in regr_search:
        continue
    regr_search[dataset] = search_of_max_pki_in_dataset_regr(
        dataset, perfect_model_regr)

In [112]:
# Persist the regression search results. The output directory is created on
# demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_regr_search_of_max_pki_in_int_test.pickle', 'wb') as f:
    pickle.dump(regr_search, f)

In [32]:
# Persist the classification search results. The output directory is created
# on demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_class_search_of_max_pki_in_int_test.pickle', 'wb') as f:
    pickle.dump(class_search, f)

In [5]:
def search_of_max_pki_rank_in_test_class(dataset, perfect_model):
    """Track how a growing classifier ranks the top compound of the external test set.

    A random forest classifier is first fitted on 10 random rows of the
    training pool. After each evaluation the next five selected points
    (``perfect_model[dataset][1]``, in stored order) are moved from the pool
    into the training rows and the model is refitted. Iteration ``k`` is
    therefore evaluated with the starting rows plus the first ``k`` batches.
    The model obtained after the last batch is never evaluated.

    At every iteration the external test rows are dense-ranked by predicted
    probability of class 1 (rank 0 = highest probability, ties share a rank).

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Used here:
        object 0 (features of the training pool), object 1 (features of the
        external test set), object 3 (potency values of the external test
        set; its first column is used, presumably the only one, TODO
        confirm) and object 4 (training labels with an ``activity`` column).
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels of the training pool. It is not modified.

    Returns
    -------
    tuple
        ``(rank_of_max_pki, iteration_of_max_pki, max_pki_test)`` where
        ``rank_of_max_pki`` maps iteration -> rank of the row with the highest
        test-set value, ``iteration_of_max_pki`` is the first iteration at
        which fewer than five rows are ranked strictly better than that row
        (``'not found'`` if that never happens), and ``max_pki_test`` is the
        highest test-set value itself.

    Notes
    -----
    The starting split is redrawn until no selected point is among the 10
    starting rows. This never terminates if a selected label is missing from
    the training pool.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))
    x_train_ex, x_test_ex, y_r_test_ex, y_train_ex = data[0], data[1], data[3], data[4]

    selected_points = list(perfect_model[dataset][1])
    batch_size = 5
    top_size = 5

    while True:
        x_train_in, x_test_in, y_train_in, y_test_in = train_test_split(
            x_train_ex, y_train_ex, test_size=len(y_train_ex) - 10)
        if all(point in x_test_in.index for point in selected_points):
            break

    # .iloc[0] picks the first column explicitly; plain [0] on a Series that
    # is labelled by column names relies on a deprecated positional fallback.
    max_pki_index_test = y_r_test_ex.idxmax().iloc[0]
    max_pki_test = y_r_test_ex.max().iloc[0]

    rank_of_max_pki = dict()
    iteration_of_max_pki = 'not found'
    for iteration, start in enumerate(range(0, len(selected_points), batch_size)):
        rfc = RandomForestClassifier(random_state=42, n_estimators=500, max_features='log2', n_jobs=20)
        rfc.fit(x_train_in.values, y_train_in['activity'].values)
        fitted_classes = set(y_train_in['activity'].values)

        proba = rfc.predict_proba(x_test_ex.values)
        if len(fitted_classes) == 1:
            # A single-class model returns one probability column; pad with a
            # zero column so that column 1 always refers to class 1.
            zero_column = 0 if 1 in fitted_classes else 1
            proba = np.insert(proba, zero_column, 0, axis=1)

        dense_ranks = rankdata(proba[:, 1], method='dense')
        rank_by_index = dict(zip(x_test_ex.index, dense_ranks.max() - dense_ranks))

        max_pki_rank = rank_by_index[max_pki_index_test]
        rank_of_max_pki[iteration] = max_pki_rank

        if iteration_of_max_pki == 'not found':
            # The row is in the top group when fewer than `top_size` rows have
            # a strictly better rank (whole tie groups are admitted together).
            better_ranked = sum(1 for rank in rank_by_index.values() if rank < max_pki_rank)
            if better_ranked < top_size:
                iteration_of_max_pki = iteration

        # Grow the training rows with the next batch for the following fit.
        batch = selected_points[start:start + batch_size]
        x_train_in = pd.concat([x_train_in, x_test_in.loc[batch]])
        y_train_in = pd.concat([y_train_in, y_test_in.loc[batch]])
        x_test_in = x_test_in.drop(batch)
        y_test_in = y_test_in.drop(batch)

    return rank_of_max_pki, iteration_of_max_pki, max_pki_test

In [8]:
def search_of_max_pki_rank_in_test_regr(dataset, perfect_model):
    """Track how a growing regressor ranks the top compound of the external test set.

    A random forest regressor is first fitted on 10 random rows of the
    training pool. After each evaluation the next five selected points
    (``perfect_model[dataset][1]``, in stored order) are moved from the pool
    into the training rows and the model is refitted. Iteration ``k`` is
    therefore evaluated with the starting rows plus the first ``k`` batches.
    The model obtained after the last batch is never evaluated.

    At every iteration the external test rows are dense-ranked by predicted
    value (rank 0 = highest prediction, ties share a rank).

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Used here:
        object 0 (features of the training pool), object 1 (features of the
        external test set), object 2 (training targets with a ``pKi``
        column) and object 3 (targets of the external test set; its first
        column is used, presumably the only one, TODO confirm).
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels of the training pool. It is not modified.

    Returns
    -------
    tuple
        ``(rank_of_max_pki, iteration_of_max_pki, max_pki_test)`` where
        ``rank_of_max_pki`` maps iteration -> rank of the row with the highest
        test-set value, ``iteration_of_max_pki`` is the first iteration at
        which fewer than five rows are ranked strictly better than that row
        (``'not found'`` if that never happens), and ``max_pki_test`` is the
        highest test-set value itself.

    Notes
    -----
    The starting split is redrawn until no selected point is among the 10
    starting rows. This never terminates if a selected label is missing from
    the training pool.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))
    x_train_ex, x_test_ex, y_train_ex, y_test_ex = data[0], data[1], data[2], data[3]

    selected_points = list(perfect_model[dataset][1])
    batch_size = 5
    top_size = 5

    while True:
        x_train_in, x_test_in, y_train_in, y_test_in = train_test_split(
            x_train_ex, y_train_ex, test_size=len(y_train_ex) - 10)
        if all(point in x_test_in.index for point in selected_points):
            break

    # .iloc[0] picks the first column explicitly; plain [0] on a Series that
    # is labelled by column names relies on a deprecated positional fallback.
    max_pki_index_test = y_test_ex.idxmax().iloc[0]
    max_pki_test = y_test_ex.max().iloc[0]

    rank_of_max_pki = dict()
    iteration_of_max_pki = 'not found'
    for iteration, start in enumerate(range(0, len(selected_points), batch_size)):
        rfr = RandomForestRegressor(random_state=42, n_estimators=500, max_features='log2', n_jobs=20)
        rfr.fit(x_train_in.values, y_train_in['pKi'].values)

        dense_ranks = rankdata(rfr.predict(x_test_ex.values), method='dense')
        rank_by_index = dict(zip(x_test_ex.index, dense_ranks.max() - dense_ranks))

        max_pki_rank = rank_by_index[max_pki_index_test]
        rank_of_max_pki[iteration] = max_pki_rank

        if iteration_of_max_pki == 'not found':
            # The row is in the top group when fewer than `top_size` rows have
            # a strictly better rank (whole tie groups are admitted together).
            better_ranked = sum(1 for rank in rank_by_index.values() if rank < max_pki_rank)
            if better_ranked < top_size:
                iteration_of_max_pki = iteration

        # Grow the training rows with the next batch for the following fit.
        batch = selected_points[start:start + batch_size]
        x_train_in = pd.concat([x_train_in, x_test_in.loc[batch]])
        y_train_in = pd.concat([y_train_in, y_test_in.loc[batch]])
        x_test_in = x_test_in.drop(batch)
        y_test_in = y_test_in.drop(batch)

    return rank_of_max_pki, iteration_of_max_pki, max_pki_test

In [13]:
# Rank trajectory of the top external-test compound for every
# classification dataset.
rank_class = dict()
for dataset in tqdm(perfect_model_class):
    if dataset in rank_class:
        continue
    rank_class[dataset] = search_of_max_pki_rank_in_test_class(
        dataset, perfect_model_class)

  0%|          | 0/17 [00:00<?, ?it/s]

0 51
not found
1 33
not found
2 21
not found
3 32
not found
4 29
not found
5 30
not found
6 33
not found
7 33
not found
8 32
not found
9 36
not found
10 38
not found
11 37
not found
12 37
not found
13 36
not found
14 38
not found
15 42
not found
16 41
not found
17 40
not found
18 42
not found
19 38
not found
20 41
not found
21 39
not found
22 37
not found
23 40
not found
24 38
not found
25 37
not found
26 38
not found
27 42
not found
28 40
not found


 41%|████      | 7/17 [01:07<01:35,  9.58s/it]

29 40
not found
0 49
not found
1 53
not found
2 65
not found
3 47
not found
4 50
not found
5 44
not found
6 16
not found
7 27
not found
8 22
not found
9 8
not found
10 9
not found
11 12
not found
12 13
not found
13 17
not found
14 17
not found
15 18
not found
16 22
not found
17 18
not found
18 29
not found
19 21
not found
20 25
not found
21 23
not found
22 16
not found
23 16
not found
24 13
not found
25 13
not found
26 12
not found
27 15
not found
28 15
not found


 47%|████▋     | 8/17 [02:12<03:58, 26.48s/it]

29 12
not found
0 134
not found
1 50
not found
2 62
not found
3 99
not found
4 85
not found
5 88
not found
6 93
not found
7 93
not found
8 86
not found
9 100
not found
10 96
not found
11 127
not found
12 109
not found
13 120
not found
14 114
not found
15 114
not found
16 117
not found
17 103
not found
18 97
not found
19 96
not found
20 103
not found
21 104
not found
22 101
not found
23 102
not found
24 107
not found
25 102
not found
26 102
not found
27 104
not found
28 95
not found


 53%|█████▎    | 9/17 [03:25<05:23, 40.39s/it]

29 93
not found
0 24
not found
1 19
not found
2 11
not found
3 5
not found
4 4
4
5 5
4
6 0
4
7 0
4
8 1
4
9 0
4
10 0
4
11 2
4
12 0
4
13 2
4
14 1
4
15 0
4
16 3
4
17 2
4
18 2
4
19 0
4
20 3
4
21 4
4
22 2
4
23 4
4
24 4
4
25 6
4
26 4
4
27 5
4
28 5
4


 59%|█████▉    | 10/17 [04:32<05:38, 48.32s/it]

29 4
4
0 43
not found
1 32
not found
2 49
not found
3 44
not found
4 43
not found
5 38
not found
6 34
not found
7 36
not found
8 38
not found
9 45
not found
10 48
not found
11 52
not found
12 47
not found
13 48
not found
14 50
not found
15 50
not found
16 52
not found
17 45
not found
18 48
not found
19 44
not found
20 49
not found
21 49
not found
22 47
not found
23 39
not found
24 43
not found
25 44
not found
26 50
not found
27 50
not found
28 50
not found


 65%|██████▍   | 11/17 [05:38<05:22, 53.70s/it]

29 46
not found
0 65
not found
1 92
not found
2 58
not found
3 75
not found
4 83
not found
5 67
not found
6 69
not found
7 77
not found
8 51
not found
9 54
not found
10 46
not found
11 60
not found
12 69
not found
13 73
not found
14 71
not found
15 72
not found
16 63
not found
17 62
not found
18 59
not found
19 72
not found
20 87
not found
21 100
not found
22 87
not found
23 75
not found
24 61
not found
25 68
not found
26 75
not found
27 71
not found
28 57
not found


 71%|███████   | 12/17 [06:51<04:57, 59.41s/it]

29 66
not found
0 14
not found
1 8
not found
2 7
not found
3 4
not found
4 4
4
5 1
4
6 1
4
7 3
4
8 0
4
9 4
4
10 2
4
11 1
4
12 1
4
13 2
4
14 3
4
15 3
4
16 3
4
17 0
4
18 0
4
19 3
4
20 1
4
21 0
4
22 0
4
23 1
4
24 1
4
25 2
4
26 3
4
27 3
4
28 1
4


 76%|███████▋  | 13/17 [07:57<04:06, 61.50s/it]

29 4
4
0 21
not found
1 10
not found
2 10
not found
3 11
not found
4 7
not found
5 9
not found
6 10
not found
7 11
not found
8 6
not found
9 3
9
10 11
9
11 6
9
12 6
9
13 7
9
14 7
9
15 7
9
16 9
9
17 8
9
18 7
9
19 8
9
20 9
9
21 10
9
22 8
9
23 7
9
24 6
9
25 6
9
26 6
9
27 5
9
28 7
9


 82%|████████▏ | 14/17 [09:11<03:15, 65.22s/it]

29 4
9
0 62
not found
1 12
not found
2 11
not found
3 8
not found
4 17
not found
5 24
not found
6 22
not found
7 18
not found
8 18
not found
9 23
not found
10 28
not found
11 29
not found
12 28
not found
13 34
not found
14 25
not found
15 24
not found
16 26
not found
17 18
not found
18 19
not found
19 15
not found
20 14
not found
21 13
not found
22 13
not found
23 10
not found
24 7
not found
25 6
not found
26 4
26
27 6
26
28 6
26


 88%|████████▊ | 15/17 [10:18<02:11, 65.65s/it]

29 3
26
0 0
0
1 0
0
2 0
0
3 1
0
4 1
0
5 0
0
6 0
0
7 0
0
8 0
0
9 0
0
10 0
0
11 0
0
12 0
0
13 0
0
14 0
0
15 0
0
16 0
0
17 0
0
18 0
0
19 0
0
20 0
0
21 0
0
22 0
0
23 1
0
24 0
0
25 0
0
26 0
0
27 0
0
28 0
0


 94%|█████████▍| 16/17 [11:29<01:07, 67.17s/it]

29 0
0
0 35
not found
1 38
not found
2 37
not found
3 40
not found
4 45
not found
5 42
not found
6 37
not found
7 36
not found
8 46
not found
9 39
not found
10 43
not found
11 45
not found
12 47
not found
13 51
not found
14 52
not found
15 52
not found
16 50
not found
17 43
not found
18 61
not found
19 63
not found
20 64
not found
21 63
not found
22 56
not found
23 56
not found
24 66
not found
25 64
not found
26 62
not found
27 74
not found
28 77
not found


100%|██████████| 17/17 [12:35<00:00, 44.45s/it]

29 62
not found





In [15]:
# Persist the classification rank trajectories. The output directory is created
# on demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_rank_class', 'wb') as f:
    pickle.dump(rank_class, f)

In [103]:
# Rank trajectory of the top external-test compound for every regression
# dataset.
rank_regr = dict()
for dataset in tqdm(perfect_model_regr):
    if dataset in rank_regr:
        continue
    rank_regr[dataset] = search_of_max_pki_rank_in_test_regr(
        dataset, perfect_model_regr)

  0%|          | 0/20 [00:00<?, ?it/s]

0 92
1 86
2 128
3 56
4 42
5 42
6 42
7 42
8 42
9 41
10 38
11 37
12 35
13 40
14 40
15 40
16 41
17 40
18 38
19 38
20 41
21 38
22 40
23 41
24 40
25 38
26 41
27 36
28 39


  5%|▌         | 1/20 [00:53<16:48, 53.10s/it]

29 39
0 6
1 14
2 12
3 18
4 16
5 19
6 20
7 20
8 18
9 18
10 18
11 19
12 21
13 19
14 21
15 20
16 19
17 23
18 22
19 26
20 26
21 16
22 10
23 17
24 11
25 16
26 13
27 16
28 18


 10%|█         | 2/20 [01:46<15:55, 53.10s/it]

29 16
0 13
1 29
2 52
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 1
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0


 15%|█▌        | 3/20 [02:39<15:03, 53.12s/it]

29 0
0 20
1 17
2 26
3 23
4 6
5 8
6 7
7 11
8 6
9 12
10 10
11 6
12 10
13 8
14 4
15 8
16 13
17 13
18 13
19 13
20 12
21 14
22 6
23 12
24 12
25 12
26 10
27 10
28 10


 85%|████████▌ | 17/20 [03:35<01:55, 38.38s/it]

29 11
0 54
1 41
2 54
3 36
4 33
5 35
6 35
7 39
8 45
9 50
10 65
11 60
12 67
13 66
14 78
15 41
16 55
17 67
18 74
19 67
20 76
21 72
22 80
23 68
24 69
25 75
26 78
27 83
28 82


 90%|█████████ | 18/20 [04:27<01:25, 42.67s/it]

29 80
0 72
1 80
2 42
3 49
4 43
5 32
6 50
7 25
8 18
9 22
10 24
11 19
12 25
13 18
14 34
15 39
16 34
17 28
18 24
19 22
20 25
21 24
22 28
23 22
24 29
25 32
26 37
27 43
28 32


 95%|█████████▌| 19/20 [05:20<00:45, 45.80s/it]

29 33
0 36
1 55
2 53
3 56
4 58
5 61
6 61
7 60
8 50
9 42
10 41
11 31
12 39
13 35
14 40
15 30
16 38
17 36
18 39
19 44
20 43
21 42
22 48
23 42
24 39
25 50
26 45
27 43
28 40


100%|██████████| 20/20 [06:16<00:00, 18.82s/it]

29 44





In [110]:
# Persist the regression rank trajectories. The output directory is created
# on demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_rank_regr', 'wb') as f:
    pickle.dump(rank_regr, f)

In [212]:
def EF_regression(dataset, perfect_model):
    """Measure how well a growing regressor recovers the top-pKi test compounds.

    A random forest regressor is first fitted on 10 random rows of the
    training pool. After each evaluation the next five selected points
    (``perfect_model[dataset][1]``, in stored order) are moved from the pool
    into the training rows and the model is refitted. Iteration ``k`` is
    therefore evaluated with the starting rows plus the first ``k`` batches.
    The model obtained after the last batch is never evaluated.

    At every iteration the external test rows are dense-ranked by predicted
    value (rank 0 = highest). Whole rank groups are admitted from the top
    until at least ``round(0.1 * n_test)`` rows are selected, and the
    selection is compared with the same number of truly highest-pKi rows.

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Used here:
        object 0 (features of the training pool), object 1 (features of the
        external test set), objects 2 and 3 (training / external-test
        targets, each with a ``pKi`` column).
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels of the training pool. It is not modified.

    Returns
    -------
    list
        One ``[iteration, hit_fraction, selected_fraction]`` entry per
        iteration. ``hit_fraction`` is the share of the selected rows that
        are among the true top rows; ``selected_fraction`` is the number of
        selected rows divided by the size of the external test set. When no
        row is selected the entry is ``[iteration, 0, 0.0]``.

    Notes
    -----
    The starting split is redrawn until no selected point is among the 10
    starting rows. This never terminates if a selected label is missing from
    the training pool.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))
    x_train_ex, x_test_ex, y_train_ex, y_test_ex = data[0], data[1], data[2], data[3]

    selected_points = list(perfect_model[dataset][1])
    batch_size = 5

    while True:
        x_train_in, x_test_in, y_train_in, y_test_in = train_test_split(
            x_train_ex, y_train_ex, test_size=len(y_train_ex) - 10)
        if all(point in x_test_in.index for point in selected_points):
            break

    n_test = y_test_ex.shape[0]
    target_size = round(n_test * 0.1)
    # The true ordering does not depend on the model, so sort only once.
    true_order = y_test_ex.sort_values(by='pKi', ascending=False).index

    results = []
    for iteration, start in enumerate(range(0, len(selected_points), batch_size)):
        rfr = RandomForestRegressor(random_state=42, n_estimators=500, max_features='log2', n_jobs=20)
        rfr.fit(x_train_in.values, y_train_in['pKi'].values)

        dense_ranks = rankdata(rfr.predict(x_test_ex.values), method='dense')
        rank_by_index = dict(zip(x_test_ex.index, dense_ranks.max() - dense_ranks))
        rank_values = list(rank_by_index.values())

        # Admit whole rank groups from the top until the target size is met.
        # The scan stops before the worst rank, so that group is never
        # admitted. NOTE(review): confirm that excluding it is intended.
        number_of_points = 0
        cutoff_rank = None
        for rank in range(max(rank_values)):
            if number_of_points >= target_size:
                break
            number_of_points += rank_values.count(rank)
            cutoff_rank = rank

        if number_of_points == 0:
            results.append([iteration, 0, number_of_points / n_test])
        else:
            predicted_top = {index for index, rank in rank_by_index.items() if rank <= cutoff_rank}
            true_top = set(true_order[:number_of_points])
            results.append([iteration,
                            len(true_top.intersection(predicted_top)) / number_of_points,
                            number_of_points / n_test])

        # Grow the training rows with the next batch for the following fit.
        batch = selected_points[start:start + batch_size]
        x_train_in = pd.concat([x_train_in, x_test_in.loc[batch]])
        y_train_in = pd.concat([y_train_in, y_test_in.loc[batch]])
        x_test_in = x_test_in.drop(batch)
        y_test_in = y_test_in.drop(batch)

    return results

In [64]:
def EF_classification(dataset, perfect_model):
    """Measure how well a growing classifier recovers the top-pKi test compounds.

    A random forest classifier is first fitted on 10 random rows of the
    training pool. After each evaluation the next five selected points
    (``perfect_model[dataset][1]``, in stored order) are moved from the pool
    into the training rows and the model is refitted. Iteration ``k`` is
    therefore evaluated with the starting rows plus the first ``k`` batches.
    The model obtained after the last batch is never evaluated.

    At every iteration the external test rows are dense-ranked by predicted
    probability of class 1 (rank 0 = highest). Whole rank groups are admitted
    from the top until at least ``round(0.1 * n_test)`` rows are selected,
    and the selection is compared with the same number of truly highest-pKi
    rows.

    Parameters
    ----------
    dataset : str
        Dataset name; ``x_and_y/{dataset}.pickle`` is read. The file holds an
        object count followed by that many pickled objects. Used here:
        object 0 (features of the training pool), object 1 (features of the
        external test set), object 3 (external-test targets with a ``pKi``
        column), object 4 (training labels with an ``activity`` column) and
        object 5 (external-test labels; only its row count is used).
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered list of selected index
        labels of the training pool. It is not modified.

    Returns
    -------
    list
        One ``[iteration, hit_fraction, selected_fraction]`` entry per
        iteration. ``hit_fraction`` is the share of the selected rows that
        are among the true top rows; ``selected_fraction`` is the number of
        selected rows divided by the size of the external test set. When no
        row is selected (for example when every row gets the same
        probability) the entry is ``[iteration, 0, 0.0]``.

    Notes
    -----
    The starting split is redrawn until no selected point is among the 10
    starting rows. This never terminates if a selected label is missing from
    the training pool.
    """
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))
    x_train_ex, x_test_ex, y_test_ex_regr, y_train_ex, y_test_ex = data[0], data[1], data[3], data[4], data[5]

    selected_points = list(perfect_model[dataset][1])
    batch_size = 5

    while True:
        x_train_in, x_test_in, y_train_in, y_test_in = train_test_split(
            x_train_ex, y_train_ex, test_size=len(y_train_ex) - 10)
        if all(point in x_test_in.index for point in selected_points):
            break

    n_test = y_test_ex.shape[0]
    target_size = round(n_test * 0.1)
    # The true ordering does not depend on the model, so sort only once.
    true_order = y_test_ex_regr.sort_values(by='pKi', ascending=False).index

    results = []
    for iteration, start in enumerate(range(0, len(selected_points), batch_size)):
        rfc = RandomForestClassifier(random_state=42, n_estimators=500, max_features='log2', n_jobs=20)
        rfc.fit(x_train_in.values, y_train_in['activity'].values)
        fitted_classes = set(y_train_in['activity'].values)

        proba_ex = rfc.predict_proba(x_test_ex.values)
        if len(fitted_classes) == 1:
            # A single-class model returns one probability column; pad with a
            # zero column so that column 1 always refers to class 1.
            zero_column = 0 if 1 in fitted_classes else 1
            proba_ex = np.insert(proba_ex, zero_column, 0, axis=1)

        dense_ranks = rankdata(proba_ex[:, 1], method='dense')
        rank_by_index = dict(zip(x_test_ex.index, dense_ranks.max() - dense_ranks))
        rank_values = list(rank_by_index.values())

        # Admit whole rank groups from the top until the target size is met.
        # The scan stops before the worst rank, so that group is never
        # admitted. NOTE(review): confirm that excluding it is intended.
        number_of_points = 0
        cutoff_rank = None
        for rank in range(max(rank_values)):
            if number_of_points >= target_size:
                break
            number_of_points += rank_values.count(rank)
            cutoff_rank = rank

        if number_of_points == 0:
            results.append([iteration, 0, number_of_points / n_test])
        else:
            predicted_top = {index for index, rank in rank_by_index.items() if rank <= cutoff_rank}
            true_top = set(true_order[:number_of_points])
            results.append([iteration,
                            len(true_top.intersection(predicted_top)) / number_of_points,
                            number_of_points / n_test])

        # Grow the training rows with the next batch for the following fit.
        batch = selected_points[start:start + batch_size]
        x_train_in = pd.concat([x_train_in, x_test_in.loc[batch]])
        y_train_in = pd.concat([y_train_in, y_test_in.loc[batch]])
        x_test_in = x_test_in.drop(batch)
        y_test_in = y_test_in.drop(batch)

    return results

In [None]:
# Top-10% recovery curves of the regression models, one list per dataset.
EF_regr = dict()
for dataset in tqdm(perfect_model_regr):
    EF_regr[dataset] = EF_regression(
        dataset, perfect_model_regr)

In [65]:
# Top-10% recovery curves of the classification models, one list per dataset.
EF_class = dict()
for dataset in tqdm(perfect_model_class):
    EF_class[dataset] = EF_classification(
        dataset, perfect_model_class)

100%|██████████| 17/17 [18:11<00:00, 64.20s/it]


In [214]:
# Persist the regression recovery curves. The output directory is created on
# demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_EF_regr', 'wb') as f:
    pickle.dump(EF_regr, f)

In [67]:
# Persist the classification recovery curves. The output directory is created
# on demand so the cell does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('new_results', exist_ok=True)
with open('new_results/perfect_EF_class', 'wb') as f:
    pickle.dump(EF_class, f)

In [21]:
def AUC_10(dataset, perfect_model):
    """Track ROC AUC for recovering the top-10% pKi test rows as training grows.

    The pickled splits for ``dataset`` are loaded, a random 10-row starting
    training set is drawn that contains none of the points listed in
    ``perfect_model[dataset][1]``, and then, once per batch of five listed
    points, a random-forest classifier is fitted on the current training set,
    scored on the external test set, and the batch is moved from the internal
    pool into the training set.

    The model is fitted *before* its batch is added, so iteration 0 reflects
    only the random starting rows and the last batch is never trained on.

    Parameters
    ----------
    dataset : str
        Name used to build the path ``x_and_y/{dataset}.pickle``.
    perfect_model : mapping
        ``perfect_model[dataset][1]`` is the ordered sequence of index labels
        (taken from the training frame) to add to the training set.

    Returns
    -------
    list
        One ``[iteration, auc]`` pair per fitted model, in iteration order.

    Raises
    ------
    ValueError
        If a listed point is absent from the training frame, or if too few
        non-listed rows exist to draw the starting training set (the original
        re-split loop would never terminate in either case).
    """
    initial_train_size = 10  # rows in the random starting training set
    batch_size = 5           # listed points moved into training per iteration
    top_fraction = 0.1       # share of external test rows labelled positive

    # NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
    data = []
    with open(f'x_and_y/{dataset}.pickle', "rb") as f:
        # The first pickled object is the number of objects that follow.
        for _ in range(pickle.load(f)):
            data.append(pickle.load(f))
    x_train_ex, x_test_ex, y_test_ex_regr, y_train_ex, y_test_ex = data[0], data[1], data[3], data[4], data[5]

    best_points = list(perfect_model[dataset][1])

    # Fail fast instead of re-splitting forever when the constraint below
    # cannot be satisfied.
    missing = [point for point in best_points if point not in x_train_ex.index]
    if missing:
        raise ValueError(f'{dataset}: selected points missing from the training frame: {missing[:5]}')
    if len(x_train_ex) - len(set(best_points)) < initial_train_size:
        raise ValueError(f'{dataset}: fewer than {initial_train_size} non-selected rows available for the initial training set')

    # Re-draw the split until every selected point sits in the internal pool.
    # No random_state is passed, so the draw is not seeded here.
    pool_size = len(y_train_ex) - initial_train_size
    while True:
        x_train_in, x_test_in, y_train_in, y_test_in = train_test_split(x_train_ex, y_train_ex, test_size=pool_size)
        if all(point in x_test_in.index for point in best_points):
            break

    # Binary target: 1 for the external test rows with the highest pKi, else 0.
    # It never changes between iterations, so it is built once. A separate
    # vector is used instead of the chained assignment
    # ``y_test_ex.loc[value]['activity'] = ...``, which writes to a temporary
    # object and may leave ``y_test_ex`` untouched.
    number_of_points = round(y_test_ex.shape[0] * top_fraction)
    top_pki_index = y_test_ex_regr.sort_values(by='pKi', ascending=False).index[:number_of_points]
    y_true = y_test_ex.index.isin(top_pki_index).astype(int)

    x_test_ex_values = x_test_ex.values
    results = []
    for iteration, start in enumerate(range(0, len(best_points), batch_size)):
        rfc = RandomForestClassifier(random_state=42, n_estimators=500, max_features='log2', n_jobs=20)
        rfc.fit(x_train_in.values, y_train_in['activity'].values)

        proba_ex = rfc.predict_proba(x_test_ex_values)
        # With a single class in the training labels predict_proba returns one
        # column; pad with zeros so that column 1 always refers to label 1.
        classes = set(y_train_in['activity'].values)
        if len(classes) == 1:
            if 1 in classes:
                proba_ex = np.insert(proba_ex, 0, 0, axis=1)
            else:
                proba_ex = np.insert(proba_ex, 1, 0, axis=1)

        # roc_curve pairs labels and scores by position; this assumes
        # x_test_ex and y_test_ex share the same row order — TODO confirm.
        fpr, tpr, _ = roc_curve(y_true, proba_ex[:, 1])
        results.append([iteration, auc(fpr, tpr)])

        # Move the next batch from the internal pool into the training set.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        adding_points = best_points[start:start + batch_size]
        x_train_in = pd.concat([x_train_in, x_test_in.loc[adding_points]])
        y_train_in = pd.concat([y_train_in, y_test_in.loc[adding_points]])
        x_test_in = x_test_in.drop(adding_points)
        y_test_in = y_test_in.drop(adding_points)

    return results

In [22]:
# Run AUC_10 for every dataset key of the classification models and collect
# the per-dataset [iteration, AUC] lists in one dictionary.
AUC10_class = dict()
for dataset in tqdm(perfect_model_class.keys()):
    AUC10_class.update({dataset: AUC_10(dataset, perfect_model_class)})

  0%|          | 0/17 [00:00<?, ?it/s]

[0, 0.5625]
[1, 0.5606617647058824]
[2, 0.521139705882353]
[3, 0.8060661764705884]
[4, 0.8621323529411765]
[5, 0.8998161764705883]
[6, 0.9145220588235294]
[7, 0.8988970588235294]
[8, 0.8841911764705882]
[9, 0.8860294117647058]
[10, 0.8970588235294118]
[11, 0.9044117647058824]
[12, 0.8869485294117647]
[13, 0.8970588235294117]
[14, 0.9181985294117647]
[15, 0.9016544117647058]
[16, 0.8887867647058822]
[17, 0.8262867647058824]
[18, 0.8290441176470589]
[19, 0.84375]
[20, 0.8694852941176471]
[21, 0.8566176470588236]
[22, 0.8851102941176471]
[23, 0.9053308823529411]
[24, 0.9117647058823529]
[25, 0.9080882352941176]
[26, 0.8933823529411764]
[27, 0.8970588235294117]
[28, 0.9025735294117647]


  6%|▌         | 1/17 [01:06<17:47, 66.72s/it]

[29, 0.9172794117647058]
[0, 0.5]
[1, 0.5]
[2, 0.45228758169934635]
[3, 0.3928104575163398]
[4, 0.34901960784313724]
[5, 0.3627450980392156]
[6, 0.36732026143790847]
[7, 0.3699346405228758]
[8, 0.4176470588235294]
[9, 0.4405228758169935]
[10, 0.45424836601307195]
[11, 0.4673202614379085]
[12, 0.4941176470588235]
[13, 0.4797385620915033]
[14, 0.45686274509803926]
[15, 0.469281045751634]
[16, 0.5725490196078431]
[17, 0.5640522875816993]
[18, 0.5673202614379085]
[19, 0.5176470588235293]
[20, 0.5333333333333333]
[21, 0.515032679738562]
[22, 0.5183006535947712]
[23, 0.5392156862745098]
[24, 0.5405228758169934]
[25, 0.5444444444444444]
[26, 0.5483660130718954]
[27, 0.5424836601307189]
[28, 0.5274509803921569]


 12%|█▏        | 2/17 [02:12<16:36, 66.45s/it]

[29, 0.5326797385620915]
[0, 0.5]
[1, 0.716931216931217]
[2, 0.6263227513227514]
[3, 0.703042328042328]
[4, 0.6613756613756614]
[5, 0.6798941798941799]
[6, 0.7275132275132276]
[7, 0.6124338624338624]
[8, 0.656084656084656]
[9, 0.666005291005291]
[10, 0.6296296296296295]
[11, 0.6448412698412698]
[12, 0.6051587301587301]
[13, 0.5740740740740741]
[14, 0.5390211640211641]
[15, 0.5674603174603174]
[16, 0.5681216931216931]
[17, 0.5886243386243386]
[18, 0.5873015873015873]
[19, 0.613095238095238]
[20, 0.6111111111111112]
[21, 0.6342592592592593]
[22, 0.5985449735449736]
[23, 0.626984126984127]
[24, 0.5873015873015873]
[25, 0.5727513227513227]
[26, 0.591931216931217]
[27, 0.6164021164021164]
[28, 0.5945767195767196]


 18%|█▊        | 3/17 [03:19<15:34, 66.72s/it]

[29, 0.6283068783068783]
[0, 0.6318631863186319]
[1, 0.5940594059405941]
[2, 0.6188118811881188]
[3, 0.7448244824482448]
[4, 0.7470747074707471]
[5, 0.8060306030603059]
[6, 0.7889288928892889]
[7, 0.7785778577857787]
[8, 0.8064806480648065]
[9, 0.8055805580558055]
[10, 0.8114311431143114]
[11, 0.7947794779477948]
[12, 0.7925292529252925]
[13, 0.7902790279027904]
[14, 0.8082808280828083]
[15, 0.7925292529252925]
[16, 0.7709270927092711]
[17, 0.7925292529252925]
[18, 0.7637263726372637]
[19, 0.7812781278127814]
[20, 0.7857785778577859]
[21, 0.7857785778577858]
[22, 0.7673267326732673]
[23, 0.7821782178217821]
[24, 0.7848784878487848]
[25, 0.7799279927992799]
[26, 0.801980198019802]
[27, 0.8177317731773179]
[28, 0.8235823582358235]


 24%|██▎       | 4/17 [04:26<14:28, 66.82s/it]

[29, 0.8370837083708371]
[0, 0.5]
[1, 0.2718390804597701]
[2, 0.39482758620689656]
[3, 0.7637931034482758]
[4, 0.8114942528735632]
[5, 0.8264367816091954]
[6, 0.8402298850574712]
[7, 0.8459770114942529]
[8, 0.843103448275862]
[9, 0.8603448275862069]
[10, 0.8603448275862069]
[11, 0.8879310344827586]
[12, 0.8954022988505747]
[13, 0.8988505747126436]
[14, 0.8781609195402299]
[15, 0.9063218390804598]
[16, 0.9137931034482759]
[17, 0.9080459770114943]
[18, 0.903448275862069]
[19, 0.9063218390804598]
[20, 0.9103448275862069]
[21, 0.9120689655172414]
[22, 0.9011494252873563]
[23, 0.9080459770114941]
[24, 0.906896551724138]
[25, 0.9172413793103449]
[26, 0.9028735632183907]
[27, 0.9195402298850575]
[28, 0.924712643678161]


 29%|██▉       | 5/17 [05:34<13:23, 67.00s/it]

[29, 0.9270114942528735]
[0, 0.7843653250773993]
[1, 0.8156346749226006]
[2, 0.789938080495356]
[3, 0.828328173374613]
[4, 0.8362229102167182]
[5, 0.8439628482972136]
[6, 0.8472136222910217]
[7, 0.8404024767801858]
[8, 0.8321981424148606]
[9, 0.8270897832817337]
[10, 0.836687306501548]
[11, 0.8326625386996904]
[12, 0.8328173374613004]
[13, 0.8165634674922602]
[14, 0.8337461300309598]
[15, 0.8348297213622291]
[16, 0.8339009287925696]
[17, 0.8551083591331269]
[18, 0.8555727554179566]
[19, 0.8554179566563467]
[20, 0.8492260061919504]
[21, 0.8563467492260062]
[22, 0.8484520123839009]
[23, 0.8486068111455108]
[24, 0.8472136222910216]
[25, 0.8453560371517027]
[26, 0.8571207430340557]
[27, 0.8493808049535603]
[28, 0.8472136222910216]


 35%|███▌      | 6/17 [06:41<12:16, 66.92s/it]

[29, 0.8476780185758515]
[0, 0.6694796061884669]
[1, 0.6715893108298172]
[2, 0.7158931082981715]
[3, 0.7088607594936709]
[4, 0.6870604781997187]
[5, 0.650492264416315]
[6, 0.6385372714486638]
[7, 0.6589310829817159]
[8, 0.6385372714486639]
[9, 0.6610407876230661]
[10, 0.639943741209564]
[11, 0.6343178621659634]
[12, 0.6286919831223629]
[13, 0.6216596343178622]
[14, 0.6132208157524612]
[15, 0.6047819971870605]
[16, 0.6082981715893108]
[17, 0.6258790436005626]
[18, 0.6146272855133614]
[19, 0.6139240506329113]
[20, 0.6258790436005626]
[21, 0.6343178621659634]
[22, 0.6251758087201125]
[23, 0.6315049226441631]
[24, 0.6329113924050633]
[25, 0.6251758087201125]
[26, 0.6462728551336147]
[27, 0.6385372714486639]
[28, 0.650492264416315]


 41%|████      | 7/17 [07:48<11:09, 66.98s/it]

[29, 0.650492264416315]
[0, 0.5660029498525073]
[1, 0.633480825958702]
[2, 0.5623156342182891]
[3, 0.7688053097345133]
[4, 0.7551622418879056]
[5, 0.7769174041297935]
[6, 0.7809734513274337]
[7, 0.7496312684365782]
[8, 0.7647492625368731]
[9, 0.7809734513274337]
[10, 0.7732300884955752]
[11, 0.7632743362831859]
[12, 0.7525811209439528]
[13, 0.7636430678466077]
[14, 0.7470501474926254]
[15, 0.7592182890855458]
[16, 0.7654867256637168]
[17, 0.7669616519174042]
[18, 0.7632743362831859]
[19, 0.7794985250737463]
[20, 0.7658554572271387]
[21, 0.7721238938053098]
[22, 0.769542772861357]
[23, 0.7735988200589972]
[24, 0.7595870206489675]
[25, 0.7625368731563421]
[26, 0.7739675516224188]
[27, 0.7717551622418879]
[28, 0.7721238938053097]


 47%|████▋     | 8/17 [08:53<09:58, 66.48s/it]

[29, 0.7713864306784661]
[0, 0.7060483870967741]
[1, 0.5775345622119816]
[2, 0.6755184331797236]
[3, 0.7600230414746544]
[4, 0.7986751152073732]
[5, 0.7892281105990783]
[6, 0.8039746543778802]
[7, 0.777073732718894]
[8, 0.7846774193548387]
[9, 0.8207949308755761]
[10, 0.8092165898617512]
[11, 0.8210829493087558]
[12, 0.81463133640553]
[13, 0.8184331797235023]
[14, 0.815610599078341]
[15, 0.8282258064516129]
[16, 0.8378456221198156]
[17, 0.8402073732718894]
[18, 0.8455645161290323]
[19, 0.8488479262672811]
[20, 0.8466589861751151]
[21, 0.8436059907834101]
[22, 0.8453341013824885]
[23, 0.8437788018433179]
[24, 0.8469470046082949]
[25, 0.8501152073732718]
[26, 0.8531105990783411]
[27, 0.8525345622119815]
[28, 0.8523617511520737]


 53%|█████▎    | 9/17 [10:06<09:06, 68.36s/it]

[29, 0.8536290322580645]
[0, 0.5]
[1, 0.6941097724230254]
[2, 0.8018741633199464]
[3, 0.7637215528781793]
[4, 0.7791164658634538]
[5, 0.785809906291834]
[6, 0.7904953145917001]
[7, 0.7717536813922355]
[8, 0.7771084337349398]
[9, 0.7717536813922357]
[10, 0.7530120481927711]
[11, 0.7489959839357428]
[12, 0.7295850066934404]
[13, 0.7095046854082998]
[14, 0.6847389558232931]
[15, 0.6827309236947791]
[16, 0.6880856760374833]
[17, 0.6847389558232932]
[18, 0.6927710843373494]
[19, 0.714859437751004]
[20, 0.7034805890227577]
[21, 0.7041499330655958]
[22, 0.7095046854082998]
[23, 0.7095046854082998]
[24, 0.7121820615796519]
[25, 0.7081659973226239]
[26, 0.7228915662650601]
[27, 0.7121820615796519]
[28, 0.7008032128514056]


 59%|█████▉    | 10/17 [11:12<07:54, 67.85s/it]

[29, 0.7088353413654618]
[0, 0.6185567010309276]
[1, 0.6485473289597]
[2, 0.6508903467666355]
[3, 0.6644798500468604]
[4, 0.6312089971883786]
[5, 0.6218369259606373]
[6, 0.49765698219306465]
[7, 0.5740393626991565]
[8, 0.6091846298031866]
[9, 0.6424554826616683]
[10, 0.6148078725398313]
[11, 0.6733833177132147]
[12, 0.7005623242736645]
[13, 0.6771321462043112]
[14, 0.6733833177132147]
[15, 0.6841611996251171]
[16, 0.6654170571696344]
[17, 0.6696344892221181]
[18, 0.6771321462043112]
[19, 0.6790065604498594]
[20, 0.7057169634489222]
[21, 0.7029053420805998]
[22, 0.6935332708528585]
[23, 0.655576382380506]
[24, 0.6691658856607311]
[25, 0.6944704779756327]
[26, 0.7047797563261481]
[27, 0.6958762886597938]
[28, 0.7127460168697283]


 65%|██████▍   | 11/17 [12:19<06:45, 67.55s/it]

[29, 0.7141518275538895]
[0, 0.4709792050539616]
[1, 0.5710713345617268]
[2, 0.6868912871808371]
[3, 0.596933403527244]
[4, 0.6488549618320612]
[5, 0.6320742300605423]
[6, 0.5990392208475915]
[7, 0.6197025533035009]
[8, 0.6662279547249276]
[9, 0.6524743353514083]
[10, 0.6630034219531455]
[11, 0.666886022637536]
[12, 0.659055014477494]
[13, 0.6350355356672809]
[14, 0.6595156620163201]
[15, 0.6481310871281917]
[16, 0.6490523822058435]
[17, 0.6635298762832323]
[18, 0.6568175835746249]
[19, 0.647933666754409]
[20, 0.6470123716767571]
[21, 0.6416820215846274]
[22, 0.6245064490655435]
[23, 0.6473414056330613]
[24, 0.6360884443274546]
[25, 0.6379968412740195]
[26, 0.6499078704922346]
[27, 0.6777441431955777]
[28, 0.7036720189523559]


 71%|███████   | 12/17 [13:32<05:45, 69.19s/it]

[29, 0.706435904185312]
[0, 0.8365168539325842]
[1, 0.9292134831460673]
[2, 0.9056179775280899]
[3, 0.9078651685393259]
[4, 0.897191011235955]
[5, 0.8960674157303371]
[6, 0.8910112359550562]
[7, 0.8932584269662921]
[8, 0.8775280898876405]
[9, 0.8825842696629214]
[10, 0.8741573033707865]
[11, 0.8679775280898877]
[12, 0.8691011235955056]
[13, 0.8808988764044944]
[14, 0.8674157303370786]
[15, 0.8657303370786518]
[16, 0.8724719101123596]
[17, 0.8719101123595505]
[18, 0.8629213483146068]
[19, 0.8668539325842697]
[20, 0.8589887640449438]
[21, 0.8679775280898876]
[22, 0.8634831460674157]
[23, 0.8646067415730337]
[24, 0.8460674157303371]
[25, 0.853932584269663]
[26, 0.848314606741573]
[27, 0.848314606741573]
[28, 0.8567415730337079]


 76%|███████▋  | 13/17 [14:39<04:34, 68.57s/it]

[29, 0.8550561797752809]
[0, 0.8006912442396313]
[1, 0.8859447004608294]
[2, 0.847926267281106]
[3, 0.8790322580645161]
[4, 0.8963133640552995]
[5, 0.8870967741935484]
[6, 0.8928571428571429]
[7, 0.9055299539170506]
[8, 0.9101382488479263]
[9, 0.9089861751152074]
[10, 0.8744239631336406]
[11, 0.9147465437788018]
[12, 0.9101382488479263]
[13, 0.9147465437788018]
[14, 0.9285714285714285]
[15, 0.923963133640553]
[16, 0.9158986175115207]
[17, 0.9170506912442397]
[18, 0.9147465437788019]
[19, 0.923963133640553]
[20, 0.9182027649769585]
[21, 0.9170506912442397]
[22, 0.9170506912442397]
[23, 0.9297235023041475]
[24, 0.9239631336405529]
[25, 0.9239631336405529]
[26, 0.913594470046083]
[27, 0.9182027649769585]
[28, 0.9216589861751151]


 82%|████████▏ | 14/17 [15:49<03:26, 68.87s/it]

[29, 0.9182027649769585]
[0, 0.771264367816092]
[1, 0.7666666666666666]
[2, 0.7862068965517242]
[3, 0.7977011494252874]
[4, 0.7752873563218391]
[5, 0.8045977011494253]
[6, 0.7873563218390804]
[7, 0.7954022988505747]
[8, 0.7683908045977011]
[9, 0.792528735632184]
[10, 0.8080459770114943]
[11, 0.7672413793103449]
[12, 0.7781609195402299]
[13, 0.7609195402298851]
[14, 0.821264367816092]
[15, 0.8195402298850575]
[16, 0.8057471264367816]
[17, 0.8114942528735632]
[18, 0.7867816091954023]
[19, 0.7827586206896552]
[20, 0.7764367816091954]
[21, 0.7810344827586206]
[22, 0.7563218390804598]
[23, 0.7586206896551725]
[24, 0.732183908045977]
[25, 0.7586206896551725]
[26, 0.7770114942528735]
[27, 0.7810344827586206]
[28, 0.7787356321839081]


 88%|████████▊ | 15/17 [16:56<02:16, 68.42s/it]

[29, 0.7747126436781608]
[0, 0.5]
[1, 0.6993087557603687]
[2, 0.7085253456221198]
[3, 0.653225806451613]
[4, 0.7154377880184332]
[5, 0.6970046082949309]
[6, 0.6728110599078342]
[7, 0.6658986175115207]
[8, 0.6705069124423962]
[9, 0.6981566820276497]
[10, 0.7142857142857142]
[11, 0.7039170506912442]
[12, 0.6797235023041475]
[13, 0.7050691244239631]
[14, 0.6923963133640553]
[15, 0.7165898617511521]
[16, 0.7269585253456221]
[17, 0.6923963133640553]
[18, 0.7223502304147466]
[19, 0.7476958525345622]
[20, 0.7569124423963133]
[21, 0.7776497695852534]
[22, 0.7684331797235023]
[23, 0.7788018433179724]
[24, 0.7960829493087558]
[25, 0.8237327188940092]
[26, 0.8225806451612903]
[27, 0.8283410138248848]
[28, 0.8110599078341014]


 94%|█████████▍| 16/17 [18:04<01:08, 68.11s/it]

[29, 0.8133640552995391]
[0, 0.5850558659217877]
[1, 0.5465083798882682]
[2, 0.5484636871508379]
[3, 0.6191340782122905]
[4, 0.6537709497206704]
[5, 0.6236033519553073]
[6, 0.6304469273743016]
[7, 0.630586592178771]
[8, 0.6245810055865921]
[9, 0.6603351955307263]
[10, 0.6335195530726256]
[11, 0.6424581005586592]
[12, 0.604050279329609]
[13, 0.6016759776536312]
[14, 0.614804469273743]
[15, 0.6180167597765364]
[16, 0.6430167597765364]
[17, 0.6451117318435755]
[18, 0.6423184357541899]
[19, 0.6092178770949721]
[20, 0.6305865921787709]
[21, 0.6395251396648045]
[22, 0.6451117318435754]
[23, 0.6508379888268156]
[24, 0.6409217877094971]
[25, 0.6339385474860335]
[26, 0.6456703910614525]
[27, 0.654608938547486]
[28, 0.6713687150837988]


100%|██████████| 17/17 [19:12<00:00, 67.78s/it]

[29, 0.6861731843575418]





In [23]:
# Serialize the AUC10_class dictionary to disk with pickle.
with open('new_results/perfect_AUC10_class', 'wb') as out_file:
    pickle.dump(AUC10_class, out_file)