In [10]:
import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
from copy import deepcopy
from tqdm import tqdm

from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split

In [11]:
random.seed(42)

In [12]:
def calc_accuracy(y_true, y_pred, metric='mae'):
    if metric == 'mae':
        acc = mean_absolute_error(y_true, y_pred)
    elif metric == 'r2':
        acc = r2_score(y_true, y_pred)
    elif metric == 'rmse':
        acc = root_mean_squared_error(y_true, y_pred)

    return acc

## Consensus methods

In [13]:
from consensus import RandomSearchRegressor, SystematicSearchRegressor, GeneticSearchRegressor, HyperoptSearchRegressor

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Ridge

In [14]:
method_list = [(SystematicSearchRegressor(cons_size=10, metric='r2'), 'Best'),
               #(SystematicSearchRegressor(cons_size=10**3, metric='r2'), 'All'),
               (RandomSearchRegressor(cons_size=2, n_iter=5, metric='r2'), 'Random'),
               (SystematicSearchRegressor(cons_size=2, metric='r2'), 'Systematic'),
               #(GeneticSearchRegressor(cons_size=3, mut_prob=0.5, metric='r2'), 'Genetic'),
               #(HyperoptSearchRegressor(cons_size=3, n_iter=200, metric='rmse'), 'Hyperopt')
               ]

## Consensus comparison
### Input/output data path

In [15]:
bench_folder =  Path("benchmark_model_prediction/chembl").resolve()

### Consensus building

In [16]:
estimators = [
    (LinearRegression(), 'LinearRegression'),
    (RandomForestRegressor(), 'RandomForestRegressor'),
    (MLPRegressor(), 'MLPRegressor'),
    (Ridge(), 'Ridge'),
    (SVR(), 'SVR'),
    (KNeighborsRegressor(), 'KNeighborsRegressor')
]

In [17]:
# res_df_val = pd.DataFrame()
res_df_test = pd.DataFrame()
res_df_train_stack = pd.DataFrame()
res_df_test_stack = pd.DataFrame()

In [18]:

for bench_name in tqdm(os.listdir(bench_folder)[:]):

    # load data
    # df_val = pd.read_csv(os.path.join(bench_folder, bench_name, f"{bench_name}_val.csv"))
    df_test = pd.read_csv(os.path.join(bench_folder, bench_name, f"{bench_name}_test.csv"))
    df_train = pd.read_csv(os.path.join(bench_folder, bench_name, f"{bench_name}_traincv.csv"))
    
    # remove y_true column prof predictions table
    # x_val, y_val = df_val.iloc[:, 1:], df_val.iloc[:, 0]
    x_train, y_train = df_train.iloc[:, 1:], df_train.iloc[:, 0]
    x_test, y_test = df_test.iloc[:, 1:], df_test.iloc[:, 0]

    cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=1)

    # build consensus
    for method_func, method_name in method_list:
        cons = method_func.run(x_train, y_train)
        y_pred = x_train[cons].mean(axis=1)
        #
        res_df_train.loc[bench_name, method_name] = calc_accuracy(y_train, x_train[cons].mean(axis=1), metric='r2')
        res_df_test.loc[bench_name, method_name] = calc_accuracy(y_test, x_test[cons].mean(axis=1), metric='r2')
        
        # # stacking model
        # x_stack, y_stack = df_train.iloc[:, 1:], df_train.iloc[:, 0]

    for model, name in estimators:
        model.fit(x_train, y_train, cv=cv)
        predictions = model.predict(x_test)
        # predictions = cross_val_predict(model, x_train, y_train, cv=cv, n_jobs=-1) 
        res_df_test.loc[bench_name, f"Stacking_test_{model}"] = calc_accuracy(y_test, predictions, metric='r2')
    
    

100%|██████████| 18/18 [00:58<00:00,  3.25s/it]


In [19]:
res_df_test

Unnamed: 0,Best,Stacking_test_LinearRegression(),Stacking_test_RandomForestRegressor(),Stacking_test_MLPRegressor(),Stacking_test_Ridge(),Stacking_test_SVR(),Stacking_test_KNeighborsRegressor(),Random,Systematic
CHEMBL204,0.744706,-107605.256905,0.770969,-63313.501834,-100043.277926,0.138486,0.639113,0.747799,0.731819
CHEMBL205,0.67025,-441.658204,0.673174,-995.288392,-434.465703,0.511798,0.526749,-16.890398,0.640829
CHEMBL208,0.620633,-0.957822,0.589363,0.575732,0.317017,0.575518,0.541287,0.593201,0.598381
CHEMBL209,0.844755,0.836498,0.881989,-590.110496,0.853913,-0.04512,0.779443,0.803321,0.809199
CHEMBL210,0.653176,-5.131027,0.674024,0.081533,0.319705,0.571407,0.572971,0.646462,0.618914
CHEMBL211,0.709243,0.534837,0.71104,0.611538,0.563839,0.666347,0.629468,0.664537,0.693549
CHEMBL214,0.648332,0.603422,0.665586,0.54191,0.617517,0.625256,0.535745,0.538577,0.643026
CHEMBL216,0.579281,0.078894,0.462157,0.433827,0.280676,0.506224,0.456679,0.524082,0.537975
CHEMBL217,0.60602,0.554334,0.618908,0.539289,0.566902,0.56201,0.492507,0.585557,0.59675
CHEMBL218,0.535147,0.460442,0.541463,0.508737,0.4864,0.508869,0.475148,0.080892,0.521603


## Meta statistics

In [20]:
from collections import Counter

### 1. Mean accuracy

In [21]:
res_df_val.mean(axis=0).round(2)

Best          0.65
Random       -0.71
Systematic    0.65
dtype: float64

In [22]:
res_df_test.mean(axis=0).round(2)

Best                                        0.64
Stacking_test_LinearRegression()        -6016.51
Stacking_test_RandomForestRegressor()       0.63
Stacking_test_MLPRegressor()            -3605.35
Stacking_test_Ridge()                   -5581.64
Stacking_test_SVR()                         0.50
Stacking_test_KNeighborsRegressor()         0.54
Random                                     -0.46
Systematic                                  0.62
dtype: float64

### 2. Top-N stat

In [23]:
tmp = []
for dataset in res_df_test.index:
    min_i = res_df_test.loc[dataset].argmin()
    max_i = res_df_test.loc[dataset].argmax()
    #
    tmp.append(res_df_test.loc[dataset].index[max_i])
#
Counter(tmp)

Counter({'Stacking_test_RandomForestRegressor()': 13,
         'Best': 4,
         'Systematic': 1})

## 3. Pair comapison

In [24]:
# tmp = res_df_test["Best"] - res_df_test["All"]
# tmp = res_df_test["Best"] - res_df_test["Systematic"]
# tmp = res_df_test["All"] - res_df_test["Systematic"]
# tmp = res_df_test["Systematic"] - res_df_test["Genetic"]
# tmp = res_df_test["Systematic"] - res_df_test["Stacking"]
tmp = res_df_test["Genetic"] - res_df_test["Stacking"]


res_pair = Counter(np.where(tmp > 0, 1, 2))
res_pair = {k.item():v for k, v in res_pair.items()}
print(dict(sorted(res_pair.items())))

KeyError: 'Genetic'

In [None]:
team_1 = ["Res", "All", "Systematic"]
team_2 = ["Genetic", "Stacking"]

res = {1:0, 2:0}
for dataset in res_df_test.index:
    stats = res_df_test.loc[dataset].to_dict()
    alg = max(stats, key=stats.get)
    if alg in team_1:
        res[1] += 1
    else:
        res[2] += 1
res