In [1]:
import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
from copy import deepcopy
from tqdm import tqdm

from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split

from collections import defaultdict

In [2]:
# Seed both global RNGs used by this notebook's dependencies.
# `random` covers pure-Python sampling; NumPy's global generator is what
# scikit-learn estimators fall back on when no `random_state` is passed.
# NOTE(review): which generator the `consensus` search classes draw from is
# not visible here — confirm they are covered by one of these two seeds.
random.seed(42)
np.random.seed(42)

In [3]:
def calc_accuracy(y_true, y_pred, metric='mae'):
    """Score predictions against reference values with the selected metric.

    Parameters
    ----------
    y_true : array-like
        Reference target values.
    y_pred : array-like
        Predicted values, passed to the metric together with ``y_true``.
    metric : {'mae', 'r2', 'rmse'}, default 'mae'
        Which scikit-learn metric to apply: ``mean_absolute_error``,
        ``r2_score`` or ``root_mean_squared_error``.

    Returns
    -------
    The value returned by the selected scikit-learn metric function.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    if metric == 'mae':
        return mean_absolute_error(y_true, y_pred)
    if metric == 'r2':
        return r2_score(y_true, y_pred)
    if metric == 'rmse':
        return root_mean_squared_error(y_true, y_pred)

    # Previously an unknown name fell through and failed on an unbound local.
    raise ValueError(
        f"Unsupported metric {metric!r}; expected one of 'mae', 'r2', 'rmse'"
    )

## Consensus methods

In [4]:
from consensus import RandomSearchRegressor, SystematicSearchRegressor, GeneticSearchRegressor, HyperoptSearchRegressor

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor

In [5]:
# (search strategy, column label) pairs; each strategy is run on every
# benchmark and its label names the column in the result tables.
# All active strategies are configured with metric='r2'.
method_list = [
    # Consensus restricted to a single member.
    (
        SystematicSearchRegressor(cons_size=1, metric='r2'),
        'Best',
    ),
    # Very large cons_size — presumably enough to admit every available
    # model; confirm against the consensus package.
    (
        SystematicSearchRegressor(cons_size=10**3, metric='r2'),
        'All',
    ),
    # The three searches below all target a consensus of size 10.
    (
        RandomSearchRegressor(cons_size=10, n_iter=10000, metric='r2'),
        'Random',
    ),
    (
        SystematicSearchRegressor(cons_size=10, metric='r2'),
        'Systematic',
    ),
    (
        GeneticSearchRegressor(cons_size=10, mut_prob=0.5, metric='r2'),
        'Genetic',
    ),
    # Disabled entry (note that it was configured with metric='rmse'):
    # (HyperoptSearchRegressor(cons_size=10, n_iter=200, metric='rmse'), 'Hyperopt'),
]

## Consensus comparison
### Input/output data path

In [97]:
# Folder with one sub-folder per benchmark; the relative location is turned
# into an absolute path based on the current working directory.
bench_folder = Path("benchmark_model_prediction_1/chembl")
bench_folder = bench_folder.resolve()

### Consensus building

In [None]:
# Fixed seed for the stacking model so repeated runs give identical scores.
STACKING_SEED = 42
# Metric reported in both result tables.
EVAL_METRIC = 'r2'

# Scores are gathered as {benchmark: {method: score}} and converted to
# DataFrames once after the loop instead of growing a frame cell by cell.
val_scores = {}
test_scores = {}

# Consensus members selected by each method, pooled over all benchmarks.
opt_cons_dict = defaultdict(list)

# One sub-folder per benchmark. Stray files and hidden folders are skipped,
# and names are sorted so the processing order is the same on every run.
bench_names = sorted(
    entry.name
    for entry in Path(bench_folder).iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)

for bench_name in tqdm(bench_names):
    bench_dir = Path(bench_folder) / bench_name

    # load data
    df_val = pd.read_csv(bench_dir / f"{bench_name}_val.csv")
    df_test = pd.read_csv(bench_dir / f"{bench_name}_test.csv")

    # first column is y_true; the remaining columns are the predictions table
    x_val, y_val = df_val.iloc[:, 1:], df_val.iloc[:, 0]
    x_test, y_test = df_test.iloc[:, 1:], df_test.iloc[:, 0]

    val_row = {}
    test_row = {}

    # build consensus: the search runs on the validation set only, and the
    # selected columns are averaged row-wise to form the consensus prediction
    for method_func, method_name in method_list:
        cons = method_func.run(x_val, y_val)
        val_row[method_name] = calc_accuracy(
            y_val, x_val[cons].mean(axis=1), metric=EVAL_METRIC
        )
        test_row[method_name] = calc_accuracy(
            y_test, x_test[cons].mean(axis=1), metric=EVAL_METRIC
        )

        # consensus constitution
        opt_cons_dict[method_name].extend(cons)

    # stacking model: fitted on the validation predictions, so its validation
    # score is computed on the very data it was trained on
    stacking_model = RandomForestRegressor(random_state=STACKING_SEED)
    stacking_model.fit(x_val, y_val)
    val_row["Stacking"] = calc_accuracy(
        y_val, stacking_model.predict(x_val), metric=EVAL_METRIC
    )
    test_row["Stacking"] = calc_accuracy(
        y_test, stacking_model.predict(x_test), metric=EVAL_METRIC
    )

    val_scores[bench_name] = val_row
    test_scores[bench_name] = test_row

# Rows are benchmarks, columns are methods (in the order they were scored).
res_df_val = pd.DataFrame.from_dict(val_scores, orient="index")
res_df_test = pd.DataFrame.from_dict(test_scores, orient="index")

 70%|████████████████████████████████████████████████████████▎                        | 139/200 [50:27<22:16, 21.90s/it]

In [None]:
# res_df_val.round(2)

In [None]:
# res_df_test.round(2)

## Meta statistics

In [None]:
from collections import Counter

### 1. Mean accuracy

In [None]:
# Column-wise mean of the validation scores (one value per method), rounded to 2 decimals.
res_df_val.mean(axis=0).round(2)

In [None]:
# Column-wise mean of the test scores (one value per method), rounded to 2 decimals.
res_df_test.mean(axis=0).round(2)

### 2. Top-N stat

In [None]:
# For every benchmark pick the method with the highest test score, then count
# how often each method comes out on top.
# Rows without any score are dropped first: they have no winner and must not
# be attributed to a column.
best_method_per_dataset = (
    res_df_test
    .astype(float)
    .dropna(how="all")
    .idxmax(axis=1)
)
Counter(best_method_per_dataset)

### 3. Pair comparison

In [None]:
# Head-to-head comparison of two methods on the test scores.
# Swap the two column names to look at another pair, e.g.
#   "Best" - "All", "All" - "Systematic",
#   "Systematic" - "Genetic", "Systematic" - "Stacking".
tmp = res_df_test["Genetic"] - res_df_test["Stacking"]

# Label 1: the first method scored strictly higher on that benchmark.
# Label 2: every other case (this includes ties and missing differences).
winner_labels = np.where(tmp > 0, 1, 2)

# Convert the NumPy integer keys to plain ints before printing in key order.
res_pair = {
    label.item(): n_datasets
    for label, n_datasets in Counter(winner_labels).items()
}
print(dict(sorted(res_pair.items())))

In [None]:
team_1 = ["Best", "All", "Systematic"]
team_2 = ["Genetic", "Stacking"]

# team_1 = ["Best"]
# team_2 = ["All", "Systematic", "Genetic", "Stacking", "Hyperopt"]

# Only methods that belong to one of the two teams (and were actually scored)
# take part. Previously any winner outside team_1 — including methods listed
# in neither team — was counted for team 2.
contenders = [
    method for method in team_1 + team_2 if method in res_df_test.columns
]

# Best contender per benchmark; benchmarks without any contender score are
# skipped because they have no winner.
team_winners = (
    res_df_test[contenders]
    .astype(float)
    .dropna(how="all")
    .idxmax(axis=1)
)

# Number of benchmarks won by each team, keyed 1 and 2.
res = {
    1: int(team_winners.isin(team_1).sum()),
    2: int(team_winners.isin(team_2).sum()),
}
res

### 4. Consensus constitution

In [None]:
import seaborn as sns

In [None]:
# Split every member name chosen by the "Genetic" search on "|" once and keep
# the first two fields separately.
# NOTE(review): names presumably follow a "<descriptor>|<ML method>" layout —
# confirm against the prediction-table column names.
genetic_name_parts = [name.split("|") for name in opt_cons_dict["Genetic"]]
descr_list = [parts[0] for parts in genetic_name_parts]
ml_list = [parts[1] for parts in genetic_name_parts]

In [None]:
# How often each value of the first name field occurs among the selected members.
Counter(descr_list)

In [None]:
# How often each value of the second name field occurs among the selected members.
Counter(ml_list)

### 5. Method consistency

In [None]:
# Per-benchmark score difference between two methods, on validation and test.
# Other pairs to try by swapping the second column name:
#   "Genetic" - "Systematic", "Genetic" - "Stacking".
diff_val = res_df_val["Genetic"] - res_df_val["Best"]
diff_test = res_df_test["Genetic"] - res_df_test["Best"]

# A benchmark is consistent when "difference is strictly positive" has the
# same truth value on validation and on test (compared position by position).
positive_on_val = (diff_val > 0).to_numpy()
positive_on_test = (diff_test > 0).to_numpy()
stat = positive_on_val == positive_on_test

In [None]:
# Number of consistent benchmarks, total number of benchmarks, and their ratio.
n_consistent = int(np.sum(stat))
n_total = len(stat)

print(n_consistent)
print(n_total)
# With no benchmarks there is no ratio to report; print nan instead of failing.
print(round(n_consistent / n_total, 2) if n_total else float("nan"))