In [None]:
import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
from copy import deepcopy
from tqdm import tqdm

from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# Seed every global RNG this notebook can reach so that stochastic steps are
# repeatable after Restart & Run All. The stdlib generator was the only one
# seeded before; NumPy's legacy global generator is seeded as well.
# NOTE(review): whether the `consensus` searchers draw from `random` or from
# NumPy is not visible here — confirm which generator they actually use.
random.seed(42)
np.random.seed(42)

In [None]:
def calc_accuracy(y_true, y_pred, metric='mae'):
    """Score predictions against reference values with the selected metric.

    Parameters
    ----------
    y_true : array-like
        Reference target values.
    y_pred : array-like
        Predicted values, aligned with ``y_true``.
    metric : {'mae', 'r2', 'rmse'}, default 'mae'
        Which scikit-learn scorer to apply: ``mean_absolute_error``,
        ``r2_score`` or ``root_mean_squared_error`` respectively.

    Returns
    -------
    Whatever the selected scikit-learn metric function returns for the inputs.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    if metric == 'mae':
        return mean_absolute_error(y_true, y_pred)
    if metric == 'r2':
        return r2_score(y_true, y_pred)
    if metric == 'rmse':
        return root_mean_squared_error(y_true, y_pred)

    # An unknown name used to fall through to `return acc` and surface as an
    # UnboundLocalError; fail with a message that names the bad argument.
    raise ValueError(
        f"Unsupported metric {metric!r}; expected one of 'mae', 'r2', 'rmse'."
    )

## Consensus methods

In [None]:
from consensus import RandomSearchRegressor, SystematicSearchRegressor, GeneticSearchRegressor
from sklearn.linear_model import LinearRegression, Ridge

In [None]:
# Consensus-search strategies to compare, as (searcher, label) pairs.
# The label becomes the column name in the result tables built further down.
# All searchers are configured with the 'rmse' metric.
method_list = [
    # cons_size=1 — presumably reduces to picking the single best column; confirm in `consensus`
    (SystematicSearchRegressor(cons_size=1, metric='rmse'), 'Best'),
    (RandomSearchRegressor(cons_size=10, n_iter=5000, metric='rmse'), 'Random'),
    (SystematicSearchRegressor(cons_size=10, metric='rmse'), 'Systematic'),
    (GeneticSearchRegressor(cons_size=10, metric='rmse'), 'Genetic'),
]

## Consensus comparison
### Input/output data paths

In [None]:
# Input: folder holding one prediction file per benchmark.
# Output: folder for the consensus results.
# Both are made absolute against the current working directory.
prediction_folder = Path("benchmark_prediction/molnet").resolve()
results_folder = Path("benchmark_consensus/molnet").resolve()

# Create the output folder (and any missing parents); a no-op if it already exists.
results_folder.mkdir(parents=True, exist_ok=True)

### Consensus building

In [None]:
# R2 score tables: one row per benchmark file, one column per consensus method
# (plus "Stacking"), for the validation split and the held-out test split.
res_df_val = pd.DataFrame()
res_df_test = pd.DataFrame()

# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order (and therefore the row order of the tables) stable between runs.
data_path = sorted(os.listdir(prediction_folder))
for bench_file in tqdm(data_path):

    # Benchmark label = file name up to the first dot.
    bench_name = bench_file.split('.')[0]

    # load data (read from the prediction folder defined in the paths cell)
    df = pd.read_csv(os.path.join(prediction_folder, bench_file))
    df_val, df_test = train_test_split(df, test_size=0.3, random_state=42)

    # Column 0 is used as the target; every remaining column is an input.
    # NOTE(review): the inputs are presumably per-model predictions — confirm
    # against the files in the prediction folder.
    x_val, y_val = df_val.iloc[:, 1:], df_val.iloc[:, 0]
    x_test, y_test = df_test.iloc[:, 1:], df_test.iloc[:, 0]

    for method_func, method_name in method_list:
        # The search only sees the validation split; `cons` is then used to
        # select columns, whose row-wise mean is the consensus prediction.
        cons = method_func.run(x_val, y_val)

        res_df_val.loc[bench_name, method_name] = calc_accuracy(y_val, x_val[cons].mean(axis=1), metric='r2')
        res_df_test.loc[bench_name, method_name] = calc_accuracy(y_test, x_test[cons].mean(axis=1), metric='r2')

    # stacking: linear model fitted on all input columns of the validation split
    stacking_model = LinearRegression()
    stacking_model.fit(x_val, y_val)

    res_df_val.loc[bench_name, "Stacking"] = calc_accuracy(y_val, stacking_model.predict(x_val), metric='r2')
    res_df_test.loc[bench_name, "Stacking"] = calc_accuracy(y_test, stacking_model.predict(x_test), metric='r2')

In [None]:
# Validation-split R2 per benchmark (rows) and method (columns), rounded for display.
res_df_val.round(2)

In [None]:
# Test-split R2 per benchmark (rows) and method (columns), rounded for display.
res_df_test.round(2)

## Consensus comparison statistics

In [None]:
from collections import Counter

In [None]:
# For each benchmark (row of the test table) record the label of the column
# with the highest score. `idxmax` returns that label directly, so the
# positional argmax/index lookup and the unused minimum are no longer needed.
# The name `tmp` is kept because a later cell tallies it.
tmp = []
for dataset in res_df_test.index:
    tmp.append(res_df_test.loc[dataset].idxmax())

In [None]:
# Tally how many times each label occurs in `tmp`.
Counter(tmp)

In [None]:
# Column-wise mean of the validation table (average over benchmarks per method), rounded for display.
res_df_val.mean(axis=0).round(2)

In [None]:
# Column-wise mean of the test table (average over benchmarks per method), rounded for display.
res_df_test.mean(axis=0).round(2)