In [1]:
# Show which Python interpreter this kernel runs on (environment sanity check).
import sys
sys.executable

'D:\\Dev\\jupyter\\projects\\Data Science\\venv\\Scripts\\python.exe'

In [2]:
from itertools import permutations, combinations_with_replacement
import numpy as np

def find_optimum_weights_for_ensemble(models, data_for_models, y_test, metric, target,
                                      n_steps=11, verbose=True):
    """Grid-search the weights of a weighted-sum ensemble of fitted models.

    Each model predicts once on its own input. Every weight vector taken from
    an evenly spaced grid on [0, 1] whose components sum to 1 is then used to
    blend the predictions, and the blend is scored with ``metric``.

    Parameters
    ----------
    models : dict
        Maps a model name to an object exposing ``predict(data)``.
    data_for_models : dict
        Maps the same model names to the input passed to that model's
        ``predict``.
    y_test : array-like
        Ground-truth values, passed as the first argument to ``metric``.
    metric : callable
        Called as ``metric(y_test, blended_prediction)``; must return a value
        that can be ordered with ``<`` / ``>``.
    target : {'minimize', 'maximize'}
        Whether a lower or a higher metric value is better.
    n_steps : int, default 11
        Number of grid points on [0, 1]; the default gives a step of 0.1.
    verbose : bool, default True
        If True, print every candidate weight vector and its metric value.

    Returns
    -------
    (best_coefs, best_metric)
        ``best_coefs`` is a tuple of weights ordered like ``models``.
        On ties the first candidate evaluated wins. If no weight vector can
        be formed (e.g. ``models`` is empty) the result is ``(None, +/-inf)``.

    Raises
    ------
    AssertionError
        If ``target`` is not 'minimize' or 'maximize'.
    ValueError
        If ``n_steps`` is smaller than 2.
    """
    assert target in ['minimize', 'maximize']
    if n_steps < 2:
        raise ValueError('n_steps must be at least 2')

    model_names = list(models.keys())
    n_models = len(model_names)

    # Predict once per model; the search below only re-weights these arrays.
    preds = [models[name].predict(data_for_models[name]) for name in model_names]

    # Grid points such as 0.30000000000000004 are not exact, so comparing the
    # sum to 1 with `==` can silently drop valid combinations (ten weights of
    # 0.1 add up to 0.9999999999999999). Compare with a tolerance instead.
    coefs_unique = [
        combo
        for combo in combinations_with_replacement(np.linspace(0, 1, n_steps), n_models)
        if np.isclose(sum(combo), 1.0)
    ]

    maximize = target == 'maximize'
    best_metric = -np.inf if maximize else np.inf
    best_coefs = None

    for coefs in coefs_unique:
        # set() removes duplicate orderings that arise when weights repeat.
        for variant in set(permutations(coefs, n_models)):
            if verbose:
                print(' '.join(f'{name}: {coef}' for name, coef in zip(model_names, variant)))

            result_predict = np.sum(
                [weight * pred for weight, pred in zip(variant, preds)],
                axis=0)

            result_metric = metric(y_test, result_predict)
            if verbose:
                print(f'Metric: {result_metric}')

            # Strict comparison: an equally good later candidate does not
            # replace the current best.
            if maximize:
                improved = result_metric > best_metric
            else:
                improved = result_metric < best_metric
            if improved:
                best_metric = result_metric
                best_coefs = variant
    return best_coefs, best_metric

In [3]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
# Load the diabetes dataset as a (features, target) pair.
x, y = load_diabetes(return_X_y=True)
# Hold out 33% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
# Display the shapes of the four splits as a sanity check.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((296, 10), (296,), (146, 10), (146,))

In [5]:
# Baseline 1: Ridge regression with default hyperparameters.
linreg = Ridge()
linreg.fit(x_train, y_train)
linreg_preds = linreg.predict(x_test)
# Report (MSE, R^2) on the held-out split.
mean_squared_error(y_test, linreg_preds), r2_score(y_test, linreg_preds)

(3196.982774079495, 0.4445111375735453)

In [6]:
# Baseline 2: gradient-boosted trees; the seed is fixed for reproducibility.
tree = GradientBoostingRegressor(max_depth=3, min_samples_leaf=12, random_state=42)
tree.fit(x_train, y_train)
tree_preds = tree.predict(x_test)
# Report (MSE, R^2) on the held-out split.
mean_squared_error(y_test, tree_preds), r2_score(y_test, tree_preds)

(3131.57320735605, 0.4558763179259717)

In [7]:
# Baseline 3: support vector regression with a polynomial kernel (other settings left at defaults).
svm = SVR(kernel='poly')
svm.fit(x_train, y_train)
svm_preds = svm.predict(x_test)
# Report (MSE, R^2) on the held-out split.
mean_squared_error(y_test, svm_preds), r2_score(y_test, svm_preds)

(4345.92389814928, 0.24487790739817772)

In [8]:
# Fitted models to blend, keyed by the name used in the printed log.
models = {
    'Ridge': linreg,
    'Tree': tree,
    'SVM': svm
}

# Input passed to each model's predict(); keys must match `models`.
# Here every model receives the same held-out features.
data_for_models = {
    'Ridge': x_test,
    'Tree': x_test,
    'SVM': x_test
}

# Search for the blend weights that minimize MSE.
# NOTE(review): the weights are chosen on the same split used to report the
# metric, so the best score is optimistic; consider a separate validation split.
find_optimum_weights_for_ensemble(
    models=models,
    data_for_models=data_for_models,
    y_test=y_test,
    metric=mean_squared_error,
    target='minimize'
)

Ridge: 1.0 Tree: 0.0 SVM: 0.0
Metric: 3196.982774079495
Ridge: 0.0 Tree: 0.0 SVM: 1.0
Metric: 4345.92389814928
Ridge: 0.0 Tree: 1.0 SVM: 0.0
Metric: 3131.57320735605
Ridge: 0.0 Tree: 0.1 SVM: 0.9
Metric: 4016.4625686344402
Ridge: 0.0 Tree: 0.9 SVM: 0.1
Metric: 3044.9820159998553
Ridge: 0.9 Tree: 0.0 SVM: 0.1
Metric: 3275.449968953468
Ridge: 0.9 Tree: 0.1 SVM: 0.0
Metric: 3088.035672600813
Ridge: 0.1 Tree: 0.9 SVM: 0.0
Metric: 3035.7080192220565
Ridge: 0.1 Tree: 0.0 SVM: 0.9
Metric: 4194.6028682092965
Ridge: 0.8 Tree: 0.0 SVM: 0.2
Metric: 3362.0120343903322
Ridge: 0.0 Tree: 0.8 SVM: 0.2
Metric: 3004.618882518221
Ridge: 0.0 Tree: 0.2 SVM: 0.8
Metric: 3733.2292969941595
Ridge: 0.2 Tree: 0.8 SVM: 0.0
Metric: 2962.599752156139
Ridge: 0.2 Tree: 0.0 SVM: 0.8
Metric: 4051.3767088322033
Ridge: 0.8 Tree: 0.2 SVM: 0.0
Metric: 3001.845492190206
Ridge: 0.30000000000000004 Tree: 0.0 SVM: 0.7000000000000001
Metric: 3916.245420017999
Ridge: 0.30000000000000004 Tree: 0.7000000000000001 SVM: 0.0
Metric:

((0.5, 0.5, 0.0), 2879.816477366835)

In [9]:
# Repeat the weight search, this time maximizing R^2 instead of minimizing MSE.
find_optimum_weights_for_ensemble(
    models=models,
    data_for_models=data_for_models,
    y_test=y_test,
    metric=r2_score,
    target='maximize'
)

Ridge: 1.0 Tree: 0.0 SVM: 0.0
Metric: 0.4445111375735453
Ridge: 0.0 Tree: 0.0 SVM: 1.0
Metric: 0.24487790739817772
Ridge: 0.0 Tree: 1.0 SVM: 0.0
Metric: 0.4558763179259717
Ridge: 0.0 Tree: 0.1 SVM: 0.9
Metric: 0.302123163966193
Ridge: 0.0 Tree: 0.9 SVM: 0.1
Metric: 0.4709218923884283
Ridge: 0.9 Tree: 0.0 SVM: 0.1
Metric: 0.4308771408026718
Ridge: 0.9 Tree: 0.1 SVM: 0.0
Metric: 0.4634411430636367
Ridge: 0.1 Tree: 0.9 SVM: 0.0
Metric: 0.472533287345578
Ridge: 0.1 Tree: 0.0 SVM: 0.9
Metric: 0.27117055666237766
Ridge: 0.8 Tree: 0.0 SVM: 0.2
Metric: 0.4158366270880951
Ridge: 0.0 Tree: 0.8 SVM: 0.2
Metric: 0.4779351522919434
Ridge: 0.0 Tree: 0.2 SVM: 0.8
Metric: 0.351336105975267
Ridge: 0.2 Tree: 0.8 SVM: 0.0
Metric: 0.48523614844188434
Ridge: 0.2 Tree: 0.0 SVM: 0.8
Metric: 0.2960566889828745
Ridge: 0.8 Tree: 0.2 SVM: 0.0
Metric: 0.4784170402304284
Ridge: 0.30000000000000004 Tree: 0.0 SVM: 0.7000000000000001
Metric: 0.31953630435966873
Ridge: 0.30000000000000004 Tree: 0.7000000000000001 SVM:

((0.5, 0.5, 0.0), 0.49962008179100514)