In [2]:
# Needed to import custom code from other directories
import sys
sys.path.append('../../code')

import numpy as np
import pandas as pd
from scipy.optimize import differential_evolution

from utils import LRAP

# Seed handed to differential_evolution in the search cells so runs are repeatable.
SEED = 42

# Problem dimensions. NUM_CLASSES fixes the number of columns of every matrix
# loaded in this cell; NUM_FEATURES is not referenced by the code in this cell.
NUM_FEATURES = 5000
NUM_CLASSES = 3993


def _load_matrix(path, **read_csv_kwargs):
    """Read a CSV as a NumPy array whose columns are named 0..NUM_CLASSES-1.

    Extra keyword arguments are forwarded to ``pandas.read_csv`` unchanged.
    """
    frame = pd.read_csv(path, names=range(NUM_CLASSES), **read_csv_kwargs)
    return frame.to_numpy()


# Validation labels that every model's predictions are scored against.
y_valid = _load_matrix("../../data/expanded/dev_labels.csv")

# Per-model prediction matrices. The last three files are read with header=0,
# i.e. their first row is treated as a header and replaced by the integer names.
nn = _load_matrix("../../public_data/nn_ensemble_old.csv")
knn = _load_matrix("../../public_data/knn.csv", header=0)
svm = _load_matrix("../../public_data/svm.csv", header=0)
rf = _load_matrix("../../public_data/saved_rf_probabilities.csv", header=0)

In [3]:
# Baseline: score the nn predictions on their own against the validation labels.
LRAP(y_valid, nn)

0.6372752007668268

In [4]:
# Baseline: score the rf predictions on their own against the validation labels.
LRAP(y_valid, rf)

0.5604580391775857

In [5]:
# Baseline: score the knn predictions on their own against the validation labels.
LRAP(y_valid, knn)

0.2912844932011161

In [6]:
# Baseline: score the svm predictions on their own against the validation labels.
LRAP(y_valid, svm)

0.3391564618701288

In [7]:
# Hand-picked blend of the four base models (the weights sum to 1.0),
# accumulated one member at a time in nn -> rf -> svm -> knn order.
e = 0.83 * nn
e = e + 0.14 * rf
e = e + 0.02 * svm
e = e + 0.01 * knn
LRAP(y_valid, e)

0.6416900423600906

In [31]:
def normalize(weights):
    """Scale a weight vector so that its L1 norm (sum of absolute values) is 1.

    Parameters
    ----------
    weights : array-like
        One-dimensional weights. Lists and tuples are accepted as well as
        NumPy arrays; negative entries keep their sign.

    Returns
    -------
    numpy.ndarray
        Float array equal to ``weights / sum(abs(weights))``. An all-zero (or
        empty) vector cannot be rescaled and is returned unchanged, still as
        an array, so callers always get the same type back.
    """
    # Coerce once so both return paths hand back an ndarray, whatever was passed in.
    weights = np.asarray(weights, dtype=float)
    l1_norm = np.linalg.norm(weights, 1)
    # Guard against division by zero for a vector of all zeros.
    if l1_norm == 0.0:
        return weights
    return weights / l1_norm

def loss_function(weights, predictions, y_valid):
    """Objective for the weight search: ``1 - LRAP`` of the weighted ensemble.

    Designed to be minimized, so a lower value means a better ensemble.

    Parameters
    ----------
    weights : array-like
        One raw weight per ensemble member. They are L1-normalized with
        ``normalize`` before use, so only their relative sizes matter.
    predictions : sequence of arrays
        One prediction matrix per ensemble member, in the same order as
        ``weights``.
    y_valid : array
        Labels passed through to ``LRAP`` as its first argument.

    Returns
    -------
    float
        ``1.0 - LRAP(y_valid, ensemble)``.

    Raises
    ------
    ValueError
        If ``predictions`` is empty or its length differs from ``weights``.
        Surplus weights would otherwise be ignored in the blend while still
        skewing the normalization.
    """
    if len(predictions) == 0:
        raise ValueError("loss_function needs at least one prediction matrix")
    if len(weights) != len(predictions):
        raise ValueError(
            f"got {len(weights)} weights for {len(predictions)} prediction matrices"
        )

    normalized = normalize(weights)
    # scalar * array already allocates a new array, so the in-place additions
    # below never modify the caller's prediction matrices and no copy is needed.
    ensemble = normalized[0] * predictions[0]
    for weight, prediction in zip(normalized[1:], predictions[1:]):
        ensemble += weight * prediction
    return 1.0 - LRAP(y_valid, ensemble)

In [10]:
# Element-wise products of the nn predictions with each other base model,
# used as extra ensemble members (order: svm, rf, knn).
inter1, inter2, inter3 = (nn * partner for partner in (svm, rf, knn))

In [11]:
# Search raw weights for the four base models plus three interaction terms.
# The bounds apply to the raw weights; loss_function L1-normalizes them before
# blending, so the normalized weights printed below can fall outside these ranges.
bounds_by_member = {
    "nn": (0.8, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (0.0, 0.2),
    "inter2": (0.0, 0.2),
    "inter3": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6421046287569429
{'nn': 0.68, 'rf': 0.12, 'svm': 0.03, 'knn': 0.02, 'inter1': 0.1, 'inter2': 0.01, 'inter3': 0.04}


In [12]:
# Same seven members, but the interaction terms may now take small negative
# raw weights. Bounds constrain raw weights; the printed ones are L1-normalized.
bounds_by_member = {
    "nn": (0.8, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (-0.1, 0.1),
    "inter2": (-0.1, 0.1),
    "inter3": (-0.1, 0.1),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.642052920268126
{'nn': 0.71, 'rf': 0.14, 'svm': 0.04, 'knn': 0.03, 'inter1': 0.01, 'inter2': -0.03, 'inter3': -0.03}


In [13]:
# Seven members again, with a tighter raw range for nn, a wider one for rf and
# wider signed ranges for the interaction terms. Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.9, 1.0),
    "rf": (0.0, 0.3),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (-0.2, 0.2),
    "inter2": (-0.2, 0.2),
    "inter3": (-0.2, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6419611602794333
{'nn': 0.72, 'rf': 0.18, 'svm': 0.04, 'knn': 0.01, 'inter1': 0.0, 'inter2': 0.02, 'inter3': -0.02}


In [14]:
# Element-wise products with the nn predictions (svm, rf, knn, and nn itself),
# plus the squared rf predictions, as candidate ensemble members.
inter1, inter2, inter3, inter4 = (nn * partner for partner in (svm, rf, knn, nn))
inter5 = rf * rf

In [15]:
# Search raw weights for nine members: four base models, three cross terms and
# the two squared terms. Bounds constrain raw weights; loss_function
# L1-normalizes them, and the normalized values are what gets printed.
bounds_by_member = {
    "nn": (0.8, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (0.0, 0.1),
    "inter2": (0.0, 0.1),
    "inter3": (0.0, 0.1),
    "inter4": (0.0, 0.2),
    "inter5": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6428028403042
{'nn': 0.6, 'rf': 0.06, 'svm': 0.02, 'knn': 0.01, 'inter1': 0.06, 'inter2': 0.04, 'inter3': 0.03, 'inter4': 0.06, 'inter5': 0.11}


In [16]:
# Nine-member search with the raw lower bound for nn relaxed to 0.6.
# Bounds constrain raw weights; the printed ones are L1-normalized.
bounds_by_member = {
    "nn": (0.6, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (0.0, 0.1),
    "inter2": (0.0, 0.1),
    "inter3": (0.0, 0.1),
    "inter4": (0.0, 0.2),
    "inter5": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.642777459970584
{'nn': 0.56, 'rf': 0.11, 'svm': 0.03, 'knn': 0.02, 'inter1': 0.03, 'inter2': 0.02, 'inter3': 0.02, 'inter4': 0.11, 'inter5': 0.11}


In [17]:
# Nine-member search with the raw lower bound for nn relaxed to 0.5 and the
# squared terms allowed up to 0.25. Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.5, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
    "inter1": (0.0, 0.1),
    "inter2": (0.0, 0.1),
    "inter3": (0.0, 0.1),
    "inter4": (0.0, 0.25),
    "inter5": (0.0, 0.25),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6428577625344234
{'nn': 0.49, 'rf': 0.03, 'svm': 0.03, 'knn': 0.01, 'inter1': 0.05, 'inter2': 0.05, 'inter3': 0.04, 'inter4': 0.16, 'inter5': 0.13}


In [18]:
# Recompute the validation LRAP for the weights held in `result` (whichever
# search cell ran last) on the nine-member ensemble. Stored in `e`, not displayed.
e = 1 - loss_function(result["x"], [nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)

In [20]:
# Re-score the weights held in `result` on the nine-member ensemble and show
# the value. The first argument had been deleted, leaving a syntax error; the
# output recorded for this cell equals the score of the preceding nine-member
# search, so `result["x"]` is restored here.
e = 1 - loss_function(result["x"], [nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)
e

0.6428577625344234

In [32]:
# Hand-entered raw weight vector, one entry per ensemble member in the order
# used by the nine-member scoring call. The values are raw, not normalized.
x = [
    0.6511959082759804,    # nn
    0.004111658314978055,  # rf
    0.006931800212986918,  # svm
    0.007915600438598396,  # knn
    0.43509489423730574,   # inter1
    0.11032858030099131,   # inter2
    0.036874063071821415,  # inter3
    0.04764619568639946,   # inter4
    0.20643462027375903,   # inter5
]

In [33]:
# Score the hand-entered weight vector `x` on the nine-member ensemble; the
# list is converted to an array before being handed to loss_function.
e = 1 - loss_function(np.array(x), [nn, rf, svm, knn, inter1, inter2, inter3, inter4, inter5], y_valid)

In [35]:
# Show `x` after L1 normalization, i.e. the proportions loss_function actually
# applies when blending with these raw weights.
normalize(x)

array([0.43224793, 0.00272922, 0.00460116, 0.00525418, 0.28880536,
       0.07323342, 0.0244761 , 0.03162638, 0.13702626])

In [34]:
# Display the score currently stored in `e`.
e

0.6450671808764409

### Old stuff: earlier weight searches over the base models only (no interaction terms)

In [5]:
# Base-model-only search. Bounds constrain the raw weights; loss_function
# L1-normalizes them, and the normalized values are what gets printed.
bounds_by_member = {
    "nn": (0.6, 1.0),
    "rf": (0.0, 0.3),
    "svm": (0.0, 0.3),
    "knn": (0.0, 0.3),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6413867078529208
{'nn': 0.78, 'rf': 0.13, 'svm': 0.08, 'knn': 0.01}


In [6]:
# Base-model-only search with the raw lower bound for nn raised to 0.7.
# Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.7, 1.0),
    "rf": (0.0, 0.3),
    "svm": (0.0, 0.3),
    "knn": (0.0, 0.3),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6413800518698071
{'nn': 0.74, 'rf': 0.17, 'svm': 0.07, 'knn': 0.02}


In [7]:
# Base-model-only search with the raw upper bounds of the weaker models
# tightened to 0.2. Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.7, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6417411368048982
{'nn': 0.78, 'rf': 0.13, 'svm': 0.05, 'knn': 0.05}


In [8]:
# Base-model-only search with the raw lower bound for nn raised to 0.8.
# Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.8, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6418519154727957
{'nn': 0.78, 'rf': 0.14, 'svm': 0.04, 'knn': 0.04}


In [9]:
# Base-model-only search with the raw lower bound for nn raised to 0.9.
# Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.9, 1.0),
    "rf": (0.0, 0.2),
    "svm": (0.0, 0.2),
    "knn": (0.0, 0.2),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6418554560382703
{'nn': 0.76, 'rf': 0.13, 'svm': 0.05, 'knn': 0.05}


In [10]:
# Base-model-only search with nn's raw weight held in [0.95, 1.0] and the
# other models capped at 0.1. Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.95, 1.0),
    "rf": (0.0, 0.1),
    "svm": (0.0, 0.1),
    "knn": (0.0, 0.1),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6413426296697362
{'nn': 0.87, 'rf': 0.09, 'svm': 0.02, 'knn': 0.02}


In [11]:
# Base-model-only search with nn's raw weight held in [0.95, 1.0] and the
# other models capped at 0.05. Printed weights are L1-normalized.
bounds_by_member = {
    "nn": (0.95, 1.0),
    "rf": (0.0, 0.05),
    "svm": (0.0, 0.05),
    "knn": (0.0, 0.05),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, rf, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.6413081802197139
{'nn': 0.9, 'rf': 0.02, 'svm': 0.04, 'knn': 0.04}


In [12]:
# Three-member search that leaves rf out of the ensemble.
# Bounds constrain raw weights; the printed ones are L1-normalized.
bounds_by_member = {
    "nn": (0.8, 1.0),
    "svm": (0.0, 0.1),
    "knn": (0.0, 0.1),
}
bound_w = list(bounds_by_member.values())
search_args = ([nn, svm, knn], y_valid)

result = differential_evolution(loss_function, bound_w, args=search_args, seed=SEED)
best_weights = result.x
print(f"Ensemble LRAP: {1 - loss_function(best_weights, *search_args)}")
print(dict(zip(bounds_by_member, np.round(normalize(best_weights), decimals=2))))

Ensemble LRAP: 0.640035437848277
{'nn': 0.85, 'svm': 0.07, 'knn': 0.07}
