In [None]:
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from src.utils import *
from src.InstrumentalVariable import InstrumentalVariable
from tqdm.notebook import tnrange

In [None]:
def run_manifold_tests(X, y, min_p_value=80, max_p_value=95, bootstrap=True, min_l2_reg=0,
                       max_l2_reg=50, n_tests=100, min_features=8, max_features=20):
    """Fit many randomised ``InstrumentalVariable`` models and collect their coefficients.

    Every test draws, in this order: a p-value, an optional L2 strength, the
    size of a feature subset, the feature subset itself and (optionally) a
    bootstrap resample of the rows. A fresh ``InstrumentalVariable`` is then
    fitted on that data and its ``coef_`` is written into the columns of the
    selected features.

    Parameters
    ----------
    X : array-like
        Three-dimensional array; the first axis is indexed by sample and the
        second by feature. The meaning of the third axis is not visible here.
    y : array-like
        Targets, indexed with the same row indices as ``X``.
    min_p_value, max_p_value : float
        Bounds of the uniform draw passed as the first argument of
        ``InstrumentalVariable``.
    bootstrap : bool
        If true, rows are resampled with replacement (``len(X)`` draws) for
        every test; otherwise the full data set is used as is.
    min_l2_reg, max_l2_reg : float
        Bounds of the uniform draw passed as the second argument of
        ``InstrumentalVariable``. When ``max_l2_reg <= 0`` no value is drawn
        and ``None`` is passed instead.
    n_tests : int
        Number of randomised fits.
    min_features, max_features : int
        Bounds for the size of the random feature subset (``max_features`` is
        exclusive, as in ``np.random.randint``). The drawn size is capped at
        the number of available features.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_tests, n_features)``. Row ``i`` holds the
        coefficients of test ``i``; features that were not selected in that
        test keep the value 0.
    """
    _, n_features, _ = X.shape
    n_rows = len(X)
    experiment_coefs = np.zeros((n_tests, n_features))

    for i in tnrange(n_tests):
        p_value = np.random.uniform(min_p_value, max_p_value)
        l2_reg = np.random.uniform(min_l2_reg, max_l2_reg) if max_l2_reg > 0 else None
        iv_model = InstrumentalVariable(p_value, l2_reg)

        # Sampling without replacement cannot return more items than exist,
        # so cap the subset size for data sets with few features.
        feature_size = min(np.random.randint(min_features, max_features), n_features)
        feature_inds = np.random.choice(n_features, feature_size, replace=False)

        if bootstrap:
            bootstrap_inds = np.random.choice(n_rows, n_rows)
            X_train, y_train = X[bootstrap_inds], y[bootstrap_inds]
        else:
            X_train, y_train = X, y

        X_train = X_train[:, feature_inds]

        iv_model.fit(X_train, y_train)
        # experiment_coefs[i] is a view, so np.put writes into the result matrix.
        np.put(experiment_coefs[i], feature_inds, iv_model.coef_)
    return experiment_coefs

In [None]:
def filter_metrics(coefs, max_minority_ratio=0.2, verbose=True):
    """Return the indices of features whose coefficients have a consistent sign.

    For every column of ``coefs`` the number of strictly positive and strictly
    negative entries is counted (zeros are ignored). A column is selected when
    it has at least one non-zero entry and either all non-zero entries share
    one sign, or the rarer sign occurs less than ``max_minority_ratio`` times
    as often as the more frequent one.

    Parameters
    ----------
    coefs : array-like
        Two-dimensional array; each column holds the coefficients of one feature.
    max_minority_ratio : float
        Upper bound (exclusive) on ``min(pos, neg) / max(pos, neg)``.
    verbose : bool
        If true, print the per-column positive and negative counts.

    Returns
    -------
    numpy.ndarray
        Integer array of selected column indices in ascending order. It has an
        integer dtype even when empty, so it can always be used for indexing.
    """
    coefs = np.asarray(coefs)
    positive_coefs = (coefs > 0).sum(axis=0)
    negative_coefs = (coefs < 0).sum(axis=0)
    if verbose:
        print(positive_coefs)
        print(negative_coefs)

    minority = np.minimum(positive_coefs, negative_coefs)
    majority = np.maximum(positive_coefs, negative_coefs)

    # Columns without any non-zero coefficient carry no sign information.
    ever_nonzero = majority > 0

    # Only divide where the denominator is non-zero; the placeholder 1.0 for
    # the remaining columns is irrelevant because they are masked out below.
    ratio = np.ones(majority.shape, dtype=float)
    ratio[ever_nonzero] = minority[ever_nonzero] / majority[ever_nonzero]

    one_sided = minority == 0
    selected = ever_nonzero & (one_sided | (ratio < max_minority_ratio))
    return np.flatnonzero(selected)

In [None]:
def plot_coefficients(coefs, metric_map=None):
    """Draw one strip plot per feature showing the spread of its coefficients.

    Each column of ``coefs`` gets its own subplot (all sharing the x axis). The
    non-zero coefficients of the column are scattered along x with a random
    vertical jitter in [-1, 1); a red vertical line marks zero.

    Parameters
    ----------
    coefs : numpy.ndarray
        Two-dimensional array; each column holds the coefficients of one feature.
    metric_map : indexable, optional
        ``metric_map[i]`` is the label used in the title of the subplot for
        column ``i``. Defaults to the column index itself.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    _, n_features = coefs.shape
    if metric_map is None:
        metric_map = range(n_features)

    # squeeze=False keeps `axes` two-dimensional, so indexing also works when
    # there is only a single feature to plot.
    fig, axes = plt.subplots(nrows=n_features, sharex=True, squeeze=False)
    fig.suptitle("3_metric_stability")

    for i, metric_coefs in enumerate(coefs.T):
        ax = axes[i, 0]
        ax.set_title('Weights for short_term_' + str(metric_map[i]), loc='left')
        ax.plot([0, 0], [-1, 1], 'r')

        # Zero means "feature not used in that test", so it is not plotted.
        nonzero_coefs = metric_coefs[metric_coefs != 0]
        jitter = np.random.rand(len(nonzero_coefs)) * 2 - 1
        # No colormap is passed: without per-point colour values it has no effect.
        ax.scatter(nonzero_coefs, jitter, picker=5, s=50)
    plt.show()

In [None]:
# read_data returns two arrays; both are indexed along three axes below.
short_metrics_p, long_metrics_p = read_data(
    dataset_name='feed_top_ab_tests_pool_big_dataset.csv',
    shift=True,
)

# Index 0 along the last axis of each array.
short_metrics, long_metrics = short_metrics_p[:, :, 0], long_metrics_p[:, :, 0]

# Target selection: change the index on the second axis to pick another
# target (0, 1, 2 or 3).
target_metric_p = long_metrics_p[:, 3, :]
target_metric = target_metric_p[:, 0]

In [None]:
# Main part of the sandbox: adjust the constraints passed to
# run_manifold_tests below to explore how stable the coefficients are.
# NOTE(review): no random seed is set in the visible cells, so the numbers
# change from run to run -- consider seeding NumPy before this call.

coefs = run_manifold_tests(
    short_metrics_p, target_metric,
    min_l2_reg=0, max_l2_reg=0.001,
    min_p_value=50, max_p_value=95, n_tests=1000,
)

# Force an integer dtype: an empty index array with a float dtype cannot be
# used for indexing, which would break the selection below when no metric
# passes the filter.
clear_metrics = np.asarray(filter_metrics(coefs), dtype=int)
filtered_coefs = coefs[:, clear_metrics]

In [None]:
# Plot all coefficient columns, labelling each subplot with its column index.
plot_coefficients(coefs, metric_map=range(coefs.shape[1]))



