In [1]:
import matplotlib 
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from src.utils import *
from src.InstrumentalVariable import InstrumentalVariable
from sklearn.metrics import r2_score

In [2]:
def run_manifold_tests(X, y, max_outlier=0.15, min_p_value=80, max_p_value=95, bootstrap=True, max_l2_reg=0, 
                       n_tests=100):
    """Fit ``n_tests`` randomly configured InstrumentalVariable models and collect their coefficients.

    Every test independently draws a p-value, optionally an outlier parameter
    ``nu`` and an L2 penalty, and a random subset of the features. Rows flagged
    by ``get_outlier_experiments`` are dropped, the remaining rows are
    (optionally) resampled with replacement, and the model is fitted on them.

    NOTE(review): outlier detection reads the module-level arrays
    ``short_metrics`` and ``long_metrics`` rather than ``X``/``y``; they must
    exist, and be row-aligned with ``X``, before this function is called.

    Parameters
    ----------
    X : array with three axes
        Axis 0 indexes samples and axis 1 indexes features; the last axis is
        passed through to the model untouched.
    y : array indexed along axis 0 like ``X``
        Regression target.
    max_outlier : float
        When positive, ``nu`` is drawn uniformly from ``[0, max_outlier)``;
        otherwise ``nu=None`` is handed to ``get_outlier_experiments``.
    min_p_value, max_p_value : float
        Bounds of the uniform draw for the model's first constructor argument.
    bootstrap : bool
        If True (default) the inlier rows are resampled with replacement before
        fitting; if False the model is fitted on the inlier rows as they are.
    max_l2_reg : float
        When positive, the L2 penalty is drawn uniformly from
        ``[0, max_l2_reg)``; otherwise ``None`` is passed to the model.
    n_tests : int
        Number of randomized fits.

    Returns
    -------
    experiment_coefs : ndarray of shape (n_tests, n_features)
        Fitted coefficients per test; features not selected in a test stay 0.
    experiment_meta_info : list of dict
        One dict per test with keys ``'p_value'``, ``'features'``, ``'r2'`` and,
        when they were drawn, ``'nu'`` and ``'l2_reg'``.
    """
    n_samples, n_features, _ = X.shape
    experiment_coefs = np.zeros((n_tests, n_features))
    experiment_meta_info = []

    # At least five features are used per test, unless X has fewer than five.
    min_feature_count = min(5, n_features)

    # Each row is a view into experiment_coefs, so writing to it fills the result.
    for coef_row in experiment_coefs:
        meta_info = {}
        p_value = np.random.uniform(min_p_value, max_p_value)
        meta_info['p_value'] = p_value

        nu = None
        if max_outlier > 0:
            nu = np.random.uniform(0, max_outlier)
            meta_info['nu'] = nu

        l2_reg = None
        if max_l2_reg > 0:
            l2_reg = np.random.uniform(0, max_l2_reg)
            meta_info['l2_reg'] = l2_reg
        iv_model = InstrumentalVariable(p_value, l2_reg)

        feature_size = np.random.randint(min_feature_count, n_features + 1)
        # Kept as a sorted *list*: meta_to_text formats list values element-wise.
        feature_inds = sorted(np.random.choice(n_features, feature_size, replace=False))
        meta_info['features'] = feature_inds

        outliers = get_outlier_experiments(np.append(short_metrics, long_metrics, axis=1), nu=nu)
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        mask = np.ones(n_samples, dtype=bool)
        mask[outliers] = False

        X_train = X[:, feature_inds][mask]
        y_train = y[mask]
        if bootstrap:
            # Resample the inliers with replacement, keeping the sample count.
            n_inliers = len(X_train)
            bootstrap_inds = np.random.choice(n_inliers, n_inliers)
            X_train, y_train = X_train[bootstrap_inds], y_train[bootstrap_inds]

        iv_model.fit(X_train, y_train)

        # The score is computed on every row, including the excluded outliers.
        y_pred = iv_model.predict(X[:, feature_inds])
        meta_info['r2'] = r2_score(y, y_pred)

        np.put(coef_row, feature_inds, iv_model.coef)
        experiment_meta_info.append(meta_info)
    return experiment_coefs, experiment_meta_info

In [3]:
def meta_to_text(meta_info):
    """Render each experiment's metadata dict as a single line of text.

    Every entry becomes ``"<key>: <value> "`` (note the trailing space), in the
    dict's iteration order. Numbers are rounded to two decimals; a list value
    is rendered as its rounded items joined by ``", "``.

    Parameters
    ----------
    meta_info : iterable of dict
        One mapping per experiment; values are numbers or lists of numbers.

    Returns
    -------
    list of str
        One formatted line per input dict, in input order.
    """
    def _render(value):
        # Lists are formatted item by item; anything else is rounded directly.
        if isinstance(value, list):
            return ", ".join(str(round(item, 2)) for item in value)
        return str(round(value, 2))

    return [
        "".join("{}: {} ".format(key, _render(value)) for key, value in exp_info.items())
        for exp_info in meta_info
    ]

In [4]:
def plot_coefficients(coefs, meta_info):
    """Interactively plot the coefficients of sign-consistent features, one strip per feature.

    A feature is kept when its coefficients across tests are never positive,
    never negative, or when the smaller of the positive/negative counts is
    less than 20% of the larger. Each kept feature gets its own axes showing
    one point per test (x = coefficient, y = random jitter in [-1, 1)) and a
    red vertical line at zero.

    Interaction:
      * hovering over a point shows the metadata text of the test(s) under the
        cursor in a box at a fixed position in the figure;
      * clicking a point toggles that test between blue and red in every strip.

    Parameters
    ----------
    coefs : 2-D ndarray of shape (n_tests, n_features)
        Coefficients per test, e.g. the first value returned by
        ``run_manifold_tests``.
    meta_info : list of dict
        Per-test metadata, formatted through ``meta_to_text``; entry ``i``
        describes row ``i`` of ``coefs``.

    Raises
    ------
    ValueError
        If no feature passes the sign-consistency filter.
    """
    n_tests, _ = coefs.shape

    # Per-feature counts of strictly positive / strictly negative coefficients.
    positive_counts = (coefs > 0).sum(axis=0)
    negative_counts = (coefs < 0).sum(axis=0)
    filtered_coef_nums = [
        i for i, (pos, neg) in enumerate(zip(positive_counts, negative_counts))
        if pos == 0 or neg == 0 or min(pos / neg, neg / pos) < 0.2
    ]
    if not filtered_coef_nums:
        raise ValueError("no feature passed the sign-consistency filter; nothing to plot")
    filtered_coefs = coefs[:, filtered_coef_nums].T

    text_info = meta_to_text(meta_info)

    # squeeze=False keeps `axes` two-dimensional even when only one feature
    # survives the filter; without it a single Axes is returned and indexing fails.
    fig, axes = plt.subplots(nrows=len(filtered_coef_nums), sharex=True, squeeze=False)
    collections = []
    for ax, feature_num, feature_coefs in zip(axes[:, 0], filtered_coef_nums, filtered_coefs):
        ax.set_title('Weights for short_term_' + str(feature_num), loc='left')
        ax.plot([0, 0], [-1, 1], 'r')
        col = ax.scatter(feature_coefs, np.random.rand(n_tests) * 2 - 1, color=["blue"]*n_tests, picker=5, s=50)
        collections.append(col)

    # A single tooltip shared by all strips. It is positioned in figure pixels,
    # so which axes owns it does not affect where it appears.
    annot = axes[-1, 0].annotate("", xy=(50, 50), xycoords='figure pixels', xytext=(50, 50),
                                 textcoords='figure pixels', bbox=dict(boxstyle="round", fc="w"))
    annot.set_visible(False)

    red = np.array(mcolors.to_rgba('r'))
    blue = mcolors.to_rgba('b')

    def hover(event):
        """Show the metadata of the test(s) under the cursor, or hide the tooltip."""
        hit, details = False, None
        for col in collections:
            hit, details = col.contains(event)
            if hit:
                break
        if hit:
            annot.set_text("\n".join(text_info[i] for i in details["ind"]))
            annot.set_visible(True)
            fig.canvas.draw_idle()
        elif annot.get_visible():
            annot.set_visible(False)
            fig.canvas.draw_idle()

    def on_pick(event):
        """Toggle the picked test(s) between blue and red in every strip."""
        for col in collections:
            # NOTE(review): relies on matplotlib's private per-point colour arrays.
            cur_color = col._facecolors[event.ind][0]
            new_color = blue if np.array_equal(cur_color, red) else tuple(red)
            col._facecolors[event.ind] = new_color
            col._edgecolors[event.ind] = new_color
        fig.canvas.draw_idle()

    fig.canvas.mpl_connect("pick_event", on_pick)
    fig.canvas.mpl_connect("motion_notify_event", hover)
    plt.show()

In [5]:
# Load the short-term and long-term metric arrays (both are indexed with three axes below).
short_metrics_p, long_metrics_p = read_data(shift=True)

# Index 0 along the last axis of each array; run_manifold_tests reads these two
# module-level names for its outlier detection.
short_metrics, long_metrics = short_metrics_p[:, :, 0], long_metrics_p[:, :, 0]

# Target: index 0 along axis 1 of the long-term array, then index 0 of the remaining last axis.
target_metric_p = long_metrics_p[:, 0]
target_metric = target_metric_p[:, 0]

In [8]:
# run_manifold_tests draws every hyper-parameter, feature subset and bootstrap
# sample from NumPy's global RNG; seed it so a fresh "Restart & Run All"
# reproduces the same coefficients.
# NOTE(review): helpers imported from src may use other random sources - confirm.
np.random.seed(0)
coefs, info = run_manifold_tests(short_metrics_p, target_metric, max_l2_reg=0.1, 
                                 min_p_value=95, max_p_value=99, n_tests=1000)

In [9]:
# Open the interactive figure for the coefficients and metadata computed above.
plot_coefficients(coefs=coefs, meta_info=info)