In [1]:
import jax
import jax.numpy as jnp
import matplotlib.pyplot as plt
import numpy as np

from tensorflow_probability.substrates import jax as tfp

tfd = tfp.distributions
plt.style.use('ggplot')
%load_ext autoreload

In [2]:
jax.default_backend()

'gpu'

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score
import numpyro as npyro
import scipy.stats as stats
import warnings
warnings.filterwarnings("ignore")

def run_fs_clf(clf, X_train, X_test, y_train, y_test, feats):
    results = {"cv_score": [], "test_score": []}
    for fts in feats:
        X_s_train, X_s_test = X_train[:,np.array(fts, dtype=np.int32)].astype(np.int64), X_test[:,np.array(fts, dtype=np.int32)].astype(np.int64)
        y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)
        if fts.size == 1:
            X_s_train, X_s_test = X_s_train.reshape(-1, 1), X_s_test.reshape(-1, 1)
        cv_score = np.mean(cross_val_score(clf, X_s_train, y_train, scoring="roc_auc"))
        clf_est = clf.fit(X_s_train, y_train)
        test_score = roc_auc_score(y_test, clf_est.predict_proba(X_s_test)[:,1])
        # print({"moses_cv_score": cv_score, "moses_test_score": test_score, "log_cv_score": cv_score, "log_test_score": test_score})
        results["cv_score"].append(cv_score)
        results["test_score"].append(test_score)

    return pd.DataFrame(results)


def fisher_exact_test(X, y, thres=0.05):
    cols = X.columns
    p_values = np.zeros(len(cols))
    for i, col in enumerate(cols):
        table = pd.crosstab(y, X[col])
        _, p_val = stats.fisher_exact(table, alternative="two-sided")
        p_values[i] = p_val

    idx_sig = np.argwhere(p_values < thres)
    print(f"Total of {len(idx_sig)} variables are significant (p_val = {thres})")

    return idx_sig


def build_network(X):
    p = X.shape[1]
    J = np.zeros((p, p))
    cols = X.columns
    intrs = []
    intrs_rev = []
    for i, g1 in enumerate(cols):
        try:
            g_intrs = list(net_intr[g1])
            for g2 in g_intrs:
                if (g2, g1) not in intrs_rev: # check if we haven't encountered the reverse interaction
                    j = cols.get_loc(g2)
                    J[i, j] = 1.0
                    J[j, i] = 1.0
                    intrs.append((g1, g2))
        except KeyError:
            continue

        # Check the reverse direction
        try:
            g_intrs_rev = list(net_intr_rev[g1])
            for g2 in g_intrs_rev:
                if (g1, g2) not in intrs:
                    j = cols.get_loc(g2)
                    J[i, j] = 1.0
                    J[j, i] = 1.0
                    intrs_rev.append((g2, g1))

        except KeyError:
            continue


    return J

def get_ess(n_chain, samples):
    k = int(samples.shape[0] / n_chain)
    chains = samples.reshape(n_chain, k, samples.shape[-1])
    ess = npyro.diagnostics.effective_sample_size(jax.device_get(chains))
    ess[np.isnan(ess)] = 1.0
    return ess

In [None]:
tfd.Mixture

In [4]:
from typing import Callable, NamedTuple
from blackjax.types import PRNGKey, PyTree


class MixedMALAState(NamedTuple):
    """Holds info about the discrete and the continuous r.vs in the mixed support"""

    discrete_position: PyTree
    contin_position: PyTree

    disc_logprob: float
    contin_logprob: float

    discrete_logprob_grad: PyTree
    contin_logprob_grad: PyTree

    disc_step_size: float
    contin_step_size: float


from blackjax.mcmc.diffusion import generate_gaussian_noise
from blackjax.mcmc.mala import MALAState

EPS = 1e-10


def diff_fn(state, step_size):
    theta = jax.tree_util.tree_map(lambda x, g: -0.5 * (g) * (2. * x - 1) - (1. / (2. * step_size)),
                                   state.position, state.logprob_grad)

    return jax.nn.sigmoid(theta)


def take_discrete_step(rng_key: PRNGKey, disc_state: MALAState, contin_state: MALAState,
                       logprob_fn: Callable, disc_grad_fn: Callable,
                       step_size: float) -> MALAState:
    _, key_rmh, key_accept = jax.random.split(rng_key, 3)
    # key_integrator, key_rmh = jax.random.split(rng_key)
    theta_cur = disc_state.position

    u = jax.random.uniform(key_rmh, shape=disc_state.position.shape)
    p_curr = diff_fn(disc_state, step_size)
    ind = jnp.array(u < p_curr)
    pos_new = (1. - theta_cur) * ind + theta_cur * (1. - ind)

    logprob_new = logprob_fn(pos_new, contin_state.position)
    logprob_grad_new = disc_grad_fn(pos_new, contin_state.position)
    new_state = MALAState(pos_new, logprob_new, logprob_grad_new)  # No metropolis update just accept the move

    return new_state


def take_contin_step(rng_key: PRNGKey, disc_state: MALAState, contin_state: MALAState,
                     logprob_fn: Callable, contin_grad_fn: Callable,
                     step_size: float) -> MALAState:
    key_integrator, key_rmh = jax.random.split(rng_key)
    noise = generate_gaussian_noise(key_integrator, contin_state.position)
    new_position = jax.tree_util.tree_map(
        lambda p, g, n: p + step_size * g + jnp.sqrt(2 * step_size) * n,
        contin_state.position,
        contin_state.logprob_grad,
        noise,
    )

    logprob_new = logprob_fn(disc_state.position, new_position)
    logprob_grad_new = contin_grad_fn(disc_state.position, new_position)
    new_state = MALAState(new_position, logprob_new, logprob_grad_new)

    return new_state


def one_step(
        rng_key: PRNGKey, state: MixedMALAState,
        discrete_logprob_fn: Callable, contin_logprob_fn: Callable,
        discrete_step_size: float, contin_step_size: float
) -> MixedMALAState:
    disc_grad_fn = jax.grad(discrete_logprob_fn)
    contin_grad_fn = jax.grad(contin_logprob_fn, argnums=1)
    # Evolve each variable in tandem and combine the results

    disc_state = MALAState(state.discrete_position, state.disc_logprob, state.discrete_logprob_grad)
    contin_state = MALAState(state.contin_position, state.contin_logprob, state.contin_logprob_grad)
    # print(f"disc pos: {disc_state.position}, contin pos: {contin_state.position}")
    # Take a step for the discrete variable - sample from p(discrete | contin)
    new_disc_state = take_discrete_step(rng_key, disc_state, contin_state,
                                        discrete_logprob_fn, disc_grad_fn, discrete_step_size)
    # Take a step for the contin variable - sample from p(contin | new_discrete)
    new_contin_state = take_contin_step(rng_key, new_disc_state, contin_state,
                                        contin_logprob_fn, contin_grad_fn, contin_step_size)

    new_state = MixedMALAState(new_disc_state.position, new_contin_state.position,
                               new_disc_state.logprob, new_contin_state.logprob,
                               new_disc_state.logprob_grad, new_contin_state.logprob_grad,
                               discrete_step_size, contin_step_size)

    return new_state

def init(disc_position: PyTree,contin_position: PyTree,
         disc_logprob_fn: Callable, contin_logprob_fn: Callable,
         init_disc_step: float, init_contin_step: float) -> MixedMALAState:

    disc_logprob, disc_grad_logprob = jax.value_and_grad(disc_logprob_fn)(disc_position, contin_position)
    contin_logprob, contin_grad_logprob = jax.value_and_grad(contin_logprob_fn, argnums=1)(disc_position, contin_position)

    return MixedMALAState(disc_position, contin_position,
                          disc_logprob, contin_logprob,
                          disc_grad_logprob, contin_grad_logprob,
                          init_disc_step, init_contin_step)

In [5]:
def inference_loop(rng_key, kernel, initial_state, num_samples):
    @jax.jit
    def one_step(state, rng_key):
        state = kernel(rng_key, state)
        return state, state

    keys = jax.random.split(rng_key, num_samples)
    _, states = jax.lax.scan(one_step, initial_state, keys)

    return states

def inference_loop_multiple_chains(rng_key, kernel, initial_state, num_samples, num_chains):

    @jax.jit
    def one_step(state, step_key):
        subkeys = jax.random.split(step_key, num_chains)
        state = jax.vmap(kernel)(subkeys, state)
        return state, state

    keys = jax.random.split(rng_key, num_samples)
    _, states = jax.lax.scan(one_step, initial_state, keys)

    return states

In [6]:
def gamma_energy(theta, J, eta, mu):
    xg = theta.T @ J
    xgx = xg @ theta
    return eta*xgx - mu*jnp.sum(theta)

def generate_disc_logprob_fn(X, y, J, mu, eta):

    def discrete_logprob_fn(gamma, beta):
        # beta = pos["beta"]
        X_gamma = (X @ jnp.diag(gamma))
        ising_logp = gamma_energy(gamma, J, eta, mu)
        ll_dist = tfd.Bernoulli(logits=(X_gamma @ beta))
        log_ll = jnp.sum(ll_dist.log_prob(y), axis=0)

        # print(f"gamma logp: {ising_logp}, log_ll: {log_ll}")

        return ising_logp + log_ll

    return discrete_logprob_fn


def generate_contin_logprob_fn(X, y, tau, c):
    n, p = X.shape
    cov = X.T @ X
    R = np.identity(p)
    v, l = 1., 1.

    def contin_logprob_fn(gamma, beta):
        # beta = pos["beta"]

        D = (gamma*c*tau) + (1 - gamma)*(tau)
        # D_inv = jnp.linalg.inv(jnp.diag(D))

        # A = jnp.linalg.inv((1./sigma**2)*cov + (D_inv @ R @ D_inv))
        beta_dist = tfd.MultivariateNormalDiag(loc=jnp.zeros(p), scale_diag=D)
        # print(beta_dist.sample(seed=rng_key))
        beta_logp = beta_dist.log_prob(beta)
        X_gamma = (X @ jnp.diag(gamma))
        ll_dist = tfd.Bernoulli(logits=(X_gamma @ beta))
        log_ll = jnp.sum(ll_dist.log_prob(y), axis=0)

        # print(f"beta logp: {beta_logp}, log_ll: {log_ll}")

        return beta_logp + log_ll

    return contin_logprob_fn

In [7]:
data_dir = "/home/xabush/code/snet/moses-incons-pen-xp/data"
# data_dir = "."
tamox_df = pd.read_csv(f"{data_dir}/tamoxBinaryEntrez.csv")
tamox_df.head()

Unnamed: 0,posOutcome,4111,4110,10661,131,4438,330,1109,2637,2642,...,7634,55769,7637,7644,741,54993,79364,7791,23140,26009
0,0,0,0,0,1,0,1,1,1,1,...,0,0,0,1,1,0,0,1,0,0
1,1,1,0,0,0,0,0,1,0,1,...,0,0,0,1,1,0,0,1,0,1
2,0,0,0,0,1,0,0,1,1,1,...,0,0,0,1,1,0,0,1,0,0
3,0,0,0,0,0,0,0,1,1,1,...,0,0,0,1,0,0,0,1,0,0
4,1,0,0,0,0,0,1,1,1,1,...,0,0,0,1,1,0,0,1,0,0


In [8]:
regnet_df = pd.read_table(f"{data_dir}/human.source", sep="\t", header=None, names= ["REGULATOR SYMBOL", "REGULATOR ID", "TARGET SYMBOL", "TARGET ID"])
print(f"Total interactions: {regnet_df.shape[0]}")
regnet_df.head()

Total interactions: 372774


Unnamed: 0,REGULATOR SYMBOL,REGULATOR ID,TARGET SYMBOL,TARGET ID
0,USF1,7391,S100A6,6277
1,USF1,7391,DUSP1,1843
2,USF1,7391,C4A,720
3,USF1,7391,ABCA1,19
4,TP53,7157,TP73,7161


In [9]:
net_intr = pd.Series(regnet_df["REGULATOR ID"].values, index=regnet_df["TARGET ID"])
net_intr_rev = pd.Series(regnet_df["TARGET ID"].values, index=regnet_df["REGULATOR ID"])
X_df, y_df = tamox_df.iloc[:, 1:], tamox_df["posOutcome"]


In [10]:
%cd /home/xabush/code/snet/moses-incons-pen-xp
import os
import glob
import time
import joblib
import datetime
import itertools
from sklearn.model_selection import KFold
from notebooks.variable_selection.MosesEstimator import *
from sklearn.svm import SVC

def load_mcmc_exp_res(data_dir, moses_res=False, moses_col="moses_val_score",
                      mcmc_col="cv_score"):
    df_lst = []

    for file in glob.glob(f"{data_dir}/*.csv"):
        # print(f"file: {file}")
        df = pd.read_csv(file)
        if moses_res:
            df = df.sort_values(by=moses_col, ascending=False).head(1) # take the top row of the sorted df
        else:
            df = df.sort_values(by=mcmc_col, ascending=False).head(1)

        # print(f"seed: {df['seed']}")
        df_lst.append(df)

    res_df = pd.concat(df_lst, axis=0)
    res_df = res_df.reset_index(drop=True)
    return res_df

def load_mcmc_exp_len(data_dir, size):
    df_lst = []

    for file in glob.glob(f"{data_dir}/*.csv"):
        df = pd.read_csv(file)
        df = df[df["len"] <= size]
        df = df.sort_values(by="cv_score", ascending=False).head(1)
        df_lst.append(df)

    res_df = pd.concat(df_lst, axis=0)
    res_df = res_df.reset_index(drop=True)
    return res_df




def run_gibbs_sampling(seed, X_df, y_df, eta, mu, thres):
    start_time = time.time()
    num_chains = 3
    disc_step_size = 0.1
    contin_step_size = 1e-5
    n_steps = 10000
    tau, c = 0.01, 1000
    burn_in = 0.1


    key = jax.random.PRNGKey(seed)
    np.random.random(seed)
    X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, shuffle=True, random_state=seed,
                                                        stratify=y_df, test_size=0.3)
    idx_sig = np.load(f"{data_dir}/exp_data_2/npy/idx_sig_s_{seed}.npy")
    X_train, X_test = X_train.iloc[:,idx_sig], X_test.iloc[:,idx_sig]
    J = build_network(X_train)
    p = J.shape[1]
    beta_dist = tfd.MultivariateNormalDiag(loc=jnp.zeros(p), scale_diag=10 * jnp.ones(p))
    gamma_dist = tfd.Bernoulli(probs=0.5 * jnp.ones(p))

    contin_init_pos = beta_dist.sample(seed=key, sample_shape=(num_chains,))
    disc_init_pos = gamma_dist.sample(seed=key, sample_shape=(num_chains,)) * 1.


    X_train_dev, y_train_dev = jax.device_put(X_train.to_numpy()), jax.device_put(y_train.to_numpy())
    disc_logprob = generate_disc_logprob_fn(X_train_dev, y_train_dev, J, mu, eta)
    contin_logprob = generate_contin_logprob_fn(X_train_dev, y_train_dev, tau, c)

    kernel = jax.jit(lambda key, state: one_step(key, state, disc_logprob, contin_logprob, disc_step_size, contin_step_size))

    init_state = jax.vmap(init, in_axes=(0, 0, None, None, None, None))(disc_init_pos, contin_init_pos, disc_logprob, contin_logprob,
                                                                        disc_step_size, contin_step_size)

    states = inference_loop_multiple_chains(key, kernel, init_state, num_samples=n_steps, num_chains=num_chains)
    # states = inference_loop(key, kernel, init_state, num_samples=n_steps)
    gamma_samples = states.discrete_position[int(burn_in*n_steps):]

    gamma_samples = gamma_samples.reshape(-1, p)

    gamma_means = jnp.mean(gamma_samples, axis=0)
    idx = jnp.squeeze(jnp.argwhere(gamma_means > thres))
    print(f"---- Inference took {(time.time() - start_time) : .2f} seconds -----")
    return idx



def run_moses_on_fs(seed, X, y, complx_ratios=None, tmps=None, div_pres=None,
                    feats=None, init_exemplar=None, prob="it", hnn=0.1):

    s_time = time.time()

    if complx_ratios is None:
        complx_ratios = [5, 10]
        # complx_ratios = [5, 10, 20, 40, 60, 100, 100]
    if tmps is None:
        tmps = [1000, 2000]
    if div_pres is None:
        # div_pres = [0.0, 0.3, 0.6, 0.9]
        div_pres = [0.6, 0.9]


    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=seed,
                                                        stratify=y, test_size=0.3)

    idx_sig = np.load(f"{data_dir}/exp_data_2/npy/idx_sig_s_{seed}.npy")

    X_train, X_test = X_train.iloc[:,idx_sig].to_numpy(), X_test.iloc[:,idx_sig].to_numpy()
    y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

    if feats is None:
        X_s_train, X_s_test = X_train.astype(np.int64), X_test.astype(np.int64)
    else:
        X_s_train, X_s_test = X_train[:,feats].astype(np.int64), X_test[:,feats].astype(np.int64)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)

    x_train_cv, x_test_cv, y_train_cv, y_test_cv = train_test_split(X_s_train, y_train, shuffle=True, random_state=seed, stratify=y_train, test_size=0.3)

    def run_moses(cr, temp, div):
        if init_exemplar is None:
            moses_opts = ["--complexity-temperature", f"{temp}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
                          "100", "--hc-fraction-of-nn", f"{hnn}", "--diversity-autoscale", "1",
                          "--diversity-pressure", f"{div}"]
        else:
            moses_opts = ["--complexity-temperature", f"{temp}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
                          "100", "--hc-fraction-of-nn", f"{hnn}", "-e", init_exemplar, "--diversity-autoscale", "1",
                          "--diversity-pressure", f"{div}"]
        start_time = time.time()

        # moses_est_r = MosesEstimator(fs_algo=None, complexity_ratio=cr, num_models=100,
        #                              random_state=seed, ensemble=False, num_evals=1000, prob=prob).fit(x_train_cv, y_train_cv, moses_params=moses_opts)
        #
        # cv_score = MosesEstimator.score(moses_est_r, x_test_cv, y_test_cv)
        #
        # # moses_est_r = MosesEstimator(fs_algo=None, complexity_ratio=cr, num_models=100,
        # #                              random_state=seed, ensemble=False, num_evals=30000).fit(X_s_train, y_train, moses_params=moses_opts)
        #
        # # train_score = MosesEstimator.score(moses_est_r, X_s_train, y_train)
        # test_score = MosesEstimator.score(moses_est_r, X_s_test, y_test)


        cv = KFold(n_splits=5)
        cv_scores = []
        for train_idx, test_idx in cv.split(X_s_train, y_train):

            # x_train_cv, x_test_cv, y_train_cv, y_test_cv = train_test_split(X_s_train, y_train, shuffle=True, random_state=seed, stratify=y_train,
            #                                                                 test_size=0.3)
            x_train_cv, x_test_cv, y_train_cv, y_test_cv = X_s_train[train_idx], X_s_train[test_idx], y_train[train_idx], y_train[test_idx]
            moses_est_r_cv = MosesEstimator(fs_algo="smd", complexity_ratio=cr, num_models=100,
                                            random_state=seed, ensemble=False, num_evals=1000, prob=prob, scorers=["mi"])
            moses_est_r_cv.fit(x_train_cv, y_train_cv, moses_params=moses_opts)

            cv_scores.append(MosesEstimator.score(moses_est_r_cv, x_test_cv, y_test_cv))
            moses_est_r_cv.cleanup()

        cv_score = np.mean(np.array(cv_scores))
        moses_est_r = MosesEstimator(fs_algo="smd", complexity_ratio=cr, num_models=100, prob=prob, scorers=["mi"],
                                     random_state=seed, ensemble=False, num_evals=1000).fit(X_s_train, y_train, moses_params=moses_opts)

        # train_score = MosesEstimator.score(moses_est_r, X_s_train, y_train)
        test_score = MosesEstimator.score(moses_est_r, X_s_test, y_test)

        res = {"seed": seed, "complexity_ratio": cr, "complexity_tmp": temp, "div_pres": div,
               "moses_val_score": cv_score, "moses_test_score": test_score}
        end_time = time.time()

        print(f"cr: {cr:.2f}, tmp: {temp: .2f}, div: {div: .2f} ,moses_tr: {cv_score: .4f}, test_score: {test_score: .4f}")
        print(f"============== Took {datetime.timedelta(seconds=(end_time - start_time))} ===============")
        print(len(moses_est_r.models_))
        moses_est_r.cleanup()
        return res

    results = joblib.Parallel(n_jobs=joblib.cpu_count(), require="sharedmem")(
        joblib.delayed(run_moses)(cr, temp, div)
        for cr, temp, div in list(itertools.product(complx_ratios, tmps, div_pres))
    )

    df_dict = {"seed": [] ,"complexity_ratio": [], "complexity_tmp": [], "div_pres": [],
               "moses_val_score": [], "moses_test_score": []}
    for res in results:
        df_dict["seed"].append(res["seed"])
        df_dict["complexity_ratio"].append(res["complexity_ratio"])
        df_dict["complexity_tmp"].append(res["complexity_tmp"])
        df_dict["div_pres"].append(res["div_pres"])
        df_dict["moses_val_score"].append(res["moses_val_score"])
        df_dict["moses_test_score"].append(res["moses_test_score"])

    e_time = time.time()
    print(f"Total elapsed time: {datetime.timedelta(seconds=(e_time - s_time))}")

    return pd.DataFrame(df_dict)


def run_moses(seed, X, y, complexity_ratio=10,
              div_pres=0.6, temp=2000, init_exemplar=None, hnn=0.1, size=-1, prob="it", feats=None):

    start_time = time.time()


    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=seed,
                                                        stratify=y, test_size=0.3)

    idx_sig = np.load(f"{data_dir}/exp_data_2/npy/idx_sig_s_{seed}.npy")

    X_train, X_test = X_train.iloc[:,idx_sig].to_numpy(), X_test.iloc[:,idx_sig].to_numpy()
    y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

    if feats is None:
        if size == -1:
            feats = np.load(f"{data_dir}/exp_data_3/idx_sel/idx_sel_s_{seed}.npy")
        else:
            feats = np.load(f"{data_dir}/exp_data_3/idx_sel_{size}/idx_sel_s_{seed}.npy")
        print(f"Selected feats len: {feats.size}")
        X_s_train, X_s_test = X_train.astype(np.int64), X_test.astype(np.int64)
    else:
        X_s_train, X_s_test = X_train[:,feats].astype(np.int64), X_test[:,feats].astype(np.int64)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)



    if init_exemplar is None:
        moses_opts = ["--complexity-temperature", f"{temp}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
                      "100", "--hc-fraction-of-nn", f"{hnn}", "--diversity-autoscale", "1",
                      "--diversity-pressure", f"{div_pres}", "-l", "DEBUG"]
    else:
        moses_opts = ["--complexity-temperature", f"{temp}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
                      "100", "--hc-fraction-of-nn", f"{hnn}", "-e", init_exemplar, "--diversity-autoscale", "1",
                      "--diversity-pressure", f"{div_pres}"]

    cv = KFold(n_splits=5)
    cv_scores = []
    # cv_estimators = []
    for train_idx, test_idx in cv.split(X_s_train, y_train):

        # x_train_cv, x_test_cv, y_train_cv, y_test_cv = train_test_split(X_s_train, y_train, shuffle=True, random_state=seed, stratify=y_train,
        #                                                                 test_size=0.3)
        x_train_cv, x_test_cv, y_train_cv, y_test_cv = X_s_train[train_idx], X_s_train[test_idx], y_train[train_idx], y_train[test_idx]
        moses_est_r_cv = MosesEstimator(fs_algo=None, complexity_ratio=complexity_ratio, num_models=100,
                                     random_state=seed, ensemble=False, num_evals=1000, prob=prob)
        moses_est_r_cv.fit(x_train_cv, y_train_cv, moses_params=moses_opts)
        try:
            cv_scores.append(MosesEstimator.score(moses_est_r_cv, x_test_cv, y_test_cv))
        # moses_est_r_cv.cleanup()
        except ValueError:
            print("Value Error occurred!")
            return moses_est_r_cv
        # cv_estimators.append(moses_est_r_cv)
    cv_score = np.mean(np.array(cv_scores))
    moses_est_r = MosesEstimator(fs_algo=None, complexity_ratio=complexity_ratio, num_models=100, prob=prob,
                                 random_state=seed, ensemble=False, num_evals=1000).fit(X_s_train, y_train, moses_params=moses_opts)

    # train_score = MosesEstimator.score(moses_est_r, X_s_train, y_train)
    test_score = MosesEstimator.score(moses_est_r, X_s_test, y_test)

    end_time = time.time()

    print(f"cr: {complexity_ratio:.2f}, tmp: {temp: .2f}, div: {div_pres: .2f} ,moses_tr: {cv_score: .4f}, test_score: {test_score: .4f}")
    print(f"============== Took {datetime.timedelta(seconds=(end_time - start_time))} ===============")

    return moses_est_r

def run_moses_on_mcmc_res(res_param_df, X, y, out_dir):

    data_path = f"{data_dir}/exp_data_3"

    # rem_seeds = [23, 99, 763, 1234, 464]

    for i, row  in res_param_df.iterrows():
        seed = int(row["seed"])
        if seed != 1234: # For now, skip seed 1234 as there seems to be reproducibility issue
            start = time.time()
            np.random.seed(seed)
            eta, mu = row["eta"], row["mu"]
            thres, feat_size = row["thres"], int(row["len"])
            cv_score, test_score = row["cv_score"], row["test_score"]
            try:
                idx = np.load(f"{data_path}/idx_sel/idx_sel_s_{seed}.npy")
            except FileNotFoundError:
                idx = run_gibbs_sampling(seed, X, y, eta, mu, thres)
                print(idx.size)
                # idx = np.load(f"{data_path}/idx_sel_100/idx_sel_s_{seed}.npy")
                jnp.save(f"{data_path}/idx_sel/idx_sel_s_{seed}.npy", idx)
            print(f"seed; {seed}, eta: {eta : .3f}, mu: {mu: .3f}, thres: {thres}, len: {feat_size}, cv_score: {cv_score: .3f}, test_score: {test_score: .3f}")
            res_moses_df = run_moses_on_fs(seed, X, y, feats=idx, prob="auc")
            res_moses_sorted = res_moses_df.sort_values(by="moses_val_score", ascending=False).head(1)
            res_moses_df.to_csv(f"{data_path}/{out_dir}/res_moses_s_{seed}.csv", index=False)
            end = time.time()
            print(f"Best CV result: cv_score: {res_moses_sorted['moses_val_score'].iloc[0]}, test_score: {res_moses_sorted['moses_test_score'].iloc[0]}, complexity_ratio: {res_moses_sorted['complexity_ratio'].iloc[0]}, complexity_tmp: {res_moses_sorted['complexity_tmp'].iloc[0]}, div_pres: {res_moses_sorted['div_pres'].iloc[0]}")
            print(f" ====== Done for seed - {seed}, it took {datetime.timedelta(seconds=(end - start))} ============")


    # for i, row  in res_param_df.iterrows():
    #     seed = int(row["seed"])
    #     if seed in rem_seeds: # For now, skip seed 1234 as there seems to be reproducibility issue
    #         np.random.seed(seed)
    #         key = jax.random.PRNGKey(seed)
    #         idx_sig = np.load(f"{data_path}/npy/idx_sig_s_{seed}.npy")
    #         X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.3, random_state=seed, stratify=y, shuffle=True)
    #         X_train, X_test = X_train.iloc[:,idx_sig], X_test.iloc[:,idx_sig]
    #         J = build_network(X_train)
    #         np.fill_diagonal(J, 0.0)
    #         eta, mu = row["eta"], row["mu"]
    #         thres, feat_size = row["thres"], int(row["len"])
    #         cv_score, cv_score = row["cv_score"], row["test_score"]
    #         print(f"seed; {seed}, eta: {eta : .3f}, mu: {mu: .3f}, thres: {thres}, len: {feat_size}, cv_score: {log_cv_score: .3f}, test_score: {log_test_score: .3f}")
    #         idx = run_gibbs_sampling(key, X_train, y_train, J, eta, mu, thres)
    #         np.save(f"{data_path}/idx_sel_100/idx_sel_s_{seed}.npy", idx)
    #         # X_train, X_test = X_train.iloc[:,idx].to_numpy(), X_test.iloc[:,idx].to_numpy()
    #         # y_train, y_test = y_train.to_numpy(), y_test.to_numpy()
    #         #
    #         # res_moses_df = run_moses_on_fs(X_train, X_test, y_train, y_test, seed)
    #         # res_moses_df.to_csv(f"{data_path}/moses/res_moses_s_{seed}.csv")
    #
    #         print(f" ====== Done for seed - {seed} ============")

/home/xabush/code/snet/moses-incons-pen-xp


In [11]:
mcmc_res_df = load_mcmc_exp_res(f"{data_dir}/exp_data_3")
mcmc_res_df

Unnamed: 0,seed,eta,mu,thres,cv_score,test_score,len,num_edges,beta_cv_score,beta_test_score
0,644,0.0,0.1,0.5,0.866676,0.767882,387,44,0.912969,0.718862
1,99,0.0,0.16681,0.5,0.852566,0.69815,236,2,0.903901,0.663076
2,221,0.0,0.16681,0.5,0.883924,0.729633,225,2,0.907725,0.731704
3,21,0.1,0.1,0.5,0.84278,0.736675,429,128,0.88655,0.682132
4,373,0.464159,0.0,0.5,0.849874,0.826291,507,98,0.897923,0.789975
5,61,0.1,0.16681,0.5,0.847535,0.759666,243,98,0.88075,0.727561
6,440,0.0,0.464159,0.4,0.881344,0.716515,603,58,0.903369,0.681304
7,23,0.1,0.464159,0.4,0.873108,0.74565,637,176,0.880851,0.668738
8,464,0.774264,0.0,0.5,0.862885,0.743717,650,186,0.882599,0.733913
9,8,0.0,0.16681,0.5,0.891523,0.73129,189,2,0.907092,0.697459


In [12]:
mcmc_res_df[["cv_score", "test_score"]].mean()

cv_score      0.866158
test_score    0.744254
dtype: float64

In [149]:
run_moses_on_mcmc_res(mcmc_res_df, X_df, y_df)

seed; 644, eta:  0.000, mu:  0.100, thres: 0.5, len: 387, cv_score:  0.867, test_score:  0.768
cr: 10.00, tmp:  1000.00, div:  0.90 ,moses_tr:  0.7293, test_score:  0.6573
99
cr: 5.00, tmp:  1000.00, div:  0.90 ,moses_tr:  0.7218, test_score:  0.6671
99
cr: 10.00, tmp:  1000.00, div:  0.60 ,moses_tr:  0.7553, test_score:  0.7223
100
cr: 10.00, tmp:  2000.00, div:  0.60 ,moses_tr:  0.7553, test_score:  0.7223
100
cr: 5.00, tmp:  2000.00, div:  0.60 ,moses_tr:  0.7544, test_score:  0.7189
100
cr: 5.00, tmp:  1000.00, div:  0.60 ,moses_tr:  0.7544, test_score:  0.7189
100
cr: 5.00, tmp:  1000.00, div:  0.60 ,moses_tr:  0.7544, test_score:  0.7189
100
cr: 10.00, tmp:  2000.00, div:  0.60 ,moses_tr:  0.7553, test_score:  0.7223
100
cr: 5.00, tmp:  2000.00, div:  0.90 ,moses_tr:  0.7218, test_score:  0.6671
99
cr: 10.00, tmp:  1000.00, div:  0.60 ,moses_tr:  0.7553, test_score:  0.7223
100
cr: 10.00, tmp:  2000.00, div:  0.90 ,moses_tr:  0.7293, test_score:  0.6573
99
cr: 5.00, tmp:  2000.00

In [27]:
res_moses_df = load_mcmc_exp_res(f"{data_dir}/exp_data_3/moses_2", moses_res=True)
res_moses_df

Unnamed: 0,seed,complexity_ratio,complexity_tmp,div_pres,moses_val_score,moses_test_score
0,440,10,1000,0.6,0.752712,0.623861
1,221,5,1000,0.6,0.68109,0.660108
2,806,5,1000,0.6,0.64387,0.677161
3,23,5,1000,0.6,0.741721,0.677644
4,644,10,1000,0.6,0.755266,0.722314
5,545,10,2000,0.6,0.692114,0.609431
6,21,10,1000,0.6,0.755011,0.687517
7,8,5,2000,0.6,0.649555,0.632353
8,373,5,1000,0.6,0.702348,0.739506
9,61,10,1000,0.6,0.612512,0.68807


In [28]:
print(res_moses_df["moses_val_score"].mean())
print(res_moses_df["moses_test_score"].mean())

0.7025555267207697
0.6726254965691586


In [78]:
data_path = f"{data_dir}/exp_data_3"
moses_opts = ["--complexity-temperature", f"{2000}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
              "100", "--hc-fraction-of-nn", f"{0.1}", "--diversity-autoscale", "1",
              "--diversity-pressure", f"{0.6}", "-l", "DEBUG"]

res_dict = {"seed": [], "cv_score": [], "test_score": []}

for i, row  in mcmc_res_df.iterrows():
    seed = int(row["seed"])
    start = time.time()
    np.random.seed(seed)
    eta, mu = row["eta"], row["mu"]
    thres, feat_size = row["thres"], int(row["len"])
    cv_score, test_score = row["cv_score"], row["test_score"]
    try:
        idx = np.load(f"{data_dir}/exp_data_2/idx_sel/idx_sel_s_{seed}.npy")
    except FileNotFoundError:
        idx = run_gibbs_sampling(seed, X_df, y_df, eta, mu, thres)
        print(idx.size)
        # idx = np.load(f"{data_path}/idx_sel_100/idx_sel_s_{seed}.npy")
        jnp.save(f"{data_dir}/exp_data_2/idx_sel/idx_sel_s_{seed}.npy", idx)
    print(f"seed; {seed}, eta: {eta : .3f}, mu: {mu: .3f}, thres: {thres}, len: {feat_size}, cv_score: {cv_score: .3f}, test_score: {test_score: .3f}")


    X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, shuffle=True, random_state=seed,
                                                        stratify=y_df, test_size=0.3)

    idx_sig = np.load(f"{data_dir}/exp_data_2/npy/idx_sig_s_{seed}.npy")
    idx_sel = np.load(f"{data_dir}/exp_data_3/idx_sel/idx_sel_s_{seed}.npy")
    X_train, X_test = X_train.iloc[:,idx_sig].to_numpy(), X_test.iloc[:,idx_sig].to_numpy()
    y_train, y_test = y_train.to_numpy(), y_test.to_numpy()
    X_s_train, X_s_test = X_train[:,idx_sel].astype(np.int64), X_test[:,idx_sel].astype(np.int64)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)

    cv = KFold(n_splits=5)
    cv_scores = []
    for train_idx, test_idx in cv.split(X_s_train, y_train):
        x_train_cv, x_test_cv, y_train_cv, y_test_cv = X_s_train[train_idx], X_s_train[test_idx], y_train[train_idx], y_train[test_idx]
        moses_est_r_cv = MosesEstimator(fs_algo=None, complexity_ratio=5, num_models=100,
                                        random_state=seed, ensemble=False, num_evals=1000, prob="auc")
        moses_est_r_cv.fit(x_train_cv, y_train_cv, moses_params=moses_opts)
        moses_train_cv_out = moses_est_r_cv._eval_models(moses_est_r_cv.models_, assign_cols(pd.DataFrame(x_train_cv), append_y=False))
        moses_test_cv_out = moses_est_r_cv._eval_models(moses_est_r_cv.models_, assign_cols(pd.DataFrame(x_test_cv), append_y=False))
        moses_train_cv_out = moses_train_cv_out.T
        moses_test_cv_out = moses_test_cv_out.T


        x_train_cv_comb = np.concatenate([x_train_cv, moses_train_cv_out], axis=1)
        x_test_cv_comb = np.concatenate([x_test_cv, moses_test_cv_out], axis=1)

        clf = SVC(kernel="rbf", probability=True)
        clf.fit(x_train_cv_comb, y_train_cv)
        cv_score = roc_auc_score(y_test_cv, clf.predict_proba(x_test_cv_comb)[:,1])
        cv_scores.append(cv_score)

        # cv_estimators.append(moses_est_r_cv)
    cv_score = np.mean(np.array(cv_scores))
    moses_est_r = MosesEstimator(fs_algo=None, complexity_ratio=5, num_models=100, prob="auc",
                                 random_state=seed, ensemble=False, num_evals=1000).fit(X_s_train, y_train, moses_params=moses_opts)

    moses_train_out = moses_est_r._eval_models(moses_est_r.models_, assign_cols(pd.DataFrame(X_s_train), append_y=False))
    moses_test_out = moses_est_r._eval_models(moses_est_r.models_, assign_cols(pd.DataFrame(X_s_test), append_y=False))
    moses_train_out = moses_train_out.T
    moses_test_out = moses_test_out.T

    clf = SVC(kernel="rbf", probability=True)
    X_s_train_moses_comb = np.concatenate([X_s_train, moses_train_out], axis=1)
    X_s_test_moses_combo = np.concatenate([X_s_test, moses_test_out], axis=1)

    clf.fit(X_s_train_moses_comb, y_train)
    test_score = roc_auc_score(y_test, clf.predict_proba(X_s_test_moses_combo)[:,1])
    res_dict["seed"].append(seed)
    res_dict["cv_score"].append(cv_score)
    res_dict["test_score"].append(test_score)
    print(f"cv_comb: {cv_score: .4f}, test_comb: {test_score: .4f}")


seed; 644, eta:  0.000, mu:  0.100, thres: 0.5, len: 387, cv_score:  0.867, test_score:  0.768
cv_comb:  0.8562, test_comb:  0.7639
seed; 99, eta:  0.000, mu:  0.167, thres: 0.5, len: 236, cv_score:  0.853, test_score:  0.698
cv_comb:  0.8116, test_comb:  0.7068
seed; 221, eta:  0.000, mu:  0.167, thres: 0.5, len: 225, cv_score:  0.884, test_score:  0.730
cv_comb:  0.8377, test_comb:  0.7352
seed; 21, eta:  0.100, mu:  0.100, thres: 0.5, len: 429, cv_score:  0.843, test_score:  0.737
cv_comb:  0.8140, test_comb:  0.7306
seed; 373, eta:  0.464, mu:  0.000, thres: 0.5, len: 507, cv_score:  0.850, test_score:  0.826
cv_comb:  0.8360, test_comb:  0.8277
seed; 61, eta:  0.100, mu:  0.167, thres: 0.5, len: 243, cv_score:  0.848, test_score:  0.760
cv_comb:  0.8095, test_comb:  0.7628
seed; 440, eta:  0.000, mu:  0.464, thres: 0.4, len: 603, cv_score:  0.881, test_score:  0.717
cv_comb:  0.8754, test_comb:  0.7113
seed; 23, eta:  0.100, mu:  0.464, thres: 0.4, len: 637, cv_score:  0.873, test

In [79]:
res_moses_svm_comb_df = pd.DataFrame(res_dict)
res_moses_svm_comb_df

Unnamed: 0,seed,cv_score,test_score
0,644,0.856168,0.763877
1,99,0.811591,0.706849
2,221,0.837707,0.735156
3,21,0.813991,0.730599
4,373,0.836009,0.827672
5,61,0.809459,0.762773
6,440,0.875427,0.711268
7,23,0.85109,0.729978
8,464,0.840756,0.741646
9,8,0.826375,0.704778


In [80]:
res_moses_svm_comb_df[["cv_score", "test_score"]].mean()

cv_score      0.832734
test_score    0.739495
dtype: float64

In [83]:
data_path = f"{data_dir}/exp_data_3"
moses_opts = ["--complexity-temperature", f"{2000}", "--hc-crossover-min-neighbors", "500", "--hc-crossover-pop-size",
              "100", "--hc-fraction-of-nn", f"{0.1}", "--diversity-autoscale", "1",
              "--diversity-pressure", f"{0.6}", "-l", "DEBUG"]

res_dict_2 = {"seed": [], "cv_score": [], "test_score": []}

for i, row  in mcmc_res_df.iterrows():
    seed = int(row["seed"])
    start = time.time()
    np.random.seed(seed)
    eta, mu = row["eta"], row["mu"]
    thres, feat_size = row["thres"], int(row["len"])
    cv_score, test_score = row["cv_score"], row["test_score"]
    try:
        idx = np.load(f"{data_dir}/exp_data_2/idx_sel/idx_sel_s_{seed}.npy")
    except FileNotFoundError:
        idx = run_gibbs_sampling(seed, X_df, y_df, eta, mu, thres)
        print(idx.size)
        # idx = np.load(f"{data_path}/idx_sel_100/idx_sel_s_{seed}.npy")
        jnp.save(f"{data_dir}/exp_data_2/idx_sel/idx_sel_s_{seed}.npy", idx)
    print(f"seed; {seed}, eta: {eta : .3f}, mu: {mu: .3f}, thres: {thres}, len: {feat_size}, cv_score: {cv_score: .3f}, test_score: {test_score: .3f}")


    X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, shuffle=True, random_state=seed,
                                                        stratify=y_df, test_size=0.3)

    idx_sig = np.load(f"{data_dir}/exp_data_2/npy/idx_sig_s_{seed}.npy")
    idx_sel = np.load(f"{data_dir}/exp_data_3/idx_sel/idx_sel_s_{seed}.npy")
    X_train, X_test = X_train.iloc[:,idx_sig].to_numpy(), X_test.iloc[:,idx_sig].to_numpy()
    y_train, y_test = y_train.to_numpy(), y_test.to_numpy()
    X_s_train, X_s_test = X_train[:,idx_sel].astype(np.int64), X_test[:,idx_sel].astype(np.int64)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)

    cv = KFold(n_splits=5)
    cv_scores = []
    for train_idx, test_idx in cv.split(X_s_train, y_train):
        x_train_cv, x_test_cv, y_train_cv, y_test_cv = X_s_train[train_idx], X_s_train[test_idx], y_train[train_idx], y_train[test_idx]
        moses_est_r_cv = MosesEstimator(fs_algo=None, complexity_ratio=5, num_models=100,
                                        random_state=seed, ensemble=False, num_evals=1000, prob="it")
        moses_est_r_cv.fit(x_train_cv, y_train_cv, moses_params=moses_opts)
        moses_train_cv_out = moses_est_r_cv._eval_models(moses_est_r_cv.models_, assign_cols(pd.DataFrame(x_train_cv), append_y=False))
        moses_test_cv_out = moses_est_r_cv._eval_models(moses_est_r_cv.models_, assign_cols(pd.DataFrame(x_test_cv), append_y=False))
        moses_train_cv_out = moses_train_cv_out.T
        moses_test_cv_out = moses_test_cv_out.T


        x_train_cv_comb = np.concatenate([x_train_cv, moses_train_cv_out], axis=1)
        x_test_cv_comb = np.concatenate([x_test_cv, moses_test_cv_out], axis=1)

        clf = SVC(kernel="rbf", probability=True)
        clf.fit(x_train_cv_comb, y_train_cv)
        cv_score = roc_auc_score(y_test_cv, clf.predict_proba(x_test_cv_comb)[:,1])
        cv_scores.append(cv_score)

        # cv_estimators.append(moses_est_r_cv)
    cv_score = np.mean(np.array(cv_scores))
    moses_est_r = MosesEstimator(fs_algo=None, complexity_ratio=5, num_models=100, prob="it",
                                 random_state=seed, ensemble=False, num_evals=1000).fit(X_s_train, y_train, moses_params=moses_opts)

    moses_train_out = moses_est_r._eval_models(moses_est_r.models_, assign_cols(pd.DataFrame(X_s_train), append_y=False))
    moses_test_out = moses_est_r._eval_models(moses_est_r.models_, assign_cols(pd.DataFrame(X_s_test), append_y=False))
    moses_train_out = moses_train_out.T
    moses_test_out = moses_test_out.T

    clf = SVC(kernel="rbf", probability=True)
    X_s_train_moses_comb = np.concatenate([X_s_train, moses_train_out], axis=1)
    X_s_test_moses_combo = np.concatenate([X_s_test, moses_test_out], axis=1)

    clf.fit(X_s_train_moses_comb, y_train)
    test_score = roc_auc_score(y_test, clf.predict_proba(X_s_test_moses_combo)[:,1])
    res_dict_2["seed"].append(seed)
    res_dict_2["cv_score"].append(cv_score)
    res_dict_2["test_score"].append(test_score)
    print(f"cv_comb: {cv_score: .4f}, test_comb: {test_score: .4f}")

seed; 644, eta:  0.000, mu:  0.100, thres: 0.5, len: 387, cv_score:  0.867, test_score:  0.768
cv_comb:  0.8683, test_comb:  0.7636
seed; 99, eta:  0.000, mu:  0.167, thres: 0.5, len: 236, cv_score:  0.853, test_score:  0.698
cv_comb:  0.8523, test_comb:  0.7017
seed; 221, eta:  0.000, mu:  0.167, thres: 0.5, len: 225, cv_score:  0.884, test_score:  0.730
cv_comb:  0.8588, test_comb:  0.7360
seed; 21, eta:  0.100, mu:  0.100, thres: 0.5, len: 429, cv_score:  0.843, test_score:  0.737
cv_comb:  0.8208, test_comb:  0.7283
seed; 373, eta:  0.464, mu:  0.000, thres: 0.5, len: 507, cv_score:  0.850, test_score:  0.826
cv_comb:  0.8449, test_comb:  0.8284
seed; 61, eta:  0.100, mu:  0.167, thres: 0.5, len: 243, cv_score:  0.848, test_score:  0.760
cv_comb:  0.8427, test_comb:  0.7657
seed; 440, eta:  0.000, mu:  0.464, thres: 0.4, len: 603, cv_score:  0.881, test_score:  0.717
cv_comb:  0.8823, test_comb:  0.7079
seed; 23, eta:  0.100, mu:  0.464, thres: 0.4, len: 637, cv_score:  0.873, test

In [13]:
moses_est = run_moses(644, X_df, y_df, complexity_ratio=5, temp=2000, div_pres=0.6)

Selected feats len: 387
cr: 5.00, tmp:  2000.00, div:  0.60 ,moses_tr:  0.7570, test_score:  0.6478


In [14]:
for m in moses_est.models_:
    print(m.model)

or($f829 !$f1150)
or(!$f805 $f1066)
or(!$f424 !$f1128)
or($f1155 !$f1408)
or(!$f892 !$f1241)
or(!$f644 !$f846)
or(!$f641 !$f954)
or(!$f137 !$f726)
or(!$f664 $f776)
or($f566 !$f882)
or(!$f1314 !$f1492)
or(!$f728 !$f951)
or(!$f329 !$f1122)
or(!$f1419 !$f1499)
or(!$f111 !$f1340)
or(!$f909 !$f1467)
or(!$f1087 !$f1403)
or(!$f86 !$f992)
or(!$f727 $f776)
or(!$f117 !$f372)
or(!$f1281 !$f1405)
!$f392
or(!$f347 $f1287)
or(!$f734 $f1109)
or(!$f48 !$f650)
or($f364 !$f1232)
or($f282 !$f1468)
!$f343
!$f1413
!$f1120
or(!$f1062 !$f1073)
or(!$f331 !$f710)
or(!$f517 !$f632)
or($f955 $f1048)
or(!$f1128 !$f1444)
or(!$f539 !$f1134)
or($f376 !$f642)
or($f625 !$f1191)
!$f1092
or(!$f462 $f1247)
!$f919
or(!$f872 $f1119)
!$f950
or($f77 !$f477)
or(!$f841 !$f1114)
or(!$f877 $f1049)
or(!$f1276 !$f1339)
or(!$f793 $f1286)
or(!$f978 $f1123)
or(!$f900 $f1203)
!$f525
or($f290 !$f1358)
!$f186
or($f522 !$f1023)
or(!$f187 !$f1033)
!$f422
or(!$f204 !$f572)
or($f448 !$f543)
or($f77 $f931)
or($f1233 !$f1404)
or($f50 $f717)
o

In [15]:
moses_est_2 = run_moses(644, X_df, y_df, complexity_ratio=1000, temp=2000, div_pres=0.6)

Selected feats len: 387
cr: 1000.00, tmp:  2000.00, div:  0.60 ,moses_tr:  0.7570, test_score:  0.6478


In [16]:
for m in moses_est_2.models_:
    print(m.model)

or($f829 !$f1150)
or(!$f805 $f1066)
or(!$f424 !$f1128)
or($f1155 !$f1408)
or(!$f892 !$f1241)
or(!$f644 !$f846)
or(!$f641 !$f954)
or(!$f137 !$f726)
or(!$f664 $f776)
or($f566 !$f882)
or(!$f1314 !$f1492)
or(!$f728 !$f951)
or(!$f329 !$f1122)
or(!$f1419 !$f1499)
or(!$f111 !$f1340)
or(!$f909 !$f1467)
or(!$f1087 !$f1403)
or(!$f86 !$f992)
or(!$f727 $f776)
or(!$f117 !$f372)
or(!$f1281 !$f1405)
or(!$f347 $f1287)
or(!$f734 $f1109)
!$f392
or(!$f48 !$f650)
or($f364 !$f1232)
or($f282 !$f1468)
!$f343
!$f1413
or(!$f1062 !$f1073)
!$f1120
or(!$f331 !$f710)
or(!$f517 !$f632)
or($f955 $f1048)
or(!$f1128 !$f1444)
or(!$f539 !$f1134)
or($f376 !$f642)
or($f625 !$f1191)
!$f1092
or(!$f462 $f1247)
or(!$f872 $f1119)
!$f919
!$f950
or($f77 !$f477)
or(!$f841 !$f1114)
or(!$f877 $f1049)
or(!$f1276 !$f1339)
or(!$f793 $f1286)
or(!$f978 $f1123)
or(!$f900 $f1203)
!$f525
or($f290 !$f1358)
or($f522 !$f1023)
!$f186
or(!$f187 !$f1033)
or(!$f204 !$f572)
!$f422
or($f448 !$f543)
or($f77 $f931)
or($f1233 !$f1404)
or($f50 $f717)
o