In [None]:
import numpy as np
import pandas as pd
import geneselection.solvers.elasticnet.pca as epca
import geneselection.solvers.elasticnet.utils as eutils
import geneselection.datasets.scrnaseq_cardio_20181129 as cardio

In [None]:
# Read the table of genes written by the earlier elastic-net gene-selection run
# (continuous regression, intersect protocol) from the results CSV.
df_gene_sel = pd.read_csv(
    "../results/elastic_net_gene_selection/continuous_regression/intersect_protocol/selected_genes_8_levels_sparsity_thresh_0.5_alpha_0.9.csv"
)

# Derive the gene set (num_genes=25) that is later passed as the unpenalized genes.
unpenalized_genes = eutils.get_gene_set(
    df_gene_sel,
    num_genes=25,
)

In [None]:
# Run configuration shared by every downstream cell.
params = {
    # subset of days to run on
    "days": "all",
    # genes predictive of diff day, used unpenalized before finding the other
    # genes that are predictive of the rest of the data
    "unpenalized_genes": unpenalized_genes,
    # gaussian noise added to scaled X
    "noise": 0.01,
    # lambda path (100 geometrically spaced values from 100 down to 0.01)
    "lambda_path": np.geomspace(100, 0.01, num=100),
    # fraction of regularization devoted to the L1 penalty
    "alpha": 0.9,
    # number of PCs to predict with the multitask elastic net
    "n_pcs": 10,
    # relative importance in predicting PCs
    # (scaled = all selected PCs are equally important)
    "pc_weights": "scaled",
    # number of bootstrap replicates
    "n_bootstraps": 100,
    # number of parallel processes to use
    "n_processes": 25,
    # selection thresholds for including genes (0.01 to 1 in 100 steps)
    "thresholds": np.linspace(0.01, 1, num=100),
}

In [None]:
# Load the cardio dataset, preprocess it, then restrict it to the configured
# days -- the same three calls in the same order, written as one expression.
adata = eutils.subset_cardio(
    eutils.preprocess_cardio(cardio.load()),
    days=params["days"],
)

In [None]:
# Launch the bootstrap runs. Every keyword argument is taken verbatim from
# `params` under the same name, so the keywords are unpacked from it in order.
boot_results = epca.parallel_runs(
    adata,
    **{
        name: params[name]
        for name in (
            "n_processes",
            "n_bootstraps",
            "noise",
            "n_pcs",
            "alpha",
            "lambda_path",
            "pc_weights",
            "unpenalized_genes",
        )
    },
)

In [None]:
# Plot the bootstrap results over the threshold / lambda grids.
# Note: the `lambdas` argument is fed from the "lambda_path" entry of `params`.
eutils.thresh_lambda_plot(
    boot_results,
    adata,
    thresholds=params["thresholds"],
    lambdas=params["lambda_path"],
    unpenalized_genes=params["unpenalized_genes"],
)

In [None]:
# Pick the final gene set at one fixed (lambda, threshold) grid position.
# NOTE(review): the indices 65 and 90 are hard-coded; presumably they index into
# params["lambda_path"] and params["thresholds"] and were chosen by inspecting
# the threshold/lambda plot -- confirm before re-running with a different grid.
residual_variance_genes = eutils.get_selected_genes(
    boot_results,
    adata,
    lambda_index=65,
    selection_threshold_index=90,
    **{key: params[key] for key in ("thresholds", "unpenalized_genes")},
)

In [None]:
# Show the selected genes as a plain list (cell output).
[*residual_variance_genes]