# netremCV
Cross-validation approach for estimating the optimal $\beta_{net}$ and $\alpha_{lasso}$.

The choice of $\beta_{net}$ can affect the optimal value of $\alpha_{lasso}$, so the two are tuned jointly.

In [1]:
import sys
# Make the project's "code" directory importable. The path is relative, so it is
# resolved against the kernel's current working directory: "../code" means one
# level up from where the notebook is run, then into "code".
sys.path.append("../code")

from DemoDataBuilderXandY import generate_dummy_data
from Netrem_model_builder import netrem, netremCV
import PriorGraphNetwork as graph
import error_metrics as em 
import essential_functions as ef
import netrem_evaluation_functions as nm_eval
import Netrem_model_builder as nm

# Simulate a demo dataset with 5 predictors (one entry in corrVals per predictor)
# and 100,000 samples, of which 70% go to training and the remaining 30% to testing.
# NOTE(review): corrVals presumably sets each predictor's correlation with y --
# confirm in DemoDataBuilderXandY.
dummy_data = generate_dummy_data(
    corrVals=[0.9, 0.5, 0.3, -0.2, -0.8],
    num_samples_M=100000,
    train_data_percent=70,
)

:) same_train_test_data = False
Please note that since we hold out 30.0% of our 100000 samples for testing, we have:
X_train = 70000 rows (samples) and 5 columns (N = 5 predictors) for training.
X_test = 30000 rows (samples) and 5 columns (N = 5 predictors) for testing.
y_train = 70000 corresponding rows (samples) for training.
y_test = 30000 corresponding rows (samples) for testing.


100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 557.68it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 994.05it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 1684.73it/s]


In [2]:
# Training split: 70% of the simulated samples (70,000 rows in this run),
# used to train and fit the NetREm model.
X_train = dummy_data.view_X_train_df()
y_train = dummy_data.view_y_train_df()

# Testing split: the held-out 30% of the samples (30,000 rows in this run).
X_test = dummy_data.view_X_test_df()
y_test = dummy_data.view_y_test_df()

# Sanity checks: every predictor row must have a matching response row in each split.
assert len(X_train) == len(y_train), "X_train and y_train have different numbers of samples"
assert len(X_test) == len(y_test), "X_test and y_test have different numbers of samples"

In [3]:
# Prior-knowledge network among the 5 predictors (TF1..TF5), given as an edge list.
# Each entry is [node_a, node_b] or [node_a, node_b, weight].
# NOTE(review): edges without an explicit weight presumably receive a default
# weight inside the network builder -- confirm in PriorGraphNetwork.
edge_list = [
    # edges with an explicit weight
    ["TF1", "TF2", 0.9],
    ["TF4", "TF5", 0.75],
    # edges without an explicit weight
    ["TF1", "TF3"],
    ["TF1", "TF4"],
    ["TF1", "TF5"],
    ["TF2", "TF3"],
    ["TF2", "TF4"],
    ["TF2", "TF5"],
    ["TF3", "TF4"],
    ["TF3", "TF5"],
]
edge_list

[['TF1', 'TF2', 0.9],
 ['TF4', 'TF5', 0.75],
 ['TF1', 'TF3'],
 ['TF1', 'TF4'],
 ['TF1', 'TF5'],
 ['TF2', 'TF3'],
 ['TF2', 'TF4'],
 ['TF2', 'TF5'],
 ['TF3', 'TF4'],
 ['TF3', 'TF5']]

In [4]:
%%time 

netrem_demoCV = netremCV(edge_list = edge_list, X = X_train, y = y_train) 
netrem_demoCV

:) using variance to define beta_net values
beta_min = 1.1506396943803596 and beta_max = 115.06396943803597


:) Generating beta_net and alpha_lasso pairs:   0%|          | 0/50 [00:00<?, ?it/s]

:) Performing NetREmCV with both beta_network and alpha_lasso as UNKNOWN.


  0%|          | 0/50 [00:00<?, ?it/s]

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] END alpha_lasso=0.051694590434151706, beta_net=115.06396943803597; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=115.06396943803597; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=115.06396943803597; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=115.06396943803597; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=115.06396943803597; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=104.74252173191628; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=104.74252173191628; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=104.74252173191628; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=104.74252173191628; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=104.74252173191628; total time=   0.0s
[CV] END alpha_lasso=0.05169

[CV] END alpha_lasso=0.051694590434151706, beta_net=21.195588265333313; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=21.195588265333313; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=21.195588265333313; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=19.294305379391396; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=19.294305379391396; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=19.294305379391396; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=19.294305379391396; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=19.294305379391396; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=17.563571032472005; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=17.563571032472005; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=17.563571032472005; total time=   0.0s

[CV] END alpha_lasso=0.051694590434151706, beta_net=3.554146051839581; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.554146051839581; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.554146051839581; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.554146051839581; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.554146051839581; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.235332675305331; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.235332675305331; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.235332675305331; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.235332675305331; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=3.235332675305331; total time=   0.0s
[CV] END alpha_lasso=0.051694590434151706, beta_net=2.94511743952688; total time=   0.0s
[CV] END al

In [5]:
# Show the model returned by netremCV.
# NOTE(review): this repeats the display at the end of the fitting cell.
netrem_demoCV

In [6]:
# Inspect the model's settings; alpha_lasso and beta_net are presumably the pair
# selected by cross-validation.
# NOTE(review): in the saved run beta_net equals the printed beta_min, i.e. the
# optimum sits on the edge of the searched range -- consider widening the grid.
netrem_demoCV.get_params()

{'info': 'NetREm Model',
 'alpha_lasso': 0.051694590434151706,
 'beta_net': 1.1506396943803596,
 'y_intercept': False,
 'model_type': 'Lasso',
 'max_lasso_iterations': 10000,
 'network': <PriorGraphNetwork.PriorGraphNetwork at 0x20224922950>,
 'verbose': False,
 'all_pos_coefs': False,
 'model_info': 'fitted_model :)',
 'target_gene_y': 'y',
 'tolerance': 0.0001,
 'lasso_selection': 'cyclic'}

In [7]:
# Mean squared error on the TRAINING split (despite its name, test_mse is simply
# handed whichever X/y pair should be scored).
netrem_demoCV.test_mse(X_train, y_train)

0.13727397681026726

In [8]:
# Mean squared error on the held-out TESTING split; compare with the training
# value to check for overfitting.
netrem_demoCV.test_mse(X_test, y_test)

0.13781162327050317

In [9]:
# Coefficients of the predictors kept by the fitted model (non-zero only);
# in the saved run TF4 does not appear.
netrem_demoCV.model_nonzero_coef_df

Unnamed: 0,y_intercept,TF1,TF2,TF3,TF5
0,,0.277752,0.063378,0.00145,-0.159248


In [10]:
# B matrix of TF-TF interactions from the fitted model (5 x 5 and symmetric
# in the saved run).
netrem_demoCV.B_interaction_df

Unnamed: 0,TF1,TF2,TF3,TF4,TF5
TF1,5.414075,1.113261,0.361368,-0.429737,-2.771125
TF2,1.113261,1.439763,-0.107738,-0.12507,-0.773506
TF3,0.361368,-0.107738,37.921695,-0.359079,-0.715906
TF4,-0.429737,-0.12507,-0.359079,1.139272,0.197285
TF5,-2.771125,-0.773506,-0.715906,0.197285,2.877027


In [14]:
# Reshape the B interaction matrix into a long-format table with one row per
# ordered TF pair, including its weight, sign, rank and percentile.
# NOTE(review): rows appear sorted by absolute weight (absVal_B), largest first --
# confirm in Netrem_model_builder.organize_B_interaction_network.
b_matrix = nm.organize_B_interaction_network(netrem_demoCV)
b_matrix

Unnamed: 0,TF1,TF2,B_train_weight,sign,potential_interaction,absVal_B,info,candidate_TFs_N,target_gene_y,num_final_predictors,model_type,beta_net,gene_data,rank,percentile
20,TF1,TF5,-2.771125,:(,:( competitive (-),2.771125,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,1.0,95.0
4,TF5,TF1,-2.771125,:(,:( competitive (-),2.771125,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,1.0,95.0
5,TF1,TF2,1.113261,:),:(,1.113261,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,3.0,85.0
1,TF2,TF1,1.113261,:),:(,1.113261,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,3.0,85.0
9,TF5,TF2,-0.773506,:(,:( competitive (-),0.773506,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,5.0,75.0
21,TF2,TF5,-0.773506,:(,:( competitive (-),0.773506,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,5.0,75.0
14,TF5,TF3,-0.715906,:(,:( competitive (-),0.715906,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,7.0,65.0
22,TF3,TF5,-0.715906,:(,:( competitive (-),0.715906,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,7.0,65.0
15,TF1,TF4,-0.429737,:(,:( competitive (-),0.429737,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,9.0,55.0
3,TF4,TF1,-0.429737,:(,:( competitive (-),0.429737,B matrix of TF-TF interactions,5,y,4,Lasso,1.15064,training gene expression data,9.0,55.0
