In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure in this notebook.
sns.set()
# Allow up to 400 columns when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 400)

In [7]:
import os
from pathlib import Path
# Step from the notebook folder up to its parent exactly once per kernel.
# An unguarded os.chdir('..') climbs one more level every time this cell is
# re-run, which silently breaks every relative path defined below.
if 'PROJECT_ROOT' not in globals():
    os.chdir('..')
    PROJECT_ROOT = Path.cwd()

# All data locations are relative to the working directory set above.
DATA = Path('_data/genomics-data')
DataCV_dir      = DATA/'DataCV'
Bags_dir        = DATA/'Bags'
FeatRanking_dir = DATA/'FeatRanking'
Log_dir         = DATA/'LogsClassifFeatureRank'

In [148]:
from lightgbm import LGBMClassifier
from scipy.io import loadmat
from SVMExperiments.utils import load_mat

def classify_feature_rank(dataset, feature_set, treatment,
                          pval_pos_threshold, num_bags, num_runs,
                          num_top_features, test_all_features=False,
                          X=None):
    """Walk every run / ranking method / classifier / feature count / bag
    combination and slice the matching per-bag feature matrices.

    Work in progress: no classifier is fitted yet. The function only builds
    the negative and positive feature matrices of each bag and prints their
    shapes.

    Parameters
    ----------
    dataset, feature_set, treatment, pval_pos_threshold
        Used to build the file names of the ranking CSV, the bags file and
        (when ``X`` is not given) the training table.
    num_bags : iterable of int
        Bag counts to test; for each value ``n`` the bag columns ``0..n-1``
        are visited.
    num_runs : iterable
        Values matched against the ``Run`` column of the ranking table.
    num_top_features : iterable of int
        How many top-ranked features to keep. The caller's object is never
        modified.
    test_all_features : bool, default False
        When true, one extra configuration using every feature column is
        appended.
    X : DataFrame, optional
        Training table. The code assumes 5 leading non-feature columns, the
        feature columns, and a trailing ``Target`` column holding 0/1.
        When omitted it is loaded with ``load_mat`` from ``DataCV_dir``.

    Returns
    -------
    tuple
        ``(FeatRank, Bags)``: the ranking table read from CSV and the dict
        returned by ``scipy.io.loadmat`` for the bags file.

    Raises
    ------
    AssertionError
        If the largest bag index does not equal the number of gene pairs of
        the corresponding class.
    """
    Classifiers = [LGBMClassifier]
    # NOTE(review): the bags file of run 1 is used for every run, as in the
    # original code. Confirm whether each run should load its own Bags{run}.
    bags_run = 1

    stem = f'{dataset}_{treatment}({pval_pos_threshold})'
    if X is None:
        # Previously X was silently taken from the notebook's global state,
        # which raises NameError on a fresh kernel.
        X = load_mat(DataCV_dir/f'{feature_set}/{stem}.trn.mat')

    FeatRank = pd.read_csv(FeatRanking_dir/f'{feature_set}/{stem}.csv', sep=';')
    Bags     = loadmat(Bags_dir/f'{dataset}({pval_pos_threshold})_Bags{bags_run}.mat')

    # Split once by class; the bag indices address rows of these two tables.
    neg = X[X.Target == 0].reset_index(drop=True)
    pos = X[X.Target == 1].reset_index(drop=True)

    # Sanity check: bag entries are gene-pair numbers, and every gene pair
    # owns n_samples consecutive rows.
    n_samples = 6 if treatment == 'All' else 3
    assert Bags['NegGenePairsBag'].max() == len(neg) // n_samples
    assert Bags['PosGenePairsBag'].max() == len(pos) // n_samples

    def bag_rows(pair_ids):
        # Bag entries are 1-based, so pair p covers rows
        # n_samples*(p-1) .. n_samples*p - 1. int() avoids doing the
        # arithmetic in the narrow unsigned dtype stored in the .mat file.
        return [row
                for pair_id in pair_ids
                for row in range(n_samples * (int(pair_id) - 1),
                                 n_samples * int(pair_id))]

    # Copy instead of `+=` so the caller's list is left untouched.
    top_feature_counts = list(num_top_features)
    if test_all_features:
        # 6 non-feature columns: 5 leading metadata columns + Target.
        top_feature_counts.append(X.shape[1] - 6)

    for run in num_runs:
        # [1, 2, 3, 4, 5]
        print(f'Run: {run}')
        FeatRankRows = FeatRank[FeatRank.Run == run]

        for _, FeatRankRow in FeatRankRows.iterrows():
            # [Entropy, Ttest, Brattacharyya, Wilcoxon]
            print(' '*3, f'Feature ranking method: {FeatRankRow["Method"]}')

            for classifier in Classifiers:
                # [LGBMClassifier, LinearSVC, RandomForestClassifier]
                print(' '*6, f'Classifier: {classifier}')

                for num_top_feats in top_feature_counts:
                    # [2, 4, 6, 7, 10, 12]
                    # Positions 0 and 1 of a ranking row are Run and Method.
                    features = list(FeatRankRow.iloc[2:2+num_top_feats])
                    print(' '*9, f'Features: {features}')
                    # Ranking entries are 1-based feature numbers; shift to
                    # 0-based positions for .iloc. Unshifted, every pick is
                    # off by one and the last feature is out of bounds.
                    feature_cols = [int(feature) - 1 for feature in features]

                    for n_bags in num_bags:
                        # [100, 200, 300]
                        print(' '*12, f'Number of bags: {n_bags}')

                        for bag_id in range(n_bags):
                            neg_bag = neg.loc[bag_rows(Bags['NegGenePairsBag'][:, bag_id])]
                            pos_bag = pos.loc[bag_rows(Bags['PosGenePairsBag'][:, bag_id])]
                            # Drop the metadata columns and Target, then keep
                            # only the selected features.
                            neg_bag_data = neg_bag.iloc[:, 5:-1].iloc[:, feature_cols]
                            pos_bag_data = pos_bag.iloc[:, 5:-1].iloc[:, feature_cols]
                            print(' '*15, f'Bags shapes: {neg_bag_data.shape}, {pos_bag_data.shape}')

    return FeatRank, Bags

In [22]:
%%time
X, T = classify_feature_rank('Erk', 'Dc', 'BCR', 0.01, [100,200,300], 
                             [1,2,3,4,5], [2,4,6,8,10,12], True)

CPU times: user 9.88 s, sys: 436 ms, total: 10.3 s
Wall time: 10.3 s


In [149]:
# Run the feature-ranking walk for Erk / Dc / BCR at threshold 0.01.
FeatRank, Bags = classify_feature_rank(
    dataset='Erk',
    feature_set='Dc',
    treatment='BCR',
    pval_pos_threshold=0.01,
    num_bags=[100, 200, 300],
    num_runs=[1, 2, 3, 4, 5],
    num_top_features=[2, 4, 6, 8, 10, 12],
    test_all_features=True,
)

Run: 1
    Feature ranking method: Entropy
       Classifier: <class 'lightgbm.sklearn.LGBMClassifier'>
          Features: [3, 5]
             Number of bags: 100
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 2), (3000, 2)
                Bags shapes: (3000, 

KeyboardInterrupt: 

In [112]:
# Show the feature-ranking table returned by classify_feature_rank.
FeatRank

Unnamed: 0,Run,Method,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45
0,1,Entropy,3,5,4,10,2,21,20,30,31,42,19,33,32,17,6,14,40,8,29,39,24,41,38,18,23,22,45,37,25,36,26,11,7,15,12,9,34,13,35,16,27,1,43,28,44
1,1,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,37,29,6,31,30,16,20,15,27,34,40,33,9,12,38,23,19,35,11,25,42,32,7,36,1,26,14,41,45,13,44,43,28
2,1,Brattacharyya,3,4,5,10,2,21,20,6,30,31,17,42,19,8,33,39,32,29,24,22,40,37,18,14,38,23,36,41,15,45,25,11,26,7,9,12,16,34,1,35,13,27,43,28,44
3,1,Wilcoxon,4,17,8,3,10,5,2,22,39,21,37,18,24,6,29,20,12,45,15,30,26,16,31,1,19,34,23,38,9,33,27,11,25,32,40,7,28,36,35,41,13,42,14,44,43
4,2,Entropy,3,5,4,10,2,21,20,30,31,42,19,33,32,17,6,14,8,40,29,39,24,41,38,18,23,22,37,45,25,26,36,11,15,7,12,9,34,13,35,16,27,1,43,28,44
5,2,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,29,37,6,31,30,16,20,15,27,40,34,9,12,33,38,23,35,11,19,25,32,42,7,1,36,26,45,14,41,13,44,28,43
6,2,Brattacharyya,3,4,5,10,2,21,20,6,30,31,17,42,19,8,39,33,29,32,24,22,37,40,18,14,38,15,41,23,36,45,25,11,26,7,12,9,16,34,1,13,35,27,28,43,44
7,2,Wilcoxon,4,17,8,3,10,5,2,22,39,21,37,18,24,6,29,20,12,45,30,15,31,26,16,1,34,23,19,38,9,33,27,11,25,32,40,7,35,36,28,13,44,41,43,42,14
8,3,Entropy,3,5,4,10,2,20,21,30,31,19,42,33,32,17,6,14,8,40,29,39,24,38,18,41,23,22,37,25,36,26,45,7,11,34,12,9,15,13,35,16,27,1,43,28,44
9,3,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,29,37,6,31,30,16,20,27,15,34,40,9,33,12,38,23,11,19,25,35,32,42,7,1,36,26,45,14,41,13,44,43,28


In [132]:
bag_id = 60

# Rows owned by one gene pair: 3 for a single treatment such as BCR
# (classify_feature_rank uses 6 only for treatment 'All').
n_samples = 3

def bag_idx(pair_id):
    """Row positions of a 1-based gene-pair number (n_samples rows per pair)."""
    # int() avoids doing the arithmetic in the narrow dtype of the bag array.
    pair_id = int(pair_id)
    return list(range(n_samples * (pair_id - 1), n_samples * pair_id))

# Bag entries index rows of the per-class tables, so split X by Target first.
neg = X[X.Target == 0].reset_index(drop=True)
pos = X[X.Target == 1].reset_index(drop=True)
neg_bag = neg.loc[[x for i in Bags['NegGenePairsBag'][:,bag_id] for x in bag_idx(i)]]
pos_bag = pos.loc[[x for i in Bags['PosGenePairsBag'][:,bag_id] for x in bag_idx(i)]]

In [133]:
# First rows of the negative bag.
neg_bag.head()

Unnamed: 0,CauseGene,EffectGene,Replicate,Treatment,Pvalue,X0min_neg_effect-X0min_neg_cause,X15min_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X0min_neg_cause,X30min_effect-X15min_cause,X30min_effect-X30min_cause,X90min_effect-X0min_neg_cause,X90min_effect-X15min_cause,X90min_effect-X30min_cause,X90min_effect-X90min_cause,X120min_effect-X0min_neg_cause,X120min_effect-X15min_cause,X120min_effect-X30min_cause,X120min_effect-X90min_cause,X120min_effect-X120min_cause,X180min_effect-X0min_neg_cause,X180min_effect-X15min_cause,X180min_effect-X30min_cause,X180min_effect-X90min_cause,X180min_effect-X120min_cause,X180min_effect-X180min_cause,X210min_effect-X0min_neg_cause,X210min_effect-X15min_cause,X210min_effect-X30min_cause,X210min_effect-X90min_cause,X210min_effect-X120min_cause,X210min_effect-X180min_cause,X210min_effect-X210min_cause,X240min_effect-X0min_neg_cause,X240min_effect-X15min_cause,X240min_effect-X30min_cause,X240min_effect-X90min_cause,X240min_effect-X120min_cause,X240min_effect-X180min_cause,X240min_effect-X210min_cause,X240min_effect-X240min_cause,X360min_effect-X0min_neg_cause,X360min_effect-X15min_cause,X360min_effect-X30min_cause,X360min_effect-X90min_cause,X360min_effect-X120min_cause,X360min_effect-X180min_cause,X360min_effect-X210min_cause,X360min_effect-X240min_cause,X360min_effect-X360min_cause,Target
95412,IRF4,ARHGAP36,1,BCR,0.959,1.756108,0.250318,0.641434,-0.851926,-0.514334,1.361641,0.199062,0.008288,-0.000946,0.292597,0.936614,0.594655,-1.024258,-0.030697,2.511917,-1.090372,-0.055519,0.865742,-0.132499,-1.062465,2.430449,0.283779,-0.040313,0.064986,0.088065,-0.230041,-0.431908,0.491412,-0.15376,-0.242559,0.235736,-0.031529,-0.085451,0.025194,-0.000148,0.364353,-0.276059,-0.130782,0.260398,-0.047362,-0.237615,0.454581,-0.086508,0.048814,0.579168,0
95413,IRF4,ARHGAP36,2,BCR,0.959,0.837445,-0.245342,1.411971,-0.098245,0.201021,0.565754,-0.704776,0.64906,0.238466,2.140382,0.882974,-0.056011,0.263766,-1.194889,3.848652,0.186166,-0.607297,-0.492499,-0.528006,-0.275052,1.03628,0.176823,-0.061352,-0.237074,-0.538371,0.538502,0.313014,2.39615,-0.547723,0.319137,0.045453,0.504857,-1.106404,-0.121381,-0.107577,0.875782,-0.598216,0.316502,-0.104588,1.537581,-2.592753,0.194333,-0.011725,1.028434,5.331981,0
95414,IRF4,ARHGAP36,3,BCR,0.959,6.92353,0.876724,0.710285,-1.652466,-0.42045,1.238145,-1.442733,-0.282039,0.74757,1.493807,-0.306872,-0.206918,0.42095,0.454072,0.723063,0.830696,0.034216,-0.277421,-0.440353,-0.144314,0.503674,0.388102,-0.106582,0.042258,-0.076559,0.067518,0.083197,0.335227,-0.20377,-0.110991,0.10306,0.394877,0.24534,-0.017802,0.059843,1.314929,-0.683677,0.028287,0.114377,0.311345,0.047843,-0.299141,-0.2107,-0.043933,0.594828,0
266799,LEF1,PLEKHG3,1,BCR,0.527,0.213994,0.084237,0.442451,0.087408,0.389927,0.582153,0.205313,0.079571,0.08298,0.263773,0.073141,0.046798,0.092457,0.08681,1.311192,-0.107187,-0.015878,-0.0062,-0.1123,-0.025557,0.169933,0.208808,0.27575,0.317175,0.216113,-0.000756,-0.134579,0.613838,-0.00644,0.093592,0.116339,-0.007422,0.019743,0.048521,-0.01148,0.367406,0.102138,0.181481,0.212927,0.105291,-0.000833,-0.037781,0.209271,0.095695,0.339169,0
266800,LEF1,PLEKHG3,2,BCR,0.527,0.462153,-0.027819,0.422265,-0.169784,0.268046,1.079301,-0.069163,-0.291386,-0.211758,0.940752,-0.018017,0.050917,0.143333,0.023772,0.538979,0.07196,0.0108,-0.016496,-0.181861,-0.002325,0.232535,0.280166,0.722122,0.964112,-1.29914,-0.089025,0.266504,3.809688,0.087091,-0.196192,-0.484607,0.217151,-0.191466,0.016728,-0.358706,0.828635,-0.035252,-0.486181,-0.394601,0.650993,0.11898,-0.097412,-1.452911,0.080221,1.747738,0


In [143]:
# Drop the 5 leading non-feature columns and the trailing Target column,
# then pick feature columns by 0-based position.
neg_bag.iloc[:, 5:-1].iloc[:,[0, 2, 4]].head()

Unnamed: 0,X0min_neg_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X15min_cause
95412,1.756108,0.641434,-0.514334
95413,0.837445,1.411971,0.201021
95414,6.92353,0.710285,-0.42045
266799,0.213994,0.442451,0.389927
266800,0.462153,0.422265,0.268046


In [109]:
# Sanity check: a negative bag should contain only Target == 0 rows.
neg_bag.Target.describe()

count    3000.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: Target, dtype: float64

In [110]:
# Sanity check: a positive bag should contain only Target == 1 rows.
pos_bag.Target.describe()

count    3000.0
mean        1.0
std         0.0
min         1.0
25%         1.0
50%         1.0
75%         1.0
max         1.0
Name: Target, dtype: float64

In [96]:
# First rows of the positive bag.
pos_bag.head()

Unnamed: 0,index,CauseGene,EffectGene,Replicate,Treatment,Pvalue,X0min_neg_effect-X0min_neg_cause,X15min_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X0min_neg_cause,X30min_effect-X15min_cause,X30min_effect-X30min_cause,X90min_effect-X0min_neg_cause,X90min_effect-X15min_cause,X90min_effect-X30min_cause,X90min_effect-X90min_cause,X120min_effect-X0min_neg_cause,X120min_effect-X15min_cause,X120min_effect-X30min_cause,X120min_effect-X90min_cause,X120min_effect-X120min_cause,X180min_effect-X0min_neg_cause,X180min_effect-X15min_cause,X180min_effect-X30min_cause,X180min_effect-X90min_cause,X180min_effect-X120min_cause,X180min_effect-X180min_cause,X210min_effect-X0min_neg_cause,X210min_effect-X15min_cause,X210min_effect-X30min_cause,X210min_effect-X90min_cause,X210min_effect-X120min_cause,X210min_effect-X180min_cause,X210min_effect-X210min_cause,X240min_effect-X0min_neg_cause,X240min_effect-X15min_cause,X240min_effect-X30min_cause,X240min_effect-X90min_cause,X240min_effect-X120min_cause,X240min_effect-X180min_cause,X240min_effect-X210min_cause,X240min_effect-X240min_cause,X360min_effect-X0min_neg_cause,X360min_effect-X15min_cause,X360min_effect-X30min_cause,X360min_effect-X90min_cause,X360min_effect-X120min_cause,X360min_effect-X180min_cause,X360min_effect-X210min_cause,X360min_effect-X240min_cause,X360min_effect-X360min_cause,Target
96,96,NFKB1,B9D1,1,BCR,0.006,0.634743,0.405425,0.861286,0.550908,0.335983,0.742303,0.155308,0.070853,0.217418,0.485795,0.328253,0.300368,0.355617,0.166629,0.603715,0.297315,0.23534,0.338603,0.204164,0.256891,0.640419,0.72125,0.695589,0.757521,0.466558,0.252942,0.150943,2.235727,-0.132003,-0.233398,-0.085338,-0.04918,-0.108731,-0.017833,-0.430831,0.465573,-0.09046,-0.023095,-0.108711,-0.382822,-0.094031,-0.079332,-0.566719,0.152948,0.821886,1
97,97,NFKB1,B9D1,2,BCR,0.006,2.294428,1.090934,1.993017,1.672875,1.103051,1.7604,1.119266,0.997259,1.047792,1.483071,0.410406,0.342335,0.368991,0.132092,0.610199,0.521733,0.361105,0.46043,0.212184,0.195272,0.890007,0.005041,-0.061711,0.016952,0.325308,-0.015067,0.086152,0.62105,1.487018,1.384365,1.409133,1.058473,0.027797,-0.153816,0.487649,4.957785,0.006239,0.074491,-0.009886,-0.469871,0.019668,-0.069261,-0.482909,-0.366103,0.731628,1
98,98,NFKB1,B9D1,3,BCR,0.006,2.563328,-0.249979,0.620271,-0.911121,-0.129283,2.054043,-0.672158,0.340465,0.745743,1.368117,-0.777457,0.262316,0.670924,0.794431,0.882325,0.794273,0.012659,-0.840488,-0.351948,-0.321316,1.196144,0.377506,-0.373926,-0.029409,-0.183635,-0.153787,0.137281,0.794291,0.541243,-0.025173,-0.460369,-0.517854,-0.448283,0.24062,0.042997,0.504735,-0.000498,0.084153,-0.074622,0.050601,0.030119,0.081724,-0.098205,0.019212,0.410151,1
291,291,NFKB1,DISP1,1,BCR,0.001,0.387296,-0.066805,0.86497,-0.64905,-0.060063,1.993179,0.102024,-0.014595,-0.195282,0.245597,-0.235508,0.074029,0.454525,-0.111716,0.350206,-0.029306,-0.312327,0.241598,-0.013817,0.003828,0.45813,-0.25557,0.173172,0.695678,-0.162038,0.200151,-0.070765,1.240299,-0.246185,-0.000141,0.499385,-0.137918,0.163607,0.099517,0.173458,0.356363,-0.118943,-0.013099,0.239179,-0.471851,0.183096,0.230653,0.463216,0.327644,2.479069,1
292,292,NFKB1,DISP1,2,BCR,0.001,0.810465,-0.207964,2.045332,0.545973,-0.026178,0.739313,0.329741,1.255969,0.517232,2.624197,-0.003567,0.532475,0.103928,0.301867,0.976511,0.516599,1.435613,0.695909,2.197795,0.260209,4.103924,-0.001353,-0.078288,-0.000795,0.471777,-0.030186,0.515357,0.761399,-0.242134,-0.287566,-0.396819,-0.897667,-0.117614,-1.048664,-0.106002,0.954805,0.000733,-0.08008,0.000234,0.43596,-0.035901,0.479142,0.584227,-0.129072,0.631269,1


In [92]:
# Total number of rows one negative bag column expands to.
sum(len(bag_idx(i)) for i in Bags['NegGenePairsBag'][:, bag_id])

3000

In [87]:
# (rows, columns) of the negative bag.
neg_bag.shape

(3000, 51)

In [None]:
# Expand one negative bag column into row positions: a 1-based gene-pair
# number p covers rows 3*(p-1) .. 3*p - 1 (3 rows per pair).
# The previous draft iterated the Bags dict keys and the bag_idx function
# object itself, which raises TypeError.
bag_idxs = [row
            for pair_id in Bags['NegGenePairsBag'][:, bag_id]
            for row in range(3 * (int(pair_id) - 1), 3 * int(pair_id))]

In [122]:
# NOTE: despite its name, `pos` is bound here to the Target == 0 rows.
pos = X.loc[X.Target.eq(0)].reset_index(drop=True)

In [60]:
# First five entries in column 0 of the negative-bag index matrix.
Bags['NegGenePairsBag'][:5,0]

array([ 9912, 56573, 65690, 79803, 33264], dtype=int32)

In [50]:
# Row count divided by 3 rows per gene pair (the n_samples value that
# classify_feature_rank uses for a single treatment).
len(pos)/3

102453.0

In [124]:
# Preview the table X that the bags are drawn from.
X.head()

Unnamed: 0,CauseGene,EffectGene,Replicate,Treatment,Pvalue,X0min_neg_effect-X0min_neg_cause,X15min_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X0min_neg_cause,X30min_effect-X15min_cause,X30min_effect-X30min_cause,X90min_effect-X0min_neg_cause,X90min_effect-X15min_cause,X90min_effect-X30min_cause,X90min_effect-X90min_cause,X120min_effect-X0min_neg_cause,X120min_effect-X15min_cause,X120min_effect-X30min_cause,X120min_effect-X90min_cause,X120min_effect-X120min_cause,X180min_effect-X0min_neg_cause,X180min_effect-X15min_cause,X180min_effect-X30min_cause,X180min_effect-X90min_cause,X180min_effect-X120min_cause,X180min_effect-X180min_cause,X210min_effect-X0min_neg_cause,X210min_effect-X15min_cause,X210min_effect-X30min_cause,X210min_effect-X90min_cause,X210min_effect-X120min_cause,X210min_effect-X180min_cause,X210min_effect-X210min_cause,X240min_effect-X0min_neg_cause,X240min_effect-X15min_cause,X240min_effect-X30min_cause,X240min_effect-X90min_cause,X240min_effect-X120min_cause,X240min_effect-X180min_cause,X240min_effect-X210min_cause,X240min_effect-X240min_cause,X360min_effect-X0min_neg_cause,X360min_effect-X15min_cause,X360min_effect-X30min_cause,X360min_effect-X90min_cause,X360min_effect-X120min_cause,X360min_effect-X180min_cause,X360min_effect-X210min_cause,X360min_effect-X240min_cause,X360min_effect-X360min_cause,Target
0,CTNNB1,CSRP2,1,BCR,0.0,0.258286,-0.040952,1.646857,-0.074884,-0.536817,0.712429,0.004945,0.433192,-0.344667,1.330743,0.084049,0.081471,-0.193222,0.07579,0.302063,0.032332,0.404781,-0.261072,0.382174,0.139955,0.471589,0.102163,0.645281,-0.572654,0.592275,0.289665,0.277296,1.004365,0.072261,0.619123,-0.436363,0.554037,0.268772,0.289592,0.731116,0.948849,-0.028561,-0.277744,0.352905,-0.459728,-0.190113,-0.34816,-0.376223,-0.32423,0.531021,1
1,CTNNB1,CSRP2,2,BCR,0.0,1.855353,0.38669,1.85361,0.402003,0.632198,0.800743,0.089057,-0.204024,-0.164598,0.537406,0.593482,0.584197,0.395299,-0.183978,1.384277,0.099596,-0.117455,-0.089068,0.271622,-0.21926,0.429347,0.796275,0.019424,0.012863,0.234264,0.39878,0.163299,3.604513,0.783394,0.580044,0.45373,-0.08733,0.268737,-0.015691,0.803291,1.680482,1.461912,1.231168,1.022377,-0.20075,-0.04709,-0.013835,1.494694,0.945993,8.09524,1
2,CTNNB1,CSRP2,3,BCR,0.0,2.424238,-0.189336,0.336505,-0.06925,0.093951,0.24652,0.135422,-0.257685,-0.392767,1.69679,0.199627,-0.119745,-0.179242,0.201025,0.470845,-0.112967,-0.028793,0.049267,0.076235,-0.117549,0.413344,0.407032,-0.158722,-0.202512,0.3774,0.208996,-0.08909,0.383871,0.820085,-0.010957,-0.15686,0.786826,0.520326,-0.184668,0.117646,5.311831,0.431805,-0.108607,-0.155356,0.363031,0.218603,-0.12794,0.280688,0.187717,0.473111,1
3,CTNNB1,DDIT4,1,BCR,0.002,0.292199,0.105321,0.510985,0.122684,0.378946,0.433839,0.046425,0.375678,0.282005,1.939397,0.184178,0.177948,0.216048,0.091588,0.321739,-0.018615,0.077717,0.074726,0.151343,0.047528,0.501953,0.29633,0.57538,0.540484,0.742259,0.412874,0.048114,1.569414,0.369645,0.677173,0.636695,0.858839,0.492032,-0.001227,1.413763,2.170484,0.308504,0.615041,0.562379,0.801305,0.38983,-0.11561,0.624933,0.806481,2.167724,1
4,CTNNB1,DDIT4,2,BCR,0.002,0.397289,0.047185,0.708738,-0.061738,0.156178,0.725773,0.012737,-0.200739,-0.240127,0.324054,-0.01079,-0.177104,-0.100049,0.105843,0.189795,-0.008936,-0.184815,-0.114929,0.124922,0.173583,0.196512,0.016151,-0.120636,-0.249531,0.164147,0.076286,0.091142,0.581367,0.01366,0.334662,0.35986,-0.259606,-0.087649,-0.108174,-0.402895,0.953361,0.427853,0.898817,0.878671,-0.295639,0.015626,-0.023013,-0.252624,0.399567,10.367887,1


In [123]:
# Preview the `pos` subset.
pos.head()

Unnamed: 0,CauseGene,EffectGene,Replicate,Treatment,Pvalue,X0min_neg_effect-X0min_neg_cause,X15min_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X0min_neg_cause,X30min_effect-X15min_cause,X30min_effect-X30min_cause,X90min_effect-X0min_neg_cause,X90min_effect-X15min_cause,X90min_effect-X30min_cause,X90min_effect-X90min_cause,X120min_effect-X0min_neg_cause,X120min_effect-X15min_cause,X120min_effect-X30min_cause,X120min_effect-X90min_cause,X120min_effect-X120min_cause,X180min_effect-X0min_neg_cause,X180min_effect-X15min_cause,X180min_effect-X30min_cause,X180min_effect-X90min_cause,X180min_effect-X120min_cause,X180min_effect-X180min_cause,X210min_effect-X0min_neg_cause,X210min_effect-X15min_cause,X210min_effect-X30min_cause,X210min_effect-X90min_cause,X210min_effect-X120min_cause,X210min_effect-X180min_cause,X210min_effect-X210min_cause,X240min_effect-X0min_neg_cause,X240min_effect-X15min_cause,X240min_effect-X30min_cause,X240min_effect-X90min_cause,X240min_effect-X120min_cause,X240min_effect-X180min_cause,X240min_effect-X210min_cause,X240min_effect-X240min_cause,X360min_effect-X0min_neg_cause,X360min_effect-X15min_cause,X360min_effect-X30min_cause,X360min_effect-X90min_cause,X360min_effect-X120min_cause,X360min_effect-X180min_cause,X360min_effect-X210min_cause,X360min_effect-X240min_cause,X360min_effect-X360min_cause,Target
0,CTNNB1,A1BG,1,BCR,0.709,0.117989,0.022836,0.17079,-0.033154,0.01971,0.378114,0.003205,0.103265,0.075732,0.449985,0.049062,0.054099,0.00384,0.021155,0.104347,0.049378,0.166147,0.233241,0.243692,0.092152,1.238416,-0.114306,-0.044493,0.084933,0.002391,-0.040777,-0.054564,0.305541,0.199977,0.175644,-0.227884,0.091423,0.138637,-0.335847,-0.436954,3.234805,0.012592,0.111088,0.196558,0.177301,0.063691,0.479643,0.00678,-0.357354,0.6702,0
1,CTNNB1,A1BG,2,BCR,0.709,1.332299,-0.390249,0.844101,-0.468868,0.266832,0.4088,0.244777,-0.233214,-0.153916,0.273429,-0.258934,0.153824,0.122065,-0.065175,0.456599,-0.473069,0.500826,0.371395,-0.292616,0.146237,2.059543,-0.375452,0.069334,0.149088,-0.033965,0.104889,0.262062,0.656146,0.153118,-0.132576,-0.075916,0.131825,0.002116,-0.142718,0.039764,0.442776,0.709999,-0.186461,-0.236979,0.02098,0.009681,-0.029438,-0.678919,-0.07654,2.815529,0
2,CTNNB1,A1BG,3,BCR,0.709,4.213463,-0.261576,0.379323,-0.105102,0.06651,0.297369,-0.213173,-0.088036,0.079809,1.702568,-0.150083,0.199661,0.110179,-0.075,0.270138,0.560668,-0.146723,-0.186469,-0.016751,-0.158498,0.399003,0.657152,-0.0846,-0.227205,-0.150449,-0.106066,0.308908,0.556818,-1.380191,0.331504,0.479314,0.805011,0.363679,-0.308263,-0.391189,3.69274,-1.029577,0.220127,0.266778,0.460656,0.224054,-0.450158,-0.518529,0.55671,1.05129,0
3,CTNNB1,A1CF,1,BCR,0.913,0.69968,-0.234145,0.643357,0.055283,-0.226985,0.414899,0.221069,-0.58518,0.070237,2.526242,0.335483,-0.177675,0.12591,0.106389,0.440942,0.09119,0.002715,-0.077544,0.205138,0.02775,0.374763,0.376724,-0.379463,0.168585,0.633075,0.266577,0.060692,0.750528,0.542812,-0.525997,0.365564,0.888889,0.427745,-0.059854,0.430637,2.325845,0.300907,-0.427889,0.188174,0.591993,0.225562,0.010446,0.312639,0.341009,0.57124,0
4,CTNNB1,A1CF,2,BCR,0.913,0.582706,-0.159712,1.538769,0.087349,0.151241,0.458628,-0.317141,0.215411,-0.009071,0.486535,-0.169112,0.226475,0.03876,0.200493,0.52772,-0.15466,0.28133,0.070736,0.201205,0.458229,0.790678,-0.477541,0.015755,-0.272334,0.399459,0.234595,0.291973,1.822632,-0.015011,-0.273546,-0.306196,-0.035866,-0.038202,-0.063724,0.213603,0.544045,0.87785,-0.046169,0.471888,-0.333094,0.020769,-0.032177,-1.931072,-0.319364,6.724956,0


In [56]:
# Largest index stored in the negative bags; classify_feature_rank asserts
# that it equals the number of negative rows divided by n_samples.
Bags['NegGenePairsBag'].max()

102453

In [53]:
# Summary statistics over every negative-bag index (matrix flattened to 1-D);
# useful for checking the min/max range of the indices.
pd.Series(Bags['NegGenePairsBag'].reshape(-1)).describe()

count    300000.000000
mean      51157.975133
std       29609.899429
min           1.000000
25%       25502.750000
50%       51033.500000
75%       76838.250000
max      102453.000000
dtype: float64

In [76]:
# Shape of the positive-bag index matrix.
Bags['PosGenePairsBag'].shape

(1000, 300)

In [75]:
# Raw negative-bag index matrix.
Bags['NegGenePairsBag']

array([[ 9912,  1424, 29759, ..., 86424, 92892, 36478],
       [56573, 51492, 18942, ..., 34620, 95260, 66810],
       [65690, 25794, 94832, ..., 12389, 47730, 86603],
       ...,
       [61259, 79579, 71730, ..., 48009, 98557, 21714],
       [20385, 32147, 91170, ..., 95042, 86412, 74688],
       [62931, 74293, 77621, ..., 64121, 50061, 29782]], dtype=int32)

In [73]:
# Full dict returned by scipy.io.loadmat: the two bag matrices plus the
# MATLAB header fields.
Bags

{'NegGenePairsBag': array([[ 9912,  1424, 29759, ..., 86424, 92892, 36478],
        [56573, 51492, 18942, ..., 34620, 95260, 66810],
        [65690, 25794, 94832, ..., 12389, 47730, 86603],
        ...,
        [61259, 79579, 71730, ..., 48009, 98557, 21714],
        [20385, 32147, 91170, ..., 95042, 86412, 74688],
        [62931, 74293, 77621, ..., 64121, 50061, 29782]], dtype=int32),
 'PosGenePairsBag': array([[  33, 2167,  997, ..., 1799,  263, 1918],
        [  98, 1855, 1640, ..., 2681, 1751, 1895],
        [1798, 1538, 1367, ..., 2361,  947, 2416],
        ...,
        [ 810, 2742, 1793, ..., 1668,  801, 2063],
        [1249,  296, 2093, ..., 1672,  202,  399],
        [1730, 1238, 1741, ...,   45, 1558,   55]], dtype=uint16),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Tue May 19 14:24:34 2015',
 '__version__': '1.0'}

In [72]:
# Show the feature-ranking table (Run, Method, then features in rank order).
FeatRank

Unnamed: 0,Run,Method,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45
0,1,Entropy,3,5,4,10,2,21,20,30,31,42,19,33,32,17,6,14,40,8,29,39,24,41,38,18,23,22,45,37,25,36,26,11,7,15,12,9,34,13,35,16,27,1,43,28,44
1,1,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,37,29,6,31,30,16,20,15,27,34,40,33,9,12,38,23,19,35,11,25,42,32,7,36,1,26,14,41,45,13,44,43,28
2,1,Brattacharyya,3,4,5,10,2,21,20,6,30,31,17,42,19,8,33,39,32,29,24,22,40,37,18,14,38,23,36,41,15,45,25,11,26,7,9,12,16,34,1,35,13,27,43,28,44
3,1,Wilcoxon,4,17,8,3,10,5,2,22,39,21,37,18,24,6,29,20,12,45,15,30,26,16,31,1,19,34,23,38,9,33,27,11,25,32,40,7,28,36,35,41,13,42,14,44,43
4,2,Entropy,3,5,4,10,2,21,20,30,31,42,19,33,32,17,6,14,8,40,29,39,24,41,38,18,23,22,37,45,25,26,36,11,15,7,12,9,34,13,35,16,27,1,43,28,44
5,2,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,29,37,6,31,30,16,20,15,27,40,34,9,12,33,38,23,35,11,19,25,32,42,7,1,36,26,45,14,41,13,44,28,43
6,2,Brattacharyya,3,4,5,10,2,21,20,6,30,31,17,42,19,8,39,33,29,32,24,22,37,40,18,14,38,15,41,23,36,45,25,11,26,7,12,9,16,34,1,13,35,27,28,43,44
7,2,Wilcoxon,4,17,8,3,10,5,2,22,39,21,37,18,24,6,29,20,12,45,30,15,31,26,16,1,34,23,19,38,9,33,27,11,25,32,40,7,35,36,28,13,44,41,43,42,14
8,3,Entropy,3,5,4,10,2,20,21,30,31,19,42,33,32,17,6,14,8,40,29,39,24,38,18,41,23,22,37,25,36,26,45,7,11,34,12,9,15,13,35,16,27,1,43,28,44
9,3,Ttest,4,3,17,8,10,2,39,5,24,18,21,22,29,37,6,31,30,16,20,27,15,34,40,9,33,12,38,23,11,19,25,35,32,42,7,1,36,26,45,14,41,13,44,43,28


In [31]:
# Preview the first rows of X.
X.head()

Unnamed: 0,CauseGene,EffectGene,Replicate,Treatment,Pvalue,X0min_neg_effect-X0min_neg_cause,X15min_effect-X0min_neg_cause,X15min_effect-X15min_cause,X30min_effect-X0min_neg_cause,X30min_effect-X15min_cause,...,X90min_effect-X90min_cause,X120min_effect-X0min_neg_cause,X120min_effect-X15min_cause,X120min_effect-X30min_cause,X120min_effect-X90min_cause,X120min_effect-X120min_cause,X180min_effect-X0min_neg_cause,X180min_effect-X15min_cause,X180min_effect-X30min_cause,Target
0,CTNNB1,CSRP2,1,BCR,0.0,0.258286,-0.040952,1.646857,-0.074884,-0.536817,...,1.330743,0.084049,0.081471,-0.193222,0.07579,0.302063,0.032332,0.404781,-0.261072,1
1,CTNNB1,CSRP2,2,BCR,0.0,1.855353,0.38669,1.85361,0.402003,0.632198,...,0.537406,0.593482,0.584197,0.395299,-0.183978,1.384277,0.099596,-0.117455,-0.089068,1
2,CTNNB1,CSRP2,3,BCR,0.0,2.424238,-0.189336,0.336505,-0.06925,0.093951,...,1.69679,0.199627,-0.119745,-0.179242,0.201025,0.470845,-0.112967,-0.028793,0.049267,1
3,CTNNB1,DDIT4,1,BCR,0.002,0.292199,0.105321,0.510985,0.122684,0.378946,...,1.939397,0.184178,0.177948,0.216048,0.091588,0.321739,-0.018615,0.077717,0.074726,1
4,CTNNB1,DDIT4,2,BCR,0.002,0.397289,0.047185,0.708738,-0.061738,0.156178,...,0.324054,-0.01079,-0.177104,-0.100049,0.105843,0.189795,-0.008936,-0.184815,-0.114929,1


In [69]:
# Column names of X.
X.columns

Index(['CauseGene', 'EffectGene', 'Replicate', 'Treatment', 'Pvalue',
       'X0min_neg_effect-X0min_neg_cause', 'X15min_effect-X0min_neg_cause',
       'X15min_effect-X15min_cause', 'X30min_effect-X0min_neg_cause',
       'X30min_effect-X15min_cause', 'X30min_effect-X30min_cause',
       'X90min_effect-X0min_neg_cause', 'X90min_effect-X15min_cause',
       'X90min_effect-X30min_cause', 'X90min_effect-X90min_cause',
       'X120min_effect-X0min_neg_cause', 'X120min_effect-X15min_cause',
       'X120min_effect-X30min_cause', 'X120min_effect-X90min_cause',
       'X120min_effect-X120min_cause', 'X180min_effect-X0min_neg_cause',
       'X180min_effect-X15min_cause', 'X180min_effect-X30min_cause', 'Target'],
      dtype='object')

In [71]:
# NOTE(review): `mat` is not defined in any visible cell; presumably it is a
# raw scipy.io.loadmat() result for a .trn.mat file — confirm and load it
# explicitly. Prints the ';'-separated string stored in DatasetTrn as a list.
print(mat['DatasetTrn'][0,0][4][0].strip().split(';'))

['CauseGene', 'EffectGene', 'Replicate', 'Treatment', 'Pvalue', 'X0min_neg_effect-X0min_neg_cause', 'X15min_effect-X0min_neg_cause', 'X15min_effect-X15min_cause', 'X30min_effect-X0min_neg_cause', 'X30min_effect-X15min_cause', 'X30min_effect-X30min_cause', 'X90min_effect-X0min_neg_cause', 'X90min_effect-X15min_cause', 'X90min_effect-X30min_cause', 'X90min_effect-X90min_cause', 'X120min_effect-X0min_neg_cause', 'X120min_effect-X15min_cause', 'X120min_effect-X30min_cause', 'X120min_effect-X90min_cause', 'X120min_effect-X120min_cause', 'X180min_effect-X0min_neg_cause', 'X180min_effect-X15min_cause', 'X180min_effect-X30min_cause', 'X180min_effect-X90min_cause', 'X180min_effect-X120min_cause', 'X180min_effect-X180min_cause', 'X210min_effect-X0min_neg_cause', 'X210min_effect-X15min_cause', 'X210min_effect-X30min_cause', 'X210min_effect-X90min_cause', 'X210min_effect-X120min_cause', 'X210min_effect-X180min_cause', 'X210min_effect-X210min_cause', 'X240min_effect-X0min_neg_cause', 'X240min_effec