In [6]:
import os
import numpy as np
import pandas as pd 
import glob
from sklearn.linear_model import LassoCV, ElasticNetCV
os.chdir('/mnt/BioHome/jreyna/jreyna/projects/cmi-pb-preds/')

# setting the data directory
data_dir = 'results/main/cmi_pb_datasets/processed/harmonized/'

# setting the output dir
output_dir = 'results/main/submissions/lr_model/'

In [7]:
model_dict = {'lasso_cv': LassoCV,
              'elastic_net_cv': ElasticNetCV}

In [8]:
cmodel = 'lasso_cv'
cmodel_function = model_dict[cmodel]

## Loading the loading matrices

In [9]:
loadings = {} 
tpl = '/mnt/BioAdHoc/Groups/vd-ay/jreyna/projects/cmi-pb-momix/results/factors/*.loading.tsv'
for loading_fn in glob.glob(tpl):
    
    bn = os.path.basename(loading_fn)
    assay = bn.split('.')[0]
    loadings_matrix = pd.read_table(loading_fn)
    loadings[assay] = loadings_matrix

## Listing the shared subjects

In [10]:
# # get the shared subjects 
# shared_subjects = rnaseq_raw.columns.tolist()
# shared_subjects = [int(x.replace('X', '')) for x in shared_subjects]
shared_subjects = [4, 6, 11, 15, 20, 21, 26, 31, 33, 44, 47, 48, 52]

## Loading the input data

In [11]:
# setting up dictionaries to load data and results
train_features = {}
train_outcomes = {}

test_features = {}
test_preds = {}

#### Training Features (calculating the reduce form of each omic)

In [12]:
tpl = '/mnt/BioAdHoc/Groups/vd-ay/jreyna/projects/cmi-pb-momix/'
tpl += 'results/input_data/*.2020.momix.day0.input.tsv.harmonized.tsv'
for raw_fn in glob.glob(tpl):  
    
    bn = os.path.basename(raw_fn)
    assay = bn.split('.')[0]
    
    # Loading the raw matrices
    raw = pd.read_table(raw_fn, index_col=0, header=0)
    
    ## Calculating the sample factor matrix
    raw_array = np.matrix(raw.T.values)
    loadings_array = np.matrix(loadings[assay].values)
    sample_factors = raw_array * loadings_array
    train_features[assay] = pd.DataFrame(sample_factors) 
    train_features[assay].index = raw.columns.str.replace('X', '').astype(int)

In [13]:
train_features[assay]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
4,18.501266,18.816058,-3.279093,8.135793,3.220009,3.029462,-6.89204,2.201627,-4.153506,-8.720384
6,24.609332,38.022989,-8.853925,14.840321,0.536037,2.142527,-8.789163,2.527824,-5.320153,-6.777567
11,52.172205,50.884542,-6.887389,15.646211,9.970164,1.971205,-14.224462,2.914185,-5.097582,-11.581859
15,25.577545,33.840872,-5.116219,16.841497,5.816615,6.939347,-4.275021,5.662264,-8.614458,-15.359694
20,12.453402,24.184701,-4.786949,14.51538,-5.25002,6.674168,-9.978196,3.028039,-7.871834,-10.947127
21,11.587124,20.975921,-5.558505,12.942014,-2.59688,4.145574,-8.852224,3.371696,-5.715736,-10.812218
26,57.019845,48.079507,-9.394332,13.408658,11.035178,1.233398,-21.595454,0.806174,-7.564238,-15.049903
31,50.42513,46.104916,-4.293778,16.503302,13.496684,4.615812,-11.364364,5.498322,-4.111305,-16.78631
33,34.069801,33.434131,-7.800893,15.008086,7.400679,1.936668,-11.309345,5.314118,-6.536328,-15.405857
44,19.944982,34.438522,-7.527921,13.609578,1.174292,3.956342,-7.418634,1.702294,-4.243962,-6.407255


In [14]:
train_features['final'] = pd.concat([train_features['cytof'], train_features['olink'], train_features['rnaseq']], axis=1)

In [15]:
train_features['final']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1
4,18.501266,18.816058,-3.279093,8.135793,3.220009,3.029462,-6.89204,2.201627,-4.153506,-8.720384,...,-4027.395473,3470.278294,-3164.722691,4402.980371,5073.610276,-3892.01796,9197.172158,-3889.231413,4762.10613,1914.052108
6,24.609332,38.022989,-8.853925,14.840321,0.536037,2.142527,-8.789163,2.527824,-5.320153,-6.777567,...,-7694.804155,8809.059977,-7856.745104,4674.646648,5980.813585,-5620.97028,10803.205709,-5438.147168,6390.215962,2172.551774
11,52.172205,50.884542,-6.887389,15.646211,9.970164,1.971205,-14.224462,2.914185,-5.097582,-11.581859,...,-3650.049242,5188.642002,-3595.21702,4991.843631,3262.330491,-6251.516829,10074.69597,-4945.532206,4322.081167,2151.388233
15,25.577545,33.840872,-5.116219,16.841497,5.816615,6.939347,-4.275021,5.662264,-8.614458,-15.359694,...,-7700.37635,6570.49641,-4900.737756,5411.270058,5648.303522,-6610.346402,8897.277772,-4884.259427,4513.443262,-29.343774
20,12.453402,24.184701,-4.786949,14.51538,-5.25002,6.674168,-9.978196,3.028039,-7.871834,-10.947127,...,-5521.089057,3480.37201,-2446.570315,7437.423017,2824.305178,-3539.952476,4673.83221,-5126.497022,7034.08814,1522.442667
21,11.587124,20.975921,-5.558505,12.942014,-2.59688,4.145574,-8.852224,3.371696,-5.715736,-10.812218,...,-9680.896055,6303.130749,-5043.143766,5171.776211,7114.735375,-6273.66927,8436.363283,-4343.640211,4997.923165,-878.890662
26,57.019845,48.079507,-9.394332,13.408658,11.035178,1.233398,-21.595454,0.806174,-7.564238,-15.049903,...,-1762.4742,4456.083037,-4656.021104,4760.160133,3017.480411,-3950.449351,10231.932954,-5372.35026,5557.139811,2978.995679
31,50.42513,46.104916,-4.293778,16.503302,13.496684,4.615812,-11.364364,5.498322,-4.111305,-16.78631,...,-2252.74457,3830.033266,-4271.984619,5256.376294,3981.507902,-2888.945472,9870.946013,-4401.013575,6568.707462,2750.60497
33,34.069801,33.434131,-7.800893,15.008086,7.400679,1.936668,-11.309345,5.314118,-6.536328,-15.405857,...,-4896.360183,4614.238071,-5272.109758,6300.603736,5625.017362,-3740.059241,8893.73622,-4661.05374,6842.548347,2454.976267
44,19.944982,34.438522,-7.527921,13.609578,1.174292,3.956342,-7.418634,1.702294,-4.243962,-6.407255,...,-11694.977017,9533.341845,-7778.66827,5608.081254,8227.010211,-5608.294793,6026.918241,-5760.060499,6854.529859,1121.038707


#### Training Outcomes

In [16]:
fn = os.path.join(data_dir, 'task_matrix.common_names.mfi_normalised.tsv')
train_outcomes = pd.read_table(fn)
train_outcomes = train_outcomes.loc[train_outcomes.subject_id.isin(shared_subjects)]

In [17]:
train_outcomes.shape

(13, 15)

#### Testing Features

In [18]:
test_features = {} 
shared_subjects_2021 = set()
tpl = 'results/main/cmi_pb_datasets/processed/harmonized/*.2021.day0.pivoted.tsv'

i = 0 
for raw_fn in glob.glob(tpl):  
    
    # get the assay name
    bn = os.path.basename(raw_fn)
    assay = bn.split('.')[0]
    
    if assay in ['abtiters']:
        continue
    
    print(raw_fn)
    print(assay)
    
    # loading the raw matrices
    raw = pd.read_table(raw_fn, index_col=0, header=0)
    
    print(raw.shape)
    
    raw = raw.loc[:, raw.columns.isin(loadings[assay].index)]
    
    print(raw.shape)
    
    # getting the loadings matrix 
        
    ## Calculating the sample factor matrix
    raw_array = np.matrix(raw.values)
    loadings_array = np.matrix(loadings[assay].values)
    sample_factors = raw_array * loadings_array
    
    # add to the test_features dict 
    test_features[assay] = pd.DataFrame(sample_factors)
    test_features[assay].index = raw.index
    
    
    if i == 0:
        shared_subjects_2021 = set(test_features[assay].index)
    else:
        shared_subjects_2021 = shared_subjects_2021.intersection(test_features[assay].index)
    i += 1 


results/main/cmi_pb_datasets/processed/harmonized/cytof.2021.day0.pivoted.tsv
cytof
(33, 22)
(33, 21)
results/main/cmi_pb_datasets/processed/harmonized/olink.2021.day0.pivoted.tsv
olink
(36, 27)
(36, 23)
results/main/cmi_pb_datasets/processed/harmonized/rnaseq.2021.day0.pivoted.tsv
rnaseq
(36, 11589)
(36, 11589)


In [19]:
# harmonize the samples
for assay in test_features.keys():
    test_features[assay] =  test_features[assay].loc[test_features[assay].index.isin(shared_subjects_2021), :]

In [20]:
test_features['final'] = pd.concat([test_features['cytof'], test_features['olink'], test_features['rnaseq']], axis=1)
test_features['final'] = test_features['final'].dropna()

In [21]:
test_features['final']

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,0,1,2,3,4,5,6,7,8,9
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
63,44.69643,41.690035,-7.214139,23.240366,2.136601,12.748282,-22.850137,4.15468,-17.358799,-28.783088,...,-24198.611508,16641.592266,-16105.339212,7809.653192,12583.973239,-9183.781067,4507.451777,-5874.798718,9232.008984,-851.385512
64,27.856438,28.321223,-4.73347,22.713538,0.427908,6.562069,-11.011902,11.134754,-11.759368,-24.541905,...,-20716.746505,13193.792111,-13970.507479,5927.462313,12586.635282,-7826.377853,6122.863495,-3018.074077,7044.278269,-1310.331764
65,39.005592,37.369216,-9.295215,19.400767,-0.162314,8.435066,-23.237163,3.074196,-16.716408,-24.527929,...,-31886.823701,23034.444879,-20589.033015,6896.43524,17575.944583,-10886.897584,7114.882771,-6938.423594,8402.858284,-3813.840449
66,38.723471,39.569805,-4.56211,26.169709,-1.282023,11.690512,-18.911311,9.955194,-16.342605,-30.426337,...,-38865.544701,19261.950517,-11031.164992,16907.732696,9444.673664,-23515.232787,-11203.361272,-7519.303804,9564.115349,-10028.204476
67,25.023606,38.446371,-10.840939,17.85365,-7.507952,9.611313,-20.515549,-1.742217,-16.074133,-14.301677,...,-45672.895004,28402.520941,-21145.572672,13396.549258,15655.903522,-22067.0668,-3785.931689,-9996.543005,10098.653913,-9590.682143
68,44.522318,39.178696,-8.926077,20.901548,0.434296,9.260969,-25.870116,3.743994,-18.201845,-27.759923,...,-18221.687726,12525.742739,-15274.545218,6134.48714,10470.131598,-6089.268631,5755.02713,-3234.539895,8445.80689,248.238093
69,45.98604,42.091344,-6.03289,23.92308,1.148144,12.244099,-24.006706,6.468073,-16.674462,-30.104523,...,-22749.560879,14822.906248,-14121.552907,8984.465515,10145.187312,-8837.515832,198.092263,-4837.286619,9379.026422,-376.944846
71,40.904771,36.057161,-10.032878,19.699004,2.231635,7.131895,-24.470696,4.410395,-13.204925,-26.294748,...,-18966.645591,11632.358761,-12901.371958,8902.279381,10318.734066,-8122.708136,6216.983779,-5418.033784,9460.780788,710.752771
72,21.92583,29.11588,-7.804056,17.471738,-0.839963,9.088636,-11.989895,3.045577,-16.415086,-19.279431,...,-27299.091561,14649.032377,-12176.840075,13720.622507,7789.541965,-14294.473048,-5795.95199,-5570.025931,10706.272286,-3892.64463
73,41.6981,40.364958,-5.614934,24.033664,1.969619,13.88644,-20.656663,5.422852,-16.708905,-29.182099,...,-20486.861672,11114.787903,-10609.594113,8440.495837,8382.440185,-10050.377136,-190.957874,-2662.102127,7970.672674,-2953.076545


## Building Lists of Tasks Cased on Assay Type

In [22]:
abtiter_tasks = ['IgG-PT_day14', 'IgG-FHA_day14', 'IgG-PRN_day14']
cytof_tasks = ['Monocytes_day1', 'ASCs (Plasmablasts)_day7', 'CD4Tcells_day3']
rnaseq_tasks = ['CCL3_day3', 'IL-6_day3', 'NFKBIA_day7', 'XIST_day14']

## Make predictions for the Ab Titers

In [23]:
ctrain_features = train_features['final']
ctest_features = test_features['final']

In [24]:
for task in abtiter_tasks:
    
    print(task)

    # get the outcome vector 
    ctrain_outcome = train_outcomes[['subject_id', task]]
    
    # get the shared subjects 
    shared_subjects = set(ctrain_features.index.tolist()).intersection(ctrain_outcome.subject_id)
 
    # extract the shared subjects  
    xdata = ctrain_features.loc[ctrain_features.index.isin(shared_subjects)]
    ydata = ctrain_outcome.loc[ctrain_outcome.subject_id.isin(shared_subjects)]   
        
    # building the model
    lr_model = cmodel_function()
    lr_model.fit(xdata.values, ydata.iloc[:, 1].values)
    preds = lr_model.predict(ctest_features.values)
    
    # create the ranks df
    ranks = [ctest_features.index.tolist(), np.argsort(preds)]
    ranks = list(zip(*ranks))
    ranks = pd.DataFrame(ranks, columns=['subject_id', 'rank'])
    test_preds[task] = ranks


IgG-PT_day14


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

IgG-FHA_day14


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

IgG-PRN_day14


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

## Make predictions for the CyTOF

In [25]:
for task in cytof_tasks:
    
    print(task)

    # get the outcome vector 
    ctrain_outcome = train_outcomes[['subject_id', task]]
    
    # get the shared subjects 
    shared_subjects = set(ctrain_features.index.tolist()).intersection(ctrain_outcome.subject_id)
 
    # extract the shared subjects  
    xdata = ctrain_features.loc[ctrain_features.index.isin(shared_subjects)]
    ydata = ctrain_outcome.loc[ctrain_outcome.subject_id.isin(shared_subjects)]   
        
    # building the model
    lr_model = cmodel_function()
    lr_model.fit(xdata.values, ydata.iloc[:, 1].values)
    preds = lr_model.predict(ctest_features.values)
    
    # create the ranks df
    ranks = [ctest_features.index.tolist(), np.argsort(preds)]
    ranks = list(zip(*ranks))
    ranks = pd.DataFrame(ranks, columns=['subject_id', 'rank'])
    test_preds[task] = ranks

  model = cd_fast.enet_coordinate_descent(


Monocytes_day1


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


ASCs (Plasmablasts)_day7


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

CD4Tcells_day3


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


## Make predictions for the RNA-seq based tasks

In [26]:
ctrain_features = train_features['rnaseq']
ctest_features = test_features['rnaseq']

In [27]:
for task in rnaseq_tasks:
    
    print(task)

    # get the outcome vector 
    ctrain_outcome = train_outcomes[['subject_id', task]]
    
    # get the shared subjects 
    shared_subjects = set(ctrain_features.index.tolist()).intersection(ctrain_outcome.subject_id)
 
    # extract the shared subjects  
    xdata = ctrain_features.loc[ctrain_features.index.isin(shared_subjects)]
    ydata = ctrain_outcome.loc[ctrain_outcome.subject_id.isin(shared_subjects)]   
        
    # building the model
    lr_model = cmodel_function()
    lr_model.fit(xdata.values, ydata.iloc[:, 1].values)
    preds = lr_model.predict(ctest_features.values)
    
    # create the ranks df
    ranks = [ctest_features.index.tolist(), np.argsort(preds)]
    ranks = list(zip(*ranks))
    ranks = pd.DataFrame(ranks, columns=['subject_id', 'rank'])
    test_preds[task] = ranks
    

CCL3_day3
IL-6_day3


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  mode

NFKBIA_day7
XIST_day14


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_

In [28]:
test_preds.keys()

dict_keys(['IgG-PT_day14', 'IgG-FHA_day14', 'IgG-PRN_day14', 'Monocytes_day1', 'ASCs (Plasmablasts)_day7', 'CD4Tcells_day3', 'CCL3_day3', 'IL-6_day3', 'NFKBIA_day7', 'XIST_day14'])

## Save predictions to the Excel File

In [29]:
form_fn = 'results/main/submissions/forms/2020_submission_format.tsv'
form = pd.read_table(form_fn)

In [30]:
# creating a mapper between the task names for the data and the form
task_form_mapper = {'IgG-PT_day14': 'B1.1.a) Pertussis Toxin ',
                    'IgG-FHA_day14': 'B1.1.b) FHA',       
                    'IgG-PRN_day14': 'B1.1.c) Pertactin',  
                    'Monocytes_day1': 'B.2.c) Monocytes on day 1',
                    'ASCs (Plasmablasts)_day7': 'B.2.a) Plasmablast cells on day 7',
                    'CD4Tcells_day3': 'B.2.b) CD4 TCM cells on 3 days',
                    'CCL3_day3': 'B.3.a) CCL3 on day 3',
                    'IL-6_day3': 'B.3.b) IL-6 on day 3',
                    'NFKBIA_day7': 'B.3.c) NFKBIA at day 7',
                    'XIST_day14': 'B.3.d) XIST on day 14'}

In [31]:
# filling in the form
complete_form = form.copy()

In [32]:
for (task_name, form_name) in task_form_mapper.items():
    print(task_name, '-----------------', form_name)
     
    if task_name in test_preds:
    
        cranks = test_preds[task_name]

        # locate the indexes of the subjects within the form
        form_subject_indexes = form.Subject_ID.isin(cranks.subject_id.tolist())
        form_subject_indexes = form.Subject_ID[form_subject_indexes].index.tolist()

        # update the form for the current taskname
        complete_form.loc[form_subject_indexes, form_name] = cranks['rank'].astype(int)
    

IgG-PT_day14 ----------------- B1.1.a) Pertussis Toxin 
IgG-FHA_day14 ----------------- B1.1.b) FHA
IgG-PRN_day14 ----------------- B1.1.c) Pertactin
Monocytes_day1 ----------------- B.2.c) Monocytes on day 1
ASCs (Plasmablasts)_day7 ----------------- B.2.a) Plasmablast cells on day 7
CD4Tcells_day3 ----------------- B.2.b) CD4 TCM cells on 3 days
CCL3_day3 ----------------- B.3.a) CCL3 on day 3
IL-6_day3 ----------------- B.3.b) IL-6 on day 3
NFKBIA_day7 ----------------- B.3.c) NFKBIA at day 7
XIST_day14 ----------------- B.3.d) XIST on day 14


In [33]:
complete_form

Unnamed: 0,Subject_ID,Age,Biological_Sex_at_Birth,Vaccine_Priming_Status,B1.1.a) Pertussis Toxin,B1.1.b) FHA,B1.1.c) Pertactin,B.1.2.a) IgG1 - Pertussis toxin,B.1.2.b) IgG1 - FHA,B.1.2.c) IgG4 - Pertussis toxin,B.1.2.d) IgG4 - FHA,B.2.a) Plasmablast cells on day 7,B.2.b) CD4 TCM cells on 3 days,B.2.c) Monocytes on day 1,B.3.a) CCL3 on day 3,B.3.b) IL-6 on day 3,B.3.c) NFKBIA at day 7,B.3.d) XIST on day 14
0,61,32,Female,wP,,,,,,,,,,,,,,
1,62,25,Female,wP,,,,,,,,,,,,,,
2,63,23,Female,wP,28.0,25.0,5.0,,,,,19.0,25.0,28.0,29.0,29.0,29.0,26.0
3,64,25,Male,wP,27.0,14.0,7.0,,,,,11.0,14.0,5.0,3.0,3.0,27.0,15.0
4,65,28,Male,wP,26.0,19.0,0.0,,,,,20.0,19.0,10.0,27.0,27.0,3.0,7.0
5,66,42,Female,wP,25.0,27.0,16.0,,,,,29.0,2.0,7.0,4.0,9.0,30.0,20.0
6,67,47,Female,wP,24.0,2.0,11.0,,,,,9.0,27.0,13.0,9.0,4.0,9.0,28.0
7,68,47,Male,wP,23.0,31.0,8.0,,,,,17.0,31.0,30.0,30.0,10.0,32.0,2.0
8,69,29,Female,wP,22.0,22.0,12.0,,,,,30.0,22.0,9.0,32.0,30.0,10.0,32.0
9,70,21,Male,aP,,,,,,,,,,,10.0,16.0,16.0,27.0


In [28]:
outfn = os.path.join(output_dir, 'Completed_Predictions.tsv')

In [29]:
complete_form.to_csv(outfn, sep='\t', float_format='%.0f', index=False, header=True)