# Automatic model selection based on linear, periodic and RBF kernels

Model selection is run on the first 20 spatially variable genes identified in 'GPcounts_spatial.ipynb'. 

In [1]:
import pandas as pd 
import numpy as np 
import gpflow
import tensorflow as tf
from GPcounts import NegativeBinomialLikelihood
from GPcounts.GPcounts_Module import Fit_GPcounts
from matplotlib import pyplot as plt

In [2]:
# Counts for the spatially varied genes inferred from 'GPcounts_spatial.ipynb'.
Y = pd.read_csv(
    '../data/MouseOB/mouse_ob_SV_genes.csv',
    index_col=[0],
)

# MOUSE-OB counts dataset for all the genes.
Y_total = pd.read_csv(
    '../data/MouseOB/Rep11_MOB_0.csv',
    index_col=[0],
)

# Scale factors for all the genes (tab-separated file). The factors belonging
# to the spatially varied genes are extracted from this table further down.
scale = pd.read_csv(
    '../data/MouseOB/scales_nb_wholedataset.txt',
    sep="\t",
)

In [3]:
# The row labels of Y are split on the character 'x'; the first piece is used
# as the x coordinate and the second as the y coordinate (both cast to float).
spot_xy = Y.index.str.split('x')
spatial_locations = pd.DataFrame(
    {
        'x': spot_xy.str.get(0).map(float),
        'y': spot_xy.str.get(1).map(float),
    },
    index=Y.index,
)


In [4]:
# Row-wise sum of Y, stored next to the coordinates of each row label.
spatial_locations['total_counts'] = Y.sum(axis=1)

# Put the rows of Y in the same order as spatial_locations.
Y = Y.loc[spatial_locations.index]

# Coordinates only: this is the input passed to the GP fit later on.
X = spatial_locations.loc[:, ['x', 'y']]

In [5]:
# Extract the scale factors of the spatially varied genes and store them in
# "scale_nb_model_sel".
#
# `scale` holds the factors for all the genes; its columns are relabelled with
# the column labels of Y_total so they can be looked up by the column labels of Y.
# NOTE(review): this assumes the columns of `scale` are in the same order as
# the columns of Y_total -- confirm against how the scale file was produced.
names = list(Y_total.columns)

# `set_axis` returns a new frame. The `inplace` keyword is deliberately not
# passed: it was removed in pandas 2.0 and passing it raises a TypeError there.
scale_all_genes = scale.set_axis(names, axis=1)

# One Series of scale factors per column of Y, in Y's column order.
scale_nb_new = [scale_all_genes[col] for col in Y]

# Building a frame from a list of Series puts each Series on its own row, so
# the result has one row per selected column label (it is transposed later).
scale_nb_model_sel = pd.DataFrame(scale_nb_new)

In [6]:
# Final scale factors for the spatially varied genes: the transpose of
# scale_nb_model_sel, written to "scale_nb_model_sel.csv" in the working
# directory and displayed as the cell output.
scale_nb_model_sel_tr = scale_nb_model_sel.transpose()
scale_nb_model_sel_tr.to_csv('scale_nb_model_sel.csv')
scale_nb_model_sel_tr

Unnamed: 0,Glul,Sparcl1,Calm2,Cpe,Snap25,Ndrg4,Eef1a1,Ckb,Gng13,S100a5,...,Ywhag,Nptn,Calm3,Ptma,Rtn1,Stmn3,Hnrnpa2b1,Atp5g3,Cox4i1,Cd81
0,66.989928,73.227781,51.426685,64.440720,56.936740,56.361118,52.728510,59.573597,3.487625,8.867573,...,21.785099,17.866144,19.342966,16.985675,20.437024,15.047219,22.214694,18.060745,22.028045,17.719189
1,131.876393,144.156233,101.238588,126.858021,112.085684,110.952514,103.801362,117.276602,6.865740,17.456707,...,42.886152,35.171297,38.078570,33.438005,40.232332,29.621961,43.731854,35.554389,43.364416,34.882001
2,44.461489,48.601578,34.132101,42.769570,37.789148,37.407105,34.996128,39.539240,2.314751,5.885444,...,14.458859,11.857833,12.838006,11.273462,13.564136,9.986901,14.743983,11.986991,14.620104,11.760298
3,80.940518,88.477398,62.136244,77.860439,68.793763,68.098268,63.709173,71.979743,4.213920,10.714237,...,26.321825,21.586752,23.371121,20.522926,24.693015,18.180789,26.840883,21.821879,26.615365,21.409194
4,66.458714,72.647103,51.018884,63.929721,56.485246,55.914188,52.310386,59.101193,3.459969,8.797255,...,21.612348,17.724470,19.189581,16.850983,20.274963,14.927899,22.038537,17.917528,21.853368,17.578680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,102.552702,112.102029,78.727439,98.650202,87.162604,86.281402,80.720362,91.199282,5.339092,13.575079,...,33.350099,27.350699,29.611518,26.002817,31.286375,23.035299,34.007753,27.648607,33.722018,27.125730
256,91.325917,99.829848,70.108885,87.850637,77.620624,76.835891,71.883636,81.215394,4.754604,12.088970,...,29.699153,24.356527,26.369847,23.156202,27.861352,20.513548,30.284811,24.621823,30.030357,24.156187
257,97.415262,106.486210,74.783541,93.708261,82.796140,81.959083,76.676626,86.630599,5.071627,12.895027,...,31.679406,25.980549,28.128111,24.700190,29.719065,21.881332,32.304114,26.263534,32.032693,25.766851
258,59.987894,65.573744,46.051378,57.705138,50.985502,50.470046,47.217132,53.346745,3.123086,7.940701,...,19.508040,15.998709,17.321168,15.210269,18.300871,13.474429,19.892732,16.172970,19.725593,15.867114


In [7]:
# Swap the rows and columns of Y so that genes can be selected by row below.
# NOTE: Y is rebound to its own transpose, so running this cell a second time
# flips it back; run it exactly once per kernel session.
Y = Y.transpose()

In [8]:
# Run model selection for the first 20 spatially varied genes only
# (the first 20 rows of Y, all columns).
Y_run = Y.iloc[:20]

In [9]:
likelihood = 'Negative_binomial'

# nb_scaled is set to True so that the scale factors are passed to the model.
nb_scaled = True

# The scale factors are taken from the in-memory frame computed earlier; the
# CSV copy written to disk is not re-read here.
scale = scale_nb_model_sel_tr

# Every row label of Y_run is selected.
gene_name = Y_run.index

gp_counts = Fit_GPcounts(
    X,
    Y_run.loc[gene_name],
    scale=scale,
    nb_scaled=nb_scaled,
    safe_mode=False,
)

In [10]:
# Run the model-selection test on the Fit_GPcounts object with the likelihood
# name set earlier ('Negative_binomial'), keep the returned table in `results`
# and display it as the cell output.
# NOTE(review): per the notebook title this presumably compares linear,
# periodic and RBF kernels for each gene -- confirm against the GPcounts docs.
results = gp_counts.Model_selection_test(likelihood)
results

  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with RBF Kernel


  5%|▌         | 1/20 [00:10<03:27, 10.92s/it]

Fitting GP with RBF Kernel


 10%|█         | 2/20 [00:23<03:25, 11.44s/it]

Fitting GP with RBF Kernel


 15%|█▌        | 3/20 [00:36<03:22, 11.94s/it]

Fitting GP with RBF Kernel


 20%|██        | 4/20 [00:59<04:03, 15.22s/it]

Fitting GP with RBF Kernel


 25%|██▌       | 5/20 [01:25<04:35, 18.37s/it]

Fitting GP with RBF Kernel


 30%|███       | 6/20 [01:50<04:44, 20.34s/it]

Fitting GP with RBF Kernel


 35%|███▌      | 7/20 [02:20<05:03, 23.33s/it]

Fitting GP with RBF Kernel


 40%|████      | 8/20 [03:19<06:48, 34.01s/it]

Fitting GP with RBF Kernel


 45%|████▌     | 9/20 [03:27<04:49, 26.35s/it]

Fitting GP with RBF Kernel


 50%|█████     | 10/20 [03:35<03:26, 20.70s/it]

Fitting GP with RBF Kernel


 55%|█████▌    | 11/20 [03:42<02:28, 16.48s/it]

Fitting GP with RBF Kernel


 60%|██████    | 12/20 [03:48<01:48, 13.56s/it]

Fitting GP with RBF Kernel


 65%|██████▌   | 13/20 [03:56<01:21, 11.68s/it]

Fitting GP with RBF Kernel


 70%|███████   | 14/20 [04:06<01:06, 11.16s/it]

Fitting GP with RBF Kernel


 75%|███████▌  | 15/20 [04:16<00:55, 11.09s/it]

Fitting GP with RBF Kernel


 80%|████████  | 16/20 [04:22<00:37,  9.39s/it]

Fitting GP with RBF Kernel


 85%|████████▌ | 17/20 [04:27<00:24,  8.12s/it]

Fitting GP with RBF Kernel


 90%|█████████ | 18/20 [04:32<00:14,  7.26s/it]

Fitting GP with RBF Kernel


 95%|█████████▌| 19/20 [04:38<00:06,  6.64s/it]

Fitting GP with RBF Kernel


100%|██████████| 20/20 [04:43<00:00, 14.17s/it]
  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with RBF Kernel


  5%|▌         | 1/20 [00:16<05:20, 16.89s/it]

Fitting GP with RBF Kernel


 10%|█         | 2/20 [00:41<05:43, 19.09s/it]

Fitting GP with RBF Kernel


 15%|█▌        | 3/20 [01:00<05:25, 19.16s/it]

Fitting GP with RBF Kernel


 20%|██        | 4/20 [01:22<05:18, 19.88s/it]

Fitting GP with RBF Kernel


 25%|██▌       | 5/20 [01:42<05:00, 20.01s/it]

Fitting GP with RBF Kernel


 30%|███       | 6/20 [01:57<04:20, 18.59s/it]

Fitting GP with RBF Kernel


 35%|███▌      | 7/20 [02:20<04:16, 19.75s/it]

Fitting GP with RBF Kernel


 40%|████      | 8/20 [03:12<05:53, 29.48s/it]

Fitting GP with RBF Kernel


 45%|████▌     | 9/20 [03:16<04:00, 21.90s/it]

Fitting GP with RBF Kernel


 50%|█████     | 10/20 [03:21<02:49, 16.98s/it]

Fitting GP with RBF Kernel


 55%|█████▌    | 11/20 [03:26<01:59, 13.30s/it]

Fitting GP with RBF Kernel


 60%|██████    | 12/20 [03:32<01:27, 10.91s/it]

Fitting GP with RBF Kernel


 65%|██████▌   | 13/20 [03:37<01:04,  9.22s/it]

Fitting GP with RBF Kernel


 70%|███████   | 14/20 [03:43<00:49,  8.20s/it]

Fitting GP with RBF Kernel


 75%|███████▌  | 15/20 [03:48<00:36,  7.39s/it]

Fitting GP with RBF Kernel


 80%|████████  | 16/20 [03:53<00:26,  6.60s/it]

Fitting GP with RBF Kernel


 85%|████████▌ | 17/20 [03:58<00:18,  6.12s/it]

Fitting GP with RBF Kernel


 90%|█████████ | 18/20 [04:02<00:11,  5.66s/it]

Fitting GP with RBF Kernel


 95%|█████████▌| 19/20 [04:07<00:05,  5.25s/it]

Fitting GP with RBF Kernel


100%|██████████| 20/20 [04:12<00:00, 12.64s/it]
  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with RBF Kernel


  5%|▌         | 1/20 [00:21<06:50, 21.60s/it]

Fitting GP with RBF Kernel


 10%|█         | 2/20 [00:47<06:51, 22.85s/it]

Fitting GP with RBF Kernel


 15%|█▌        | 3/20 [01:08<06:17, 22.21s/it]

Fitting GP with RBF Kernel


 20%|██        | 4/20 [01:31<06:01, 22.56s/it]

Fitting GP with RBF Kernel


 25%|██▌       | 5/20 [01:52<05:30, 22.05s/it]

Fitting GP with RBF Kernel


 30%|███       | 6/20 [02:16<05:15, 22.55s/it]

Fitting GP with RBF Kernel


 35%|███▌      | 7/20 [02:51<05:41, 26.30s/it]

Fitting GP with RBF Kernel


 40%|████      | 8/20 [03:27<05:51, 29.27s/it]

Fitting GP with RBF Kernel


 45%|████▌     | 9/20 [03:30<03:54, 21.34s/it]

Fitting GP with RBF Kernel


 50%|█████     | 10/20 [03:33<02:38, 15.88s/it]

Fitting GP with RBF Kernel


 55%|█████▌    | 11/20 [03:36<01:48, 12.04s/it]

Fitting GP with RBF Kernel


 60%|██████    | 12/20 [03:39<01:13,  9.25s/it]

Fitting GP with RBF Kernel


 65%|██████▌   | 13/20 [03:42<00:52,  7.56s/it]

Fitting GP with RBF Kernel


 70%|███████   | 14/20 [03:46<00:38,  6.34s/it]

Fitting GP with RBF Kernel


 75%|███████▌  | 15/20 [03:49<00:26,  5.36s/it]

Fitting GP with RBF Kernel


 80%|████████  | 16/20 [03:51<00:17,  4.46s/it]

Fitting GP with RBF Kernel


 85%|████████▌ | 17/20 [03:54<00:12,  4.01s/it]

Fitting GP with RBF Kernel


 90%|█████████ | 18/20 [03:57<00:07,  3.57s/it]

Fitting GP with RBF Kernel


 95%|█████████▌| 19/20 [03:59<00:03,  3.29s/it]

Fitting GP with RBF Kernel


100%|██████████| 20/20 [04:02<00:00, 12.12s/it]


Unnamed: 0,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,BIC,Gene,Model,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,-1241.607845,-1313.652413,72.044568,2505.458416,Glul,Linear,0.333333,0.333333,0.333333,0.0,0.0
1,-1234.118478,-1318.192105,84.073627,2490.479683,Sparcl1,Linear,0.333333,0.333333,0.333333,0.0,0.0
2,-1114.823577,-1175.277251,60.453675,2251.88988,Calm2,Linear,0.333333,0.333333,0.333333,7.549517e-15,1.161464e-14
3,-1198.486329,-1254.280175,55.793846,2419.215385,Cpe,Linear,0.333333,0.333333,0.333333,8.049117e-14,1.00614e-13
4,-1143.186334,-1187.638004,44.451671,2308.615394,Snap25,Linear,0.333333,0.333333,0.333333,2.607148e-11,3.067233e-11
5,-1104.334446,-1141.532142,37.197697,2230.911618,Ndrg4,Linear,0.333333,0.333333,0.333333,1.067398e-09,1.185997e-09
6,-1090.546891,-1116.379642,25.832751,2203.336508,Eef1a1,Linear,0.333333,0.333333,0.333333,3.723171e-07,3.919127e-07
7,-1075.859083,-1092.873306,17.014223,2173.960892,Ckb,Linear,0.333333,0.333333,0.333333,3.710086e-05,3.710086e-05
8,-640.205032,-714.00513,73.800098,1302.652791,Gng13,Linear,0.333333,0.333333,0.333333,0.0,0.0
9,-828.991259,-937.841042,108.849783,1680.225245,S100a5,Linear,0.333333,0.333333,0.333333,0.0,0.0


In [11]:
# Display the model-selection results table again (no recomputation).
results

Unnamed: 0,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,BIC,Gene,Model,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,-1241.607845,-1313.652413,72.044568,2505.458416,Glul,Linear,0.333333,0.333333,0.333333,0.0,0.0
1,-1234.118478,-1318.192105,84.073627,2490.479683,Sparcl1,Linear,0.333333,0.333333,0.333333,0.0,0.0
2,-1114.823577,-1175.277251,60.453675,2251.88988,Calm2,Linear,0.333333,0.333333,0.333333,7.549517e-15,1.161464e-14
3,-1198.486329,-1254.280175,55.793846,2419.215385,Cpe,Linear,0.333333,0.333333,0.333333,8.049117e-14,1.00614e-13
4,-1143.186334,-1187.638004,44.451671,2308.615394,Snap25,Linear,0.333333,0.333333,0.333333,2.607148e-11,3.067233e-11
5,-1104.334446,-1141.532142,37.197697,2230.911618,Ndrg4,Linear,0.333333,0.333333,0.333333,1.067398e-09,1.185997e-09
6,-1090.546891,-1116.379642,25.832751,2203.336508,Eef1a1,Linear,0.333333,0.333333,0.333333,3.723171e-07,3.919127e-07
7,-1075.859083,-1092.873306,17.014223,2173.960892,Ckb,Linear,0.333333,0.333333,0.333333,3.710086e-05,3.710086e-05
8,-640.205032,-714.00513,73.800098,1302.652791,Gng13,Linear,0.333333,0.333333,0.333333,0.0,0.0
9,-828.991259,-937.841042,108.849783,1680.225245,S100a5,Linear,0.333333,0.333333,0.333333,0.0,0.0
