# Automatic model selection based on linear, periodic and RBF kernels

Model selection is run on the first 20 spatially variable genes identified in 'GPcounts_spatial.ipynb'.

In [1]:
import pandas as pd 
import numpy as np 
import gpflow
import tensorflow as tf
from GPcounts import NegativeBinomialLikelihood
from GPcounts.GPcounts_Module import Fit_GPcounts
from matplotlib import pyplot as plt

In [2]:
# Counts for the spatially varied genes inferred from 'GPcounts_spatial.ipynb'.
Y = pd.read_csv('../data/MouseOB/mouse_ob_SV_genes.csv', index_col=[0])
# MOUSE-OB counts dataset for all the genes.
Y_total = pd.read_csv('../data/MouseOB/Rep11_MOB_0.csv', index_col=[0])
# Tab-separated scale factors for all the genes; the factors for the spatially
# varied genes are extracted from this table later in the notebook.
scale = pd.read_csv('../data/MouseOB/scales_nb_wholedataset.txt', sep="\t")

In [3]:
# Each row label of Y is split on 'x'; the first piece is used as the x
# coordinate and the second as the y coordinate. Split once and reuse the parts.
coord_parts = Y.index.str.split('x')
spatial_locations = pd.DataFrame(index=Y.index)
spatial_locations['x'] = coord_parts.str.get(0).map(float)
spatial_locations['y'] = coord_parts.str.get(1).map(float)


In [4]:
# Row-wise sum of Y: the total count per spatial location across all columns.
spatial_locations['total_counts'] = Y.sum(axis=1)
# Select Y's rows by the labels of spatial_locations so both tables share the same row order.
Y = Y.loc[spatial_locations.index]
# Model inputs: only the two spatial coordinates.
X = spatial_locations.loc[:, ['x', 'y']]

In [7]:
# Extract the scale factors of the spatially varied genes into "scale_nb_model_sel".

# Label the columns of the scale-factor table with the gene names of the full
# counts table.
# NOTE(review): this assumes `scale` has one column per column of Y_total, in
# the same order - confirm against how the scale file was produced.
names = list(Y_total.columns)
# `set_axis` returns a new frame by default; the `inplace` keyword was removed
# in pandas 2.0, so it must not be passed.
scale_nb_model_sel = scale.set_axis(names, axis=1)

# Pick the scale-factor column of every gene present in Y.
scale_nb_new = [scale_nb_model_sel[col] for col in Y]

# One row per selected gene (each Series above becomes a row).
scale_nb_model_sel = pd.DataFrame(scale_nb_new)

In [8]:
# Transpose the selected scale factors into the final table
# "scale_nb_model_sel_tr", write it to "scale_nb_model_sel.csv" and display it.
scale_nb_model_sel_tr = scale_nb_model_sel.transpose()
scale_nb_model_sel_tr.to_csv('scale_nb_model_sel.csv')
scale_nb_model_sel_tr

Unnamed: 0,Glul,Sparcl1,Calm2,Cpe,Snap25,Ndrg4,Eef1a1,Ckb,Gng13,S100a5,...,Ywhag,Nptn,Calm3,Ptma,Rtn1,Stmn3,Hnrnpa2b1,Atp5g3,Cox4i1,Cd81
0,66.989928,73.227781,51.426685,64.440720,56.936740,56.361118,52.728510,59.573597,3.487625,8.867573,...,21.785099,17.866144,19.342966,16.985675,20.437024,15.047219,22.214694,18.060745,22.028045,17.719189
1,131.876393,144.156233,101.238588,126.858021,112.085684,110.952514,103.801362,117.276602,6.865740,17.456707,...,42.886152,35.171297,38.078570,33.438005,40.232332,29.621961,43.731854,35.554389,43.364416,34.882001
2,44.461489,48.601578,34.132101,42.769570,37.789148,37.407105,34.996128,39.539240,2.314751,5.885444,...,14.458859,11.857833,12.838006,11.273462,13.564136,9.986901,14.743983,11.986991,14.620104,11.760298
3,80.940518,88.477398,62.136244,77.860439,68.793763,68.098268,63.709173,71.979743,4.213920,10.714237,...,26.321825,21.586752,23.371121,20.522926,24.693015,18.180789,26.840883,21.821879,26.615365,21.409194
4,66.458714,72.647103,51.018884,63.929721,56.485246,55.914188,52.310386,59.101193,3.459969,8.797255,...,21.612348,17.724470,19.189581,16.850983,20.274963,14.927899,22.038537,17.917528,21.853368,17.578680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,102.552702,112.102029,78.727439,98.650202,87.162604,86.281402,80.720362,91.199282,5.339092,13.575079,...,33.350099,27.350699,29.611518,26.002817,31.286375,23.035299,34.007753,27.648607,33.722018,27.125730
256,91.325917,99.829848,70.108885,87.850637,77.620624,76.835891,71.883636,81.215394,4.754604,12.088970,...,29.699153,24.356527,26.369847,23.156202,27.861352,20.513548,30.284811,24.621823,30.030357,24.156187
257,97.415262,106.486210,74.783541,93.708261,82.796140,81.959083,76.676626,86.630599,5.071627,12.895027,...,31.679406,25.980549,28.128111,24.700190,29.719065,21.881332,32.304114,26.263534,32.032693,25.766851
258,59.987894,65.573744,46.051378,57.705138,50.985502,50.470046,47.217132,53.346745,3.123086,7.940701,...,19.508040,15.998709,17.321168,15.210269,18.300871,13.474429,19.892732,16.172970,19.725593,15.867114


In [9]:
# Transpose Y so that the genes become the rows (later cells select genes by row).
# NOTE: this cell is not idempotent - running it a second time flips Y back.
Y = Y.transpose()

In [11]:
# Restrict model selection to the first 20 spatially varied genes (first 20 rows of Y).
Y_run = Y.iloc[:20]

In [12]:
# Scale factors for the spatially varied genes, as computed earlier in this
# notebook (the same table is also written to 'scale_nb_model_sel.csv').
# NOTE(review): this rebinds `scale`, which earlier held the raw whole-dataset
# scale table, so re-running the scale-extraction cell after this one will not
# start from the raw table. Consider a distinct name if nothing else relies on it.
# NOTE(review): `scale` has a column for every gene in Y, while Y_run holds only
# the first 20 genes - confirm Fit_GPcounts aligns the two as intended.
scale = scale_nb_model_sel_tr
nb_scaled = True  # pass the scale factors to the model
gene_name = Y_run.index  # genes to fit
likelihood = 'Negative_binomial'
gp_counts = Fit_GPcounts(X, Y_run.loc[gene_name], scale=scale, nb_scaled=nb_scaled, safe_mode=False)

In [13]:
# Run GPcounts' model-selection test with the chosen likelihood on the genes
# given to Fit_GPcounts. Per the log below it fits Linear, Periodic and RBF
# kernels in turn; this is the slow step of the notebook (several minutes).
results = gp_counts.Model_selection_test(likelihood)
# Display the resulting table.
results

  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


  5%|▌         | 1/20 [00:04<01:33,  4.90s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 10%|█         | 2/20 [00:07<01:15,  4.21s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 15%|█▌        | 3/20 [00:10<01:03,  3.71s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 20%|██        | 4/20 [00:12<00:53,  3.36s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 25%|██▌       | 5/20 [00:15<00:47,  3.18s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 30%|███       | 6/20 [00:18<00:42,  3.06s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 35%|███▌      | 7/20 [00:20<00:38,  2.92s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 40%|████      | 8/20 [00:23<00:34,  2.86s/it]

Fitting GP with Linear Kernel


 45%|████▌     | 9/20 [00:25<00:29,  2.65s/it]

Fitting GP with Linear Kernel


 50%|█████     | 10/20 [00:27<00:23,  2.38s/it]

Fitting GP with Linear Kernel


 55%|█████▌    | 11/20 [00:29<00:19,  2.22s/it]

Fitting GP with Linear Kernel


 60%|██████    | 12/20 [00:30<00:16,  2.06s/it]

Fitting GP with Linear Kernel


 65%|██████▌   | 13/20 [00:32<00:13,  1.93s/it]

Fitting GP with Linear Kernel


 70%|███████   | 14/20 [00:34<00:11,  1.86s/it]

Fitting GP with Linear Kernel


 75%|███████▌  | 15/20 [00:35<00:09,  1.82s/it]

Fitting GP with Linear Kernel


 80%|████████  | 16/20 [00:37<00:07,  1.80s/it]

Fitting GP with Linear Kernel


 85%|████████▌ | 17/20 [00:39<00:05,  1.85s/it]

Fitting GP with Linear Kernel


 90%|█████████ | 18/20 [00:41<00:03,  1.92s/it]

Fitting GP with Linear Kernel


 95%|█████████▌| 19/20 [00:43<00:02,  2.02s/it]

Fitting GP with Linear Kernel


100%|██████████| 20/20 [00:46<00:00,  2.31s/it]
  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with Periodic Kernel


  5%|▌         | 1/20 [00:13<04:11, 13.24s/it]

Fitting GP with Periodic Kernel


 10%|█         | 2/20 [00:31<04:27, 14.84s/it]

Fitting GP with Periodic Kernel


 15%|█▌        | 3/20 [00:43<03:54, 13.80s/it]

Fitting GP with Periodic Kernel


 20%|██        | 4/20 [01:02<04:07, 15.47s/it]

Fitting GP with Periodic Kernel


 25%|██▌       | 5/20 [01:32<04:56, 19.76s/it]

Fitting GP with Periodic Kernel


 30%|███       | 6/20 [02:00<05:13, 22.41s/it]

Fitting GP with Periodic Kernel


 35%|███▌      | 7/20 [02:28<05:11, 23.95s/it]

Fitting GP with Periodic Kernel


 40%|████      | 8/20 [02:58<05:10, 25.90s/it]

Fitting GP with Periodic Kernel


 45%|████▌     | 9/20 [03:20<04:30, 24.58s/it]

Fitting GP with Periodic Kernel


 50%|█████     | 10/20 [03:59<04:48, 28.85s/it]

Fitting GP with Periodic Kernel


 55%|█████▌    | 11/20 [04:33<04:34, 30.47s/it]

Fitting GP with Periodic Kernel


 60%|██████    | 12/20 [04:59<03:53, 29.16s/it]

Fitting GP with Periodic Kernel


 65%|██████▌   | 13/20 [05:55<04:20, 37.20s/it]

Fitting GP with Periodic Kernel


 70%|███████   | 14/20 [06:20<03:21, 33.65s/it]

Fitting GP with Periodic Kernel


 75%|███████▌  | 15/20 [07:10<03:11, 38.38s/it]

Fitting GP with Periodic Kernel


 80%|████████  | 16/20 [07:43<02:27, 36.79s/it]

Fitting GP with Periodic Kernel


 85%|████████▌ | 17/20 [08:18<01:49, 36.42s/it]

Fitting GP with Periodic Kernel


 90%|█████████ | 18/20 [08:40<01:03, 31.88s/it]

Fitting GP with Periodic Kernel


 95%|█████████▌| 19/20 [09:05<00:29, 29.84s/it]

Fitting GP with Periodic Kernel


100%|██████████| 20/20 [09:33<00:00, 28.67s/it]
  0%|          | 0/20 [00:00<?, ?it/s]

Fitting GP with RBF Kernel


  5%|▌         | 1/20 [00:06<02:07,  6.72s/it]

Fitting GP with RBF Kernel


 10%|█         | 2/20 [00:14<02:08,  7.15s/it]

Fitting GP with RBF Kernel


 15%|█▌        | 3/20 [00:22<02:05,  7.40s/it]

Fitting GP with RBF Kernel


 20%|██        | 4/20 [00:32<02:08,  8.06s/it]

Fitting GP with RBF Kernel


 25%|██▌       | 5/20 [00:42<02:11,  8.75s/it]

Fitting GP with RBF Kernel


 30%|███       | 6/20 [00:55<02:17,  9.85s/it]

Fitting GP with RBF Kernel


 35%|███▌      | 7/20 [01:09<02:26, 11.28s/it]

Fitting GP with RBF Kernel


 40%|████      | 8/20 [01:28<02:43, 13.63s/it]

Fitting GP with RBF Kernel


 45%|████▌     | 9/20 [01:31<01:52, 10.22s/it]

Fitting GP with RBF Kernel


 50%|█████     | 10/20 [01:33<01:19,  7.97s/it]

Fitting GP with RBF Kernel


 55%|█████▌    | 11/20 [01:36<00:57,  6.36s/it]

Fitting GP with RBF Kernel


 60%|██████    | 12/20 [01:38<00:40,  5.07s/it]

Fitting GP with RBF Kernel


 65%|██████▌   | 13/20 [01:41<00:29,  4.27s/it]

Fitting GP with RBF Kernel


 70%|███████   | 14/20 [01:43<00:21,  3.66s/it]

Fitting GP with RBF Kernel


 75%|███████▌  | 15/20 [01:45<00:16,  3.25s/it]

Fitting GP with RBF Kernel


 80%|████████  | 16/20 [01:47<00:11,  2.80s/it]

Fitting GP with RBF Kernel


 85%|████████▌ | 17/20 [01:49<00:07,  2.65s/it]

Fitting GP with RBF Kernel


 90%|█████████ | 18/20 [01:51<00:04,  2.43s/it]

Fitting GP with RBF Kernel


 95%|█████████▌| 19/20 [01:53<00:02,  2.30s/it]

Fitting GP with RBF Kernel


100%|██████████| 20/20 [01:55<00:00,  5.77s/it]


Unnamed: 0,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,BIC,Gene,Model,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,-1241.607837,-1313.652413,72.044576,2505.4584,Glul,RBF,2.247871e-56,9.509018e-61,1.0,0.0,0.0
1,-1234.118514,-1318.192105,84.073592,2490.479754,Sparcl1,RBF,9.817604e-67,1.516478e-71,1.0,0.0,0.0
2,-1114.823577,-1175.277251,60.453674,2251.889881,Calm2,RBF,5.336216e-46,8.219199e-51,1.0,7.549517e-15,1.161464e-14
3,-1198.486329,-1254.280174,55.793846,2419.215384,Cpe,RBF,4.629769e-42,3.2294649999999996e-48,1.0,8.049117e-14,1.00614e-13
4,-1143.186331,-1187.638005,44.451674,2308.615388,Snap25,RBF,4.0998900000000003e-32,1.004192e-36,1.0,2.607137e-11,3.06722e-11
5,-1104.334452,-1141.532142,37.19769,2230.911631,Ndrg4,RBF,2.3125790000000002e-25,1.063888e-29,1.0,1.067401e-09,1.186001e-09
6,-1090.546884,-1116.379641,25.832757,2203.336494,Eef1a1,RBF,8.968134e-16,2.2421520000000003e-17,1.0,3.723158e-07,3.919114e-07
7,-1075.859082,-1092.873306,17.014224,2173.96089,Ckb,RBF,9.21727e-07,0.0001188336,0.99988,3.710084e-05,3.710084e-05
8,-640.205032,-714.005117,73.800085,1302.652791,Gng13,RBF,6.327532e-53,1.4699820000000001e-33,1.0,0.0,0.0
9,-828.991255,-937.841093,108.849838,1680.225237,S100a5,RBF,6.925603e-84,5.860544999999999e-44,1.0,0.0,0.0


In [14]:
# Display the model-selection results table again.
results

Unnamed: 0,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,BIC,Gene,Model,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,-1241.607837,-1313.652413,72.044576,2505.4584,Glul,RBF,2.247871e-56,9.509018e-61,1.0,0.0,0.0
1,-1234.118514,-1318.192105,84.073592,2490.479754,Sparcl1,RBF,9.817604e-67,1.516478e-71,1.0,0.0,0.0
2,-1114.823577,-1175.277251,60.453674,2251.889881,Calm2,RBF,5.336216e-46,8.219199e-51,1.0,7.549517e-15,1.161464e-14
3,-1198.486329,-1254.280174,55.793846,2419.215384,Cpe,RBF,4.629769e-42,3.2294649999999996e-48,1.0,8.049117e-14,1.00614e-13
4,-1143.186331,-1187.638005,44.451674,2308.615388,Snap25,RBF,4.0998900000000003e-32,1.004192e-36,1.0,2.607137e-11,3.06722e-11
5,-1104.334452,-1141.532142,37.19769,2230.911631,Ndrg4,RBF,2.3125790000000002e-25,1.063888e-29,1.0,1.067401e-09,1.186001e-09
6,-1090.546884,-1116.379641,25.832757,2203.336494,Eef1a1,RBF,8.968134e-16,2.2421520000000003e-17,1.0,3.723158e-07,3.919114e-07
7,-1075.859082,-1092.873306,17.014224,2173.96089,Ckb,RBF,9.21727e-07,0.0001188336,0.99988,3.710084e-05,3.710084e-05
8,-640.205032,-714.005117,73.800085,1302.652791,Gng13,RBF,6.327532e-53,1.4699820000000001e-33,1.0,0.0,0.0
9,-828.991255,-937.841093,108.849838,1680.225237,S100a5,RBF,6.925603e-84,5.860544999999999e-44,1.0,0.0,0.0
