# Automatic model selection based on linear, periodic and RBF kernels

The test is run on the first 20 spatially varied genes inferred from 'GPcounts_spatial.ipynb'. 

In [1]:
import pandas as pd 
import numpy as np 
import gpflow
import tensorflow as tf
from GPcounts.RNA_seq_GP import rna_seq_gp
from matplotlib import pyplot as plt

In [2]:
# Counts for the spatially varied genes inferred from 'GPcounts_spatial.ipynb'.
Y = pd.read_csv(
    '../data/MouseOB/mouse_ob_SV_genes.csv',
    index_col=[0],
)

# MOUSE-OB counts dataset covering all the genes.
Y_total = pd.read_csv(
    '../data/MouseOB/Rep11_MOB_0.csv',
    index_col=[0],
)

# Tab-separated scale factors for all the genes, loaded so that the factors for the
# spatially varied genes used in this analysis can be extracted.
# NOTE(review): `scale` is reassigned in a later cell before it is passed to rna_seq_gp.
scale = pd.read_csv(
    '../data/MouseOB/scales_nb.txt',
    sep="\t",
)

In [3]:
# Each row label of Y is split on 'x': the first piece becomes the 'x' coordinate and
# the second piece the 'y' coordinate, both converted to float. The resulting frame
# shares Y's index.
spatial_locations = pd.DataFrame(index=Y.index).assign(
    x=Y.index.str.split('x').str.get(0).map(float),
    y=Y.index.str.split('x').str.get(1).map(float),
)

In [4]:
# Row-wise sum of Y, stored next to the coordinates.
spatial_locations['total_counts'] = Y.sum(axis=1)

# Select the rows of Y by the labels held in spatial_locations.
Y = Y.loc[spatial_locations.index, :]

# Only the two coordinate columns are kept as X (passed to rna_seq_gp in a later cell).
X = spatial_locations.loc[:, ['x', 'y']]

In [5]:
# Read 'scale_nb_model_sel.csv', using its first column as the index. A later cell
# assigns this frame to `scale` and passes it to rna_seq_gp.
# NOTE(review): presumably the scale factors for the genes used in model selection — confirm.
scale_nb_model_sel_tr = pd.read_csv('../data/MouseOB/scale_nb_model_sel.csv', index_col=[0])

In [6]:
# Rebind Y to the transpose of Y_total (the table loaded from 'Rep11_MOB_0.csv').
# NOTE(review): this replaces the table loaded from 'mouse_ob_SV_genes.csv', so every
# later use of Y draws on the all-genes data; confirm that this matches the intended
# "first 20 spatially varied genes" scope of the notebook.
Y = Y_total.T

In [7]:
# Keep only the first 20 rows of Y (all columns) for the model-selection run.
Y_run = Y.iloc[0:20,:]

In [8]:
# Scale factors handed to the GP object (the frame read from 'scale_nb_model_sel.csv').
scale = scale_nb_model_sel_tr

# Row labels of the subset selected for this run.
gene_name = Y_run.index

# Likelihood name passed to the model-selection test in the next cell.
likelihood = 'Negative_binomial'

# Build the GPcounts object from the coordinates X and the selected rows of Y_run.
gp_counts = rna_seq_gp(X, Y_run.loc[gene_name], scale=scale, safe_mode=False)
 

In [9]:
# Run the model-selection test with the likelihood chosen above. The bare `results`
# on the last line makes the returned object the cell's displayed output.
results = gp_counts.Model_selection_test(likelihood)
results

  0%|                                                    | 0/20 [00:00<?, ?it/s]

Fitting GP with Linear Kernel


  5%|██▏                                         | 1/20 [00:03<01:08,  3.63s/it]

Fitting GP with Linear Kernel


 10%|████▍                                       | 2/20 [00:05<00:43,  2.40s/it]

Fitting GP with Linear Kernel


 15%|██████▌                                     | 3/20 [00:06<00:35,  2.11s/it]

Fitting GP with Linear Kernel


 20%|████████▊                                   | 4/20 [00:08<00:31,  1.95s/it]

Fitting GP with Linear Kernel


 25%|███████████                                 | 5/20 [00:10<00:28,  1.91s/it]

Fitting GP with Linear Kernel


 30%|█████████████▏                              | 6/20 [00:13<00:31,  2.27s/it]

Fitting GP with Linear Kernel


 35%|███████████████▍                            | 7/20 [00:15<00:26,  2.06s/it]

Fitting GP with Linear Kernel


 40%|█████████████████▌                          | 8/20 [00:17<00:24,  2.02s/it]

Fitting GP with Linear Kernel


 45%|███████████████████▊                        | 9/20 [00:18<00:20,  1.83s/it]

Fitting GP with Linear Kernel


 50%|█████████████████████▌                     | 10/20 [00:19<00:16,  1.68s/it]

Fitting GP with Linear Kernel


 55%|███████████████████████▋                   | 11/20 [00:20<00:13,  1.51s/it]

Fitting GP with Linear Kernel


 60%|█████████████████████████▊                 | 12/20 [00:21<00:10,  1.35s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 65%|███████████████████████████▉               | 13/20 [00:23<00:10,  1.48s/it]

Fitting GP with Linear Kernel


 70%|██████████████████████████████             | 14/20 [00:25<00:08,  1.45s/it]

Fitting GP with Linear Kernel


 75%|████████████████████████████████▎          | 15/20 [00:26<00:06,  1.35s/it]

Fitting GP with Linear Kernel


 80%|██████████████████████████████████▍        | 16/20 [00:27<00:05,  1.27s/it]

Fitting GP with Linear Kernel


 85%|████████████████████████████████████▌      | 17/20 [00:28<00:03,  1.26s/it]

Fitting GP with Linear Kernel


 90%|██████████████████████████████████████▋    | 18/20 [00:29<00:02,  1.31s/it]

Fitting GP with Linear Kernel


 95%|████████████████████████████████████████▊  | 19/20 [00:30<00:01,  1.21s/it]

Fitting GP with Linear Kernel


100%|███████████████████████████████████████████| 20/20 [00:31<00:00,  1.59s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)
  0%|                                                    | 0/20 [00:00<?, ?it/s]

Fitting GP with Periodic Kernel


  5%|██▏                                         | 1/20 [00:04<01:20,  4.25s/it]

Fitting GP with Periodic Kernel


 10%|████▍                                       | 2/20 [00:10<01:35,  5.29s/it]

Fitting GP with Periodic Kernel


 15%|██████▌                                     | 3/20 [00:27<02:59, 10.56s/it]

Fitting GP with Periodic Kernel


 20%|████████▊                                   | 4/20 [00:37<02:47, 10.47s/it]

Fitting GP with Periodic Kernel


 25%|███████████                                 | 5/20 [00:44<02:15,  9.06s/it]

Fitting GP with Periodic Kernel


 30%|█████████████▏                              | 6/20 [01:03<02:56, 12.60s/it]

Fitting GP with Periodic Kernel


 35%|███████████████▍                            | 7/20 [01:11<02:22, 11.00s/it]

Fitting GP with Periodic Kernel


 40%|█████████████████▌                          | 8/20 [01:30<02:44, 13.67s/it]

Fitting GP with Periodic Kernel


 45%|███████████████████▊                        | 9/20 [01:36<02:02, 11.10s/it]

Fitting GP with Periodic Kernel


 50%|█████████████████████▌                     | 10/20 [01:52<02:07, 12.76s/it]

Fitting GP with Periodic Kernel


 55%|███████████████████████▋                   | 11/20 [02:03<01:51, 12.37s/it]

Fitting GP with Periodic Kernel


 60%|█████████████████████████▊                 | 12/20 [02:07<01:17,  9.74s/it]

Fitting GP with Periodic Kernel


 65%|███████████████████████████▉               | 13/20 [02:23<01:20, 11.53s/it]

Fitting GP with Periodic Kernel


 70%|██████████████████████████████             | 14/20 [02:29<01:00, 10.02s/it]

Fitting GP with Periodic Kernel


 75%|████████████████████████████████▎          | 15/20 [02:37<00:46,  9.21s/it]

Fitting GP with Periodic Kernel


 80%|██████████████████████████████████▍        | 16/20 [02:44<00:34,  8.73s/it]

Fitting GP with Periodic Kernel


 85%|████████████████████████████████████▌      | 17/20 [02:52<00:25,  8.50s/it]

Fitting GP with Periodic Kernel


 90%|██████████████████████████████████████▋    | 18/20 [02:58<00:15,  7.65s/it]

Fitting GP with Periodic Kernel


 95%|████████████████████████████████████████▊  | 19/20 [03:14<00:10, 10.03s/it]

Fitting GP with Periodic Kernel


100%|███████████████████████████████████████████| 20/20 [03:18<00:00,  9.94s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)
100%|███████████████████████████████████████████| 20/20 [01:33<00:00,  4.65s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)


Unnamed: 0,Gene,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,Model,BIC,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,2010300C02Rik,-660.240087,-683.893721,23.653634,RBF,1342.722901,1.729649e-27,8.377161e-11,1.0,6.068701e-12,1.21374e-10
1,Ate1,-507.338554,-511.078156,3.739601,Periodic,1036.919835,4.896035e-59,0.9996996,0.0003,0.00624157,0.05243306
2,Atp5j2,-674.508163,-674.590755,0.082592,Periodic,1371.259052,3.855854e-10,0.5413119,0.458688,0.6844274,0.9720863
3,B3gat3,-535.283282,-537.341643,2.058361,RBF,1092.809291,1.015705e-05,0.2626488,0.737341,0.0424613,0.2830753
4,Bbs1,-404.293674,-404.421127,0.127454,Periodic,830.830074,1.094642e-40,0.5647154,0.435285,0.6136406,0.9720863
5,Ccnl1,-474.894846,-474.892581,-0.002265,Periodic,972.032418,2.5679339999999997e-57,0.7054979,0.294502,1.0,1.0
6,Inpp5f,-642.020899,-642.098449,0.07755,Periodic,1306.284525,3.638145e-18,0.5218644,0.478136,0.6937092,0.9720863
7,Lix1,-619.402483,-620.574537,1.172054,RBF,1261.047693,1.3586639999999999e-77,0.08524656,0.914753,0.1257576,0.6287878
8,Lrrfip1,-430.093032,-430.922986,0.829954,RBF,882.42879,2.8078440000000003e-25,0.1953822,0.804618,0.1976157,0.658719
9,Nap1l1,-703.545063,-709.64614,6.101077,RBF,1429.332852,1.394061e-11,0.0005661199,0.999434,0.0004773438,0.004773438
