# Automatic model selection based on linear, periodic and RBF kernels

The test is run on the first 20 spatially variable genes identified in 'GPcounts_spatial.ipynb'.

In [1]:
import pandas as pd 
import numpy as np 
import gpflow
import tensorflow as tf
from GPcounts.RNA_seq_GP import rna_seq_gp
from matplotlib import pyplot as plt

In [2]:
# Counts for the spatially variable (SV) genes identified in 'GPcounts_spatial.ipynb'.
Y = pd.read_csv(
    '../data/MouseOB/mouse_ob_SV_genes.csv',
    index_col=[0],
)

# The MouseOB counts dataset covering all genes.
Y_total = pd.read_csv(
    '../data/MouseOB/Rep11_MOB_0.csv',
    index_col=[0],
)

# Tab-separated scale factors for all genes; the source from which the scale
# factors of the particular SV genes are extracted for this analysis.
# NOTE(review): `scale` is rebound in cell In [8] before any visible cell reads
# this table -- confirm this load is still needed.
scale = pd.read_csv(
    '../data/MouseOB/scales_nb.txt',
    sep="\t",
)

In [3]:
# Build a per-row coordinate table from the labels in Y.index: each label is
# split on the character 'x'; the first piece becomes column 'x' and the second
# piece becomes column 'y', both parsed as floats.
# (presumably the labels look like '<x>x<y>' -- TODO confirm against the CSV)
spatial_locations = pd.DataFrame(index=Y.index).assign(
    x=Y.index.str.split('x').str.get(0).map(float),
    y=Y.index.str.split('x').str.get(1).map(float),
)

In [4]:
# Row-wise sum of Y, stored next to the coordinates.
spatial_locations['total_counts'] = Y.sum(axis=1)

# Select the rows of Y by the labels (and in the order) of spatial_locations.
Y = Y.loc[spatial_locations.index]

# Coordinate columns only; X is later passed as the first argument to rna_seq_gp.
X = spatial_locations[['x', 'y']]

In [5]:
# Scale-factor table for the model selection run; it is the value passed as
# `scale` when the rna_seq_gp object is constructed in cell In [8].
scale_nb_model_sel_tr = pd.read_csv(
    '../data/MouseOB/scale_nb_model_sel.csv',
    index_col=[0],
)

In [6]:
# Rebind Y to the transposed full counts table, so the former columns of
# Y_total become rows (presumably one row per gene -- TODO confirm).
# NOTE(review): this replaces the SV-gene table loaded into Y in cell In [2];
# the subset taken in the next cell therefore comes from Y_total, not from
# 'mouse_ob_SV_genes.csv' -- confirm this is intended.
Y = Y_total.transpose()

In [7]:
# Restrict the model selection run to the first 20 rows of Y (all columns kept).
Y_run = Y.iloc[:20, :]

In [8]:
# Set up the GPcounts object used for the model selection test.
#
# The original cell first bound `gene_name` to an empty list and then
# immediately overwrote it; that dead store is removed here. Every name the
# cell defines (scale, gene_name, likelihood, gp_counts) is still defined.

# Scale factors loaded from 'scale_nb_model_sel.csv' in cell In [5]; this
# rebinds the `scale` name that cell In [2] bound to a different table.
scale = scale_nb_model_sel_tr

# Row labels of Y_run; every row of Y_run is selected below.
gene_name = Y_run.index

# Likelihood name handed to Model_selection_test in the next cell.
likelihood = 'Negative_binomial'

# X holds the 'x'/'y' coordinate columns of spatial_locations.
# NOTE(review): safe_mode is switched off; its exact effect is defined inside
# GPcounts and is not visible in this notebook.
gp_counts = rna_seq_gp(X, Y_run.loc[gene_name], scale=scale, safe_mode=False)
 

In [9]:
# Run the GPcounts model selection test with the likelihood name set in the
# previous cell ('Negative_binomial'). The progress output below reports fits
# with Linear and Periodic kernels, and the resulting table carries per-gene
# columns including Model, BIC, Linear/Periodic/RBF probabilities, p_value and
# q_value.
results = gp_counts.Model_selection_test(likelihood)
# Bare last expression so the notebook renders the table as the cell output.
results

  0%|                                                    | 0/20 [00:00<?, ?it/s]

Fitting GP with Linear Kernel


  5%|██▏                                         | 1/20 [00:02<00:43,  2.27s/it]

Fitting GP with Linear Kernel


 10%|████▍                                       | 2/20 [00:03<00:35,  1.95s/it]

Fitting GP with Linear Kernel


 15%|██████▌                                     | 3/20 [00:06<00:34,  2.02s/it]

Fitting GP with Linear Kernel


 20%|████████▊                                   | 4/20 [00:07<00:31,  1.96s/it]

Fitting GP with Linear Kernel


 25%|███████████                                 | 5/20 [00:09<00:29,  1.93s/it]

Fitting GP with Linear Kernel


 30%|█████████████▏                              | 6/20 [00:12<00:29,  2.10s/it]

Fitting GP with Linear Kernel


 35%|███████████████▍                            | 7/20 [00:14<00:26,  2.03s/it]

Fitting GP with Linear Kernel


 40%|█████████████████▌                          | 8/20 [00:16<00:27,  2.25s/it]

Fitting GP with Linear Kernel


 45%|███████████████████▊                        | 9/20 [00:18<00:23,  2.09s/it]

Fitting GP with Linear Kernel


 50%|█████████████████████▌                     | 10/20 [00:20<00:20,  2.09s/it]

Fitting GP with Linear Kernel


 55%|███████████████████████▋                   | 11/20 [00:22<00:17,  1.94s/it]

Fitting GP with Linear Kernel


 60%|█████████████████████████▊                 | 12/20 [00:23<00:14,  1.86s/it]

Fitting GP with Linear Kernel
Fitting GP with Linear Kernel
Fitting GP with Linear Kernel


 65%|███████████████████████████▉               | 13/20 [00:26<00:14,  2.09s/it]

Fitting GP with Linear Kernel


 70%|██████████████████████████████             | 14/20 [00:28<00:11,  1.99s/it]

Fitting GP with Linear Kernel


 75%|████████████████████████████████▎          | 15/20 [00:29<00:09,  1.85s/it]

Fitting GP with Linear Kernel


 80%|██████████████████████████████████▍        | 16/20 [00:31<00:06,  1.75s/it]

Fitting GP with Linear Kernel


 85%|████████████████████████████████████▌      | 17/20 [00:32<00:05,  1.69s/it]

Fitting GP with Linear Kernel


 90%|██████████████████████████████████████▋    | 18/20 [00:34<00:03,  1.70s/it]

Fitting GP with Linear Kernel


 95%|████████████████████████████████████████▊  | 19/20 [00:36<00:01,  1.61s/it]

Fitting GP with Linear Kernel


100%|███████████████████████████████████████████| 20/20 [00:37<00:00,  1.87s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)
  0%|                                                    | 0/20 [00:00<?, ?it/s]

Fitting GP with Periodic Kernel


  5%|██▏                                         | 1/20 [00:04<01:25,  4.51s/it]

Fitting GP with Periodic Kernel


 10%|████▍                                       | 2/20 [00:13<02:11,  7.29s/it]

Fitting GP with Periodic Kernel


 15%|██████▌                                     | 3/20 [00:18<01:45,  6.20s/it]

Fitting GP with Periodic Kernel


 20%|████████▊                                   | 4/20 [00:24<01:34,  5.92s/it]

Fitting GP with Periodic Kernel


 25%|███████████                                 | 5/20 [00:30<01:32,  6.14s/it]

Fitting GP with Periodic Kernel


 30%|█████████████▏                              | 6/20 [00:41<01:48,  7.72s/it]

Fitting GP with Periodic Kernel


 35%|███████████████▍                            | 7/20 [00:45<01:25,  6.54s/it]

Fitting GP with Periodic Kernel


 40%|█████████████████▌                          | 8/20 [00:51<01:15,  6.30s/it]

Fitting GP with Periodic Kernel


 45%|███████████████████▊                        | 9/20 [00:55<01:02,  5.65s/it]

Fitting GP with Periodic Kernel


 50%|█████████████████████▌                     | 10/20 [01:04<01:05,  6.53s/it]

Fitting GP with Periodic Kernel


 55%|███████████████████████▋                   | 11/20 [01:11<01:02,  6.96s/it]

Fitting GP with Periodic Kernel


 60%|█████████████████████████▊                 | 12/20 [01:14<00:44,  5.61s/it]

Fitting GP with Periodic Kernel


 65%|███████████████████████████▉               | 13/20 [01:23<00:47,  6.78s/it]

Fitting GP with Periodic Kernel


 70%|██████████████████████████████             | 14/20 [01:30<00:40,  6.81s/it]

Fitting GP with Periodic Kernel


 75%|████████████████████████████████▎          | 15/20 [01:36<00:31,  6.32s/it]

Fitting GP with Periodic Kernel


 80%|██████████████████████████████████▍        | 16/20 [01:39<00:22,  5.52s/it]

Fitting GP with Periodic Kernel


 85%|████████████████████████████████████▌      | 17/20 [01:44<00:15,  5.32s/it]

Fitting GP with Periodic Kernel


 90%|██████████████████████████████████████▋    | 18/20 [01:48<00:09,  4.84s/it]

Fitting GP with Periodic Kernel


 95%|████████████████████████████████████████▊  | 19/20 [02:00<00:07,  7.09s/it]

Fitting GP with Periodic Kernel


100%|███████████████████████████████████████████| 20/20 [02:04<00:00,  6.21s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)
100%|███████████████████████████████████████████| 20/20 [01:29<00:00,  4.49s/it]
  pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
  selection_results.groupby(["Gene", "Model"])["BIC"].transform(min)
  selection_results.groupby("Gene")["BIC"].transform(min)


Unnamed: 0,Gene,Dynamic_model_log_likelihood,Constant_model_log_likelihood,log_likelihood_ratio,Model,BIC,Linear_probability,Periodic_probability,RBF_probability,p_value,q_value
0,2010300C02Rik,-660.240087,-683.893721,23.653634,RBF,1342.722901,1.729649e-27,5.924818e-11,1.0,6.068701e-12,1.21374e-10
1,Ate1,-507.338327,-511.078156,3.739829,Periodic,1036.919381,4.893894e-59,0.9996985,0.000302,0.006239994,0.05244739
2,Atp5j2,-674.50785,-674.590755,0.082905,Periodic,1371.258427,3.854618e-10,0.5414766,0.458523,0.6838624,0.9720777
3,B3gat3,-535.283282,-537.341643,2.058361,RBF,1092.80929,1.015702e-05,0.2626505,0.737339,0.04246129,0.2830753
4,Bbs1,-404.194203,-404.421127,0.226924,Periodic,830.631132,9.559636e-41,0.6017298,0.39827,0.5005132,0.9720777
5,Ccnl1,-475.367135,-474.89258,-0.474555,Periodic,972.976996,5.0900379999999995e-57,0.5437016,0.456298,1.0,1.0
6,Inpp5f,-642.021103,-642.098449,0.077346,Periodic,1306.284932,3.638407e-18,0.5216895,0.478311,0.6940912,0.9720777
7,Lix1,-619.397085,-620.574537,1.177452,RBF,1261.036896,1.349697e-77,0.08143098,0.918569,0.1248896,0.5038589
8,Lrrfip1,-429.752214,-430.922986,1.170771,RBF,881.747155,1.571972e-25,0.1093643,0.890636,0.1259647,0.5038589
9,Nap1l1,-703.545063,-709.64614,6.101077,RBF,1429.332852,1.394057e-11,0.0005685395,0.999431,0.0004773436,0.004773436
