### Application of GPcounts with a negative binomial likelihood to spatial data from the mouse olfactory bulb, to identify spatially expressed genes

We first calculate the scale factors by negative binomial regression with an identity link function, using R's glm, and save them in ./data/MouseOB/scales_nb.txt. The R script is also available at ./data/MouseOB/nb_regression.R.

Required packages:

gpflow 2.0.0
tensorflow 2.1.0
python 3.7.x


In [1]:
import pandas as pd 
import numpy as np 
import gpflow
import tensorflow as tf
from GPcounts import NegativeBinomialLikelihood
from GPcounts.GPcounts_Module import Fit_GPcounts
from matplotlib import pyplot as plt

In [2]:
def get_coords(index):
    """Build a coordinate table from labels of the form '<x>x<y>'.

    Every label is split on the character 'x'; the first piece is converted
    to float and stored in column 'x', the second piece in column 'y'.

    Parameters
    ----------
    index : pandas.Index of str
        Labels to parse, e.g. '16.92x9.015'.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``index`` with float columns 'x' and 'y'.
    """
    pieces = index.str.split('x')
    coords = pd.DataFrame(index=index)
    # Piece 0 feeds column 'x', piece 1 feeds column 'y'.
    for position, axis_name in enumerate(('x', 'y')):
        coords[axis_name] = pieces.str.get(position).map(float)
    return coords

In [3]:
# Read the spatial expression count matrix; the first CSV column is used as the row index.
Y = pd.read_csv(filepath_or_buffer='../data/MouseOB/Rep11_MOB_0.csv', index_col=[0])
# Keep only the columns whose counts, summed over all rows, are at least 3.
Y = Y.T.loc[Y.sum(axis=0) >= 3].T

In [4]:
# Per-row metadata: coordinates parsed from the row labels plus the total count of each row.
sample_info = get_coords(Y.index).assign(total_counts=Y.sum(axis=1))
# Drop rows (spatial locations) whose total count is not above 10.
sample_info = sample_info.loc[sample_info['total_counts'] > 10]
# Restrict the count matrix to the retained rows, in the same order.
Y = Y.loc[sample_info.index]
# Coordinates of the retained rows.
X = sample_info.loc[:, ['x', 'y']]
# Tab-separated table of precomputed scale factors.
scale = pd.read_csv('../data/MouseOB/scales_nb.txt', sep="\t")

In [5]:
# Number of leading genes to keep for the GPcounts run. The notebook ships with 4
# so the fit stays quick; raise this value to analyse more genes.
N_GENES = 4

# Transpose so that each row is a gene, then keep the first N_GENES rows.
# NOTE: this cell overwrites Y, so re-running it on its own transposes the
# already-transposed matrix; re-run the notebook from the top instead.
Y = Y.T.iloc[:N_GENES, :]
# X already consists of the 'x' and 'y' columns; this re-selection leaves its content unchanged.
X = X[['x', 'y']]

In [6]:
# Name of the likelihood handed to the test in the next cell.
likelihood = 'Negative_binomial'
# Keep scaled=True so that the scale factors are taken into account.
scaled = True
# Fit every gene (row) currently held in Y.
gene_name = Y.index
gp_counts = Fit_GPcounts(
    X,
    Y.loc[gene_name],
    scale.loc[scale.index],
    scaled=scaled,
    nb_scaled=True,
)

In [None]:
# Run the GPcounts one-sample test with the likelihood name chosen above and keep
# the returned table; a later cell sorts it by its 'q value' column.
log_likelihood_ratio = gp_counts.One_sample_test(likelihood)
# Plain-text dump of the full result table.
print(log_likelihood_ratio)

Plot some spatially expressed genes from the data set

In [None]:
# Order the test results by ascending 'q value' and display the first 20 rows.
df = log_likelihood_ratio.sort_values('q value')
df.head(n=20)

In [None]:
from matplotlib import rcParams
# Wide, short canvas: the three panels sit side by side.
plt.rcParams["figure.figsize"] = [10,3]
# Genes to display, one panel per gene.
# NOTE(review): Y was cut down to its first few genes in an earlier cell; these
# names must be among the rows of Y or the lookup below raises KeyError — confirm.
m = ['Pcp4', 'Fyco1', 'Eef1a1']
for i, g in enumerate(m):
    plt.subplot(1, len(m), i + 1)
    # Colour every spatial location by the count of gene g (Y holds genes in rows).
    plt.scatter(sample_info['x'], sample_info['y'], c=Y.loc[g], cmap='summer')
    plt.title(g)
    plt.axis('equal')
# Called once after the loop, so the colorbar is attached to the last panel drawn.
plt.colorbar(ticks=[0,1], label='Gene Expression')

plt.show()
