In [1]:
import numpy as np
from numpy import matrix
import sklearn.metrics
from cvxopt import matrix, solvers
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import spearmanr 
import scanpy as sc
import seaborn as sns

In [2]:

#%% Kernel
def kernel(ker, X1, X2, gamma):
    '''
    Compute a pairwise kernel (Gram) matrix with scikit-learn
    :param ker: 'linear' | 'rbf'
    :param X1: n1 * dim samples (anything np.asarray accepts)
    :param X2: n2 * dim samples, or None to compare X1 with itself
    :param gamma: bandwidth for the rbf kernel (unused for 'linear')
    :return: kernel matrix between the rows of X1 and the rows of X2 (or X1)
    :raises ValueError: if ker is not one of the supported kernel names
    '''
    # Fail loudly on a typo instead of returning None, which would only
    # surface later as an unrelated error inside the QP solver.
    if ker not in ('linear', 'rbf'):
        raise ValueError("Unsupported kernel type %r; expected 'linear' or 'rbf'" % (ker,))

    left = np.asarray(X1)
    # Keep None as None: both sklearn kernels treat Y=None as "use X".
    right = None if X2 is None else np.asarray(X2)

    if ker == 'linear':
        return sklearn.metrics.pairwise.linear_kernel(left, right)
    return sklearn.metrics.pairwise.rbf_kernel(left, right, gamma=gamma)


#%% Kernel Mean Matching (KMM)
class KMM:
    '''
    Kernel Mean Matching: reweights source samples by solving a quadratic
    program built from kernel matrices of the source and target samples.
    '''

    def __init__(self, kernel_type='linear', gamma=1.0, B=1.0, eps=None):
        '''
        Initialization function
        :param kernel_type: 'linear' | 'rbf'
        :param gamma: kernel bandwidth for rbf kernel
        :param B: bound for beta
        :param eps: bound for sigma_beta; None means B / sqrt(ns), chosen at fit time
        '''
        self.kernel_type = kernel_type
        self.gamma = gamma
        self.B = B
        self.eps = eps

    def fit(self, Xs, Xt):
        '''
        Fit source and target using KMM (compute the coefficients)
        :param Xs: ns * dim
        :param Xt: nt * dim
        :return: Coefficients (Pt / Ps) value vector (Beta in the paper),
                 as an ns * 1 numpy array
        '''
        ns = Xs.shape[0]
        nt = Xt.shape[0]

        # Resolve the default per call. Writing it back into self.eps would
        # freeze the value derived from the first ns and silently reuse it
        # on any later fit with a different number of source samples.
        eps = self.B / np.sqrt(ns) if self.eps is None else self.eps
        self.eps_ = eps  # effective bound used by the most recent fit

        K = kernel(self.kernel_type, Xs, None, self.gamma)
        kappa = np.sum(kernel(self.kernel_type, Xs, Xt, self.gamma) * float(ns) / float(nt), axis=1)

        # cvxopt dense matrices need double precision; float32 inputs would
        # otherwise yield float32 kernels that cvxopt refuses to convert.
        K = matrix(np.asarray(K, dtype=np.double))
        kappa = matrix(np.asarray(kappa, dtype=np.double))

        # Inequalities G * beta <= h, row blocks in order:
        #   sum(beta) <= ns * (1 + eps)
        #  -sum(beta) <= ns * (eps - 1)
        #   beta_i    <= B
        #  -beta_i    <= 0
        G = matrix(np.r_[np.ones((1, ns)), -np.ones((1, ns)), np.eye(ns), -np.eye(ns)])
        h = matrix(np.r_[ns * (1 + eps), ns * (eps - 1), self.B * np.ones((ns,)), np.zeros((ns,))])

        sol = solvers.qp(K, -kappa, G, h)
        beta = np.array(sol['x'])
        return beta

In [3]:
# Load the `cell` and `bulk` AnnData objects from their .h5ad files
# (paths are relative to the current working directory).
cell, bulk = (sc.read_h5ad(path) for path in ('cell_seurat.h5ad', 'bulk_seurat.h5ad'))

In [4]:
# Show the AnnData summary for `cell`: dimensions plus the obs/var columns.
cell

AnnData object with n_obs × n_vars = 6400 × 19840
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'celltype'
    var: 'features'

In [5]:
# Show the AnnData summary for `bulk`; n_vars should match `cell` above.
bulk

AnnData object with n_obs × n_vars = 6 × 19840
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA'
    var: 'features'

In [6]:
# Densify the sparse expression matrices into DataFrames
# (rows = observations, columns = features).
# toarray() returns a plain ndarray; todense() returns np.matrix, which
# NumPy discourages for new code.
cell_X = pd.DataFrame(cell.X.toarray())
bulk_X = pd.DataFrame(bulk.X.toarray())

# The kernels compare samples feature-by-feature, so both frames must have
# the same number of columns before KMM is run.
assert cell_X.shape[1] == bulk_X.shape[1], 'cell and bulk must have the same number of features'
# NOTE(review): this checks only the count -- confirm that cell.var_names and
# bulk.var_names list the features in the same order.

In [7]:
# Estimate one KMM weight per row of bulk_X (the source set) so that the
# reweighted source matches cell_X (the target set) under an RBF kernel.
# gamma is spelled out here; 1.0 is also the class default.
kmm = KMM(kernel_type='rbf', gamma=1.0, B=1)
beta = kmm.fit(Xs=bulk_X, Xt=cell_X)

     pcost       dcost       gap    pres   dres
 0:  7.6047e+00 -2.8789e+01  4e+01  5e-17  4e-15
 1:  6.5250e+00  4.6901e+00  2e+00  7e-17  2e-15
 2:  6.3049e+00  6.2849e+00  2e-02  1e-16  1e-15
 3:  6.3025e+00  6.3016e+00  9e-04  8e-17  5e-14
 4:  6.3022e+00  6.3020e+00  2e-04  2e-16  1e-11
 5:  6.3021e+00  6.3021e+00  3e-05  1e-16  7e-11
 6:  6.3021e+00  6.3021e+00  1e-06  6e-17  4e-11
Optimal solution found.


In [8]:
# Inspect the fitted weights: a column vector with one entry per source (bulk) row.
beta

array([[0.75666106],
       [0.79553252],
       [0.99658132],
       [0.00125949],
       [0.99925444],
       [0.00122146]])

In [9]:
# Wrap the weight vector in a DataFrame and write it out as a bare column
# of numbers: no header row and no index column.
beta = pd.DataFrame(beta)
beta.to_csv('beta.csv', header=False, index=False)