#### Compute kernel matrices from genotype scores
1. G-Matrix (Linear kernel)
2. Euclidean distance based Gaussian kernel

Import the genotype data (an R object) into Python

In [1]:
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects import r
from rpy2.robjects.numpy2ri import numpy2ri 

# Run the R snippet below in the embedded R session; load() restores the
# objects stored in the .Rdata file into R's global environment.
r('''
        load('hybridGenotypeScore.Rdata')         
        ''')
# Fetch the restored object from R's global environment by name.
hybridGenotypeScore = robjects.globalenv['hybridGenotypeScore']
# Display the matrix dimensions as the cell output.
np.shape(hybridGenotypeScore)

(2149, 232631)

Standardize the genotype data for each marker (zero mean and unit variance per column)

In [2]:
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import scale
# Standardize along axis 0, i.e. each marker column is centered to zero mean
# and scaled to unit variance; copy=True requests a copy rather than
# in-place scaling.
scaledGenotype = scale(
    hybridGenotypeScore,
    axis=0,
    with_mean=True,
    with_std=True,
    copy=True,
)
# Preview the top-left 5 x 5 corner of the standardized matrix.
scaledGenotype[:5, :5]

array([[-0.60080131, -0.28266499, -0.25992471,  2.07422975,  2.07422975],
       [-0.60080131, -0.28266499, -0.25992471, -0.43140248, -0.43140248],
       [-0.60080131, -0.28266499, -0.25992471, -0.43140248, -0.43140248],
       [-0.60080131, -0.28266499, -0.25992471, -0.43140248, -0.43140248],
       [-0.60080131, -0.28266499, -0.25992471, -0.43140248, -0.43140248]])

Compute the linear kernel (G-matrix) and save it as an R object

In [3]:
# Number of markers = number of columns of the standardized genotype matrix.
p = np.shape(scaledGenotype)[1]
# Linear kernel (G-matrix): cross-product of the rows, averaged over markers.
linearKernel = scaledGenotype.dot(scaledGenotype.T) / p
# Hand R an explicit float64 array, which corresponds to R's "double" type.
linearKernel = np.array(linearKernel, dtype=np.float64)
# Convert to an R object and bind it to the name `kernelMatrix` in R.
ro = numpy2ri(linearKernel)
r.assign("kernelMatrix", ro)
# Let R write the matrix to disk (compressed R data file).
r("save(kernelMatrix, file='kernelMatrix_G.gzip', compress=TRUE)")


rpy2.rinterface.NULL

Compute the Euclidean-distance-based Gaussian kernel and save it as an R object

In [4]:
from sklearn.metrics.pairwise import euclidean_distances
# Gaussian kernel on the marker-averaged squared Euclidean distance:
#     K[i, j] = exp(-h * ||x_i - x_j||^2 / p)
# where `p` is the number of markers computed in the linear-kernel cell.
#
# Request squared distances directly (squared=True) instead of taking the
# square root and squaring it again: this skips two full passes over the
# pairwise matrix and avoids the rounding error of the sqrt/square round trip.
squaredDistPerMarker = euclidean_distances(scaledGenotype, squared=True) / p
h = 0.5  # bandwidth parameter of the Gaussian kernel
euclideanDistKernel = np.exp(-h * squaredDistPerMarker)
# Hand R an explicit float64 array, which corresponds to R's "double" type.
euclideanDistKernel = np.array(euclideanDistKernel, dtype="float64")
# Convert to an R object; this rebinds `kernelMatrix` in R, replacing any
# value previously assigned under that name.
ro = numpy2ri(euclideanDistKernel)
r.assign("kernelMatrix", ro)
# Let R write the matrix to disk (compressed R data file).
r("save(kernelMatrix, file='kernelMatrix_eu_dist.gzip', compress=TRUE)")

rpy2.rinterface.NULL