In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.io import mmread
import scipy.sparse as sp_sparse

In [9]:
def read_10x(ref):
    """
    Read a gene x barcode count matrix from 10X mapping output files.

    Parameters
    ----------
    ref : str
        Path prefix that is concatenated directly with the file names
        "matrix.mtx", "genes.tsv" and "barcodes.tsv". A directory must
        therefore be passed with its trailing path separator.

    Returns
    -------
    pandas.DataFrame
        Matrix values cast to int. The row index holds the last
        whitespace-separated field of every non-blank line of genes.tsv;
        the columns hold the non-blank lines of barcodes.tsv.
    """
    mtx = ref + "matrix.mtx"
    gene = ref + "genes.tsv"
    barcode = ref + "barcodes.tsv"

    # toarray() yields a plain ndarray rather than the legacy np.matrix type.
    counts = mmread(mtx).toarray()

    # Context managers close the label files deterministically; blank lines
    # (e.g. a trailing newline at end of file) are skipped so that
    # split()[-1] cannot fail on an empty field list.
    with open(gene) as gene_handle:
        gene_names = [line.rstrip().split()[-1] for line in gene_handle if line.strip()]
    with open(barcode) as barcode_handle:
        barcodes = [line.rstrip() for line in barcode_handle if line.strip()]

    return pd.DataFrame(counts, index=gene_names, columns=barcodes).astype(int)

def norm(data, method):
    """
    Normalise and log-transform a count matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Count matrix laid out as produced by read_10x (rows are genes,
        columns are cell barcodes).
    method : str
        "DESeq"       - divide every column by its size factor, then
                        log2(x + 1). The size factor of a column is the
                        median of value / row geometric mean, taken over
                        the rows whose ratios contain no NaN.
        "Seurat_norm" - scale every column to a total of 10000, then
                        natural log(x + 1).
        "qnorm"       - quantile-normalise the columns, then log2(x + 1).
        Any other value leaves the input untouched.

    Returns
    -------
    pandas.DataFrame
        The transformed matrix, or `data` itself for an unrecognised method.
    """
    if method == "DESeq":
        # gmean is not provided by the notebook's import cell, so it is
        # imported here; without it this branch fails with NameError.
        from scipy.stats import gmean

        gene_gmean = data.apply(gmean, axis=1, raw=True)
        # A row containing a zero has geometric mean 0, which produces a
        # 0/0 = NaN ratio; such rows are excluded from the median.
        # NOTE(review): if every row contains a zero, no row survives and
        # the size factors are undefined.
        ratios = (data.T / gene_gmean).dropna(axis=1, how='any')
        size_factors = ratios.apply(np.median, axis=1)
        return np.log2(data / size_factors + 1)

    if method == "Seurat_norm":
        column_totals = data.apply(lambda x: x.sum())
        return np.log(data / column_totals * 10000 + 1)

    if method == "qnorm":
        # Mean value observed at each within-column rank ('first' breaks
        # ties so that every rank occurs exactly once per column).
        ordinal_rank = data.rank(method='first').stack().astype(int)
        rank_mean = data.stack().groupby(ordinal_rank).mean()
        # Tied values share the mean that belongs to their lowest rank.
        tied_rank = data.rank(method='min').stack().astype(int)
        return np.log2(tied_rank.map(rank_mean).unstack() + 1)

    return data

In [5]:
# Load the 10X count matrix. The trailing slash is required because
# read_10x concatenates this prefix directly with the file names.
data = read_10x("../data/cellranger_self_mm10_p5/")

In [6]:
# Gene filter 1: drop every gene whose symbol starts with "Rp".
# str.match anchors at the start of the string, so this one pattern also
# removes the "Rps" genes; a separate pass for them is redundant.
# NOTE(review): the "Rp" prefix is broader than Rps*/Rpl* (it would also
# match symbols such as Rptor or Rpa1) - confirm this is intended.
data = data.drop(index=data.index[data.index.str.match(r'Rp')])

# Gene filter 2: keep genes with a count of at least 2 in at least 3 cells.
# Summing the boolean mask counts the hits without building a NaN-masked copy.
cell_num = (data >= 2).sum(axis=1)
data = data.drop(index=cell_num.index[cell_num < 3])

# Cell filter: keep barcodes with a count above 1 in at least 200 of the
# remaining genes.
gene_num = (data > 1).sum(axis=0)
data = data.drop(columns=gene_num.index[gene_num < 200])
data

Unnamed: 0,AAACCTGGTCGACTGC-1,AAACGGGAGCTAACAA-1,AAACGGGCACATGGGA-1,AAACGGGGTAGCGTGA-1,AAACGGGGTATCGCAT-1,AAAGATGAGGACCACA-1,AAAGATGCAATCTGCA-1,AAAGATGGTGCAGGTA-1,AAAGATGGTTTGTTTC-1,AAAGATGTCCAAATGC-1,...,TTTGCGCTCGTAGATC-1,TTTGGTTAGAGGACGG-1,TTTGGTTCAATGGAGC-1,TTTGGTTGTGTTTGTG-1,TTTGGTTTCTACCAGA-1,TTTGTCACAAGTCATC-1,TTTGTCACACCACCAG-1,TTTGTCACACTTGGAT-1,TTTGTCACAGACGCAA-1,TTTGTCAGTTAGATGA-1
Mrpl15,0,0,0,0,4,1,0,1,0,0,...,0,0,2,3,0,1,0,1,2,0
Lypla1,0,0,0,0,2,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,1
Tcea1,0,1,0,0,5,1,1,0,0,0,...,0,0,0,1,0,0,2,1,0,0
Atp6v1h,0,0,0,1,0,0,0,0,0,1,...,1,0,2,0,0,0,0,0,1,0
Rb1cc1,0,1,0,0,0,0,0,0,0,1,...,1,0,0,2,0,0,0,0,0,0
4732440D04Rik,0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
Pcmtd1,0,0,0,0,2,0,0,0,1,0,...,1,0,0,3,0,1,0,0,0,1
Rrs1,0,0,1,0,2,0,0,0,0,0,...,0,1,3,0,0,0,0,0,0,0
Vcpip1,1,0,0,0,1,1,0,0,0,0,...,0,0,0,2,1,0,0,0,1,0
Sgk3,0,0,0,0,1,1,0,0,0,0,...,0,0,3,1,0,0,0,0,0,0


In [13]:
# Quantile-normalise the filtered count matrix (log2-transformed by norm).
qnorm = norm(data, "qnorm")
#qnorm.to_csv('../data/10x_1993_log2_q_norm.txt',index=True,sep='\t')
qnorm

Unnamed: 0,AAACCTGGTCGACTGC-1,AAACGGGAGCTAACAA-1,AAACGGGCACATGGGA-1,AAACGGGGTAGCGTGA-1,AAACGGGGTATCGCAT-1,AAAGATGAGGACCACA-1,AAAGATGCAATCTGCA-1,AAAGATGGTGCAGGTA-1,AAAGATGGTTTGTTTC-1,AAAGATGTCCAAATGC-1,...,TTTGCGCTCGTAGATC-1,TTTGGTTAGAGGACGG-1,TTTGGTTCAATGGAGC-1,TTTGGTTGTGTTTGTG-1,TTTGGTTTCTACCAGA-1,TTTGTCACAAGTCATC-1,TTTGTCACACCACCAG-1,TTTGTCACACTTGGAT-1,TTTGTCACAGACGCAA-1,TTTGTCAGTTAGATGA-1
Mrpl15,0.000000,0.000000,0.000000,0.000000,1.033627,0.350455,0.000000,0.185843,0.000000,0.000000,...,0.000000,0.000000,0.705498,0.550533,0.000000,0.323520,0.000000,0.192193,1.706164,0.000000
Lypla1,0.000000,0.000000,0.000000,0.000000,0.269895,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.705498,0.005779,0.000000,0.000000,0.000000,0.000000,0.000000,0.957765
Tcea1,0.000000,1.111742,0.000000,0.000000,1.227245,0.350455,0.210451,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.005779,0.000000,0.000000,1.875916,0.192193,0.000000,0.000000
Atp6v1h,0.000000,0.000000,0.000000,1.044197,0.000000,0.000000,0.000000,0.000000,0.000000,0.997102,...,0.692119,0.000000,0.705498,0.000000,0.000000,0.000000,0.000000,0.000000,0.800410,0.000000
Rb1cc1,0.000000,1.111742,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.997102,...,0.692119,0.000000,0.000000,0.208573,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4732440D04Rik,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.692119,0.000000,0.112077,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Pcmtd1,0.000000,0.000000,0.000000,0.000000,0.269895,0.000000,0.000000,0.000000,0.281853,0.000000,...,0.692119,0.000000,0.000000,0.550533,0.000000,0.323520,0.000000,0.000000,0.000000,0.957765
Rrs1,0.000000,0.000000,1.078193,0.000000,0.269895,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.872164,1.240785,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Vcpip1,1.051200,0.000000,0.000000,0.000000,0.009380,0.350455,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.208573,0.701497,0.000000,0.000000,0.000000,0.800410,0.000000
Sgk3,0.000000,0.000000,0.000000,0.000000,0.009380,0.350455,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.240785,0.005779,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
