# Extract male-biased genes on the X chromosome

In [1]:
import pandas as pd

In [2]:
def get_deg(tissue):
    """Read the chromosome-annotated gene table for one tissue.

    Parameters
    ----------
    tissue : str
        Directory name under ``differential_expression`` (the notebook uses
        "caudate", "dlpfc" and "hippocampus").

    Returns
    -------
    pandas.DataFrame
        The tab-separated table restricted to the columns ``gene_id``,
        ``seqname``, ``Symbol``, ``t`` and ``adj.P.Val``, in that order.
    """
    wanted_columns = ["gene_id", "seqname", "Symbol", "t", "adj.P.Val"]
    tissue_dir = "../../../../differential_expression/%s/" % tissue
    table_path = tissue_dir + "metrics_summary/_m/chrom_annotation_genes.txt"
    full_table = pd.read_csv(table_path, sep='\t')
    return full_table.loc[:, wanted_columns]

In [3]:
# Load the per-tissue tables, tag each with its tissue, and stack them in a
# single concat. Collecting into a list first avoids re-copying the growing
# frame on every iteration and avoids concatenating onto an empty DataFrame.
tissue_tables = []
for tissue in ["caudate", "dlpfc", "hippocampus"]:
    dt = get_deg(tissue)
    dt["Tissue"] = tissue
    tissue_tables.append(dt)
# Row labels from each tissue table are kept as-is (no ignore_index), so the
# index of `df` is not unique across tissues.
df = pd.concat(tissue_tables, axis=0)
# Drop everything from the first "." onward so IDs can be matched across
# annotation versions; this also removes suffixes such as "_PAR_Y".
df["ensemblID"] = df.gene_id.str.replace("\\..*", "", regex=True)
df.shape

(1058, 7)

In [4]:
# Load the XCI status table and add a version-less Ensembl ID column
# (everything from the first "." of "Gene ID" onward is removed) to join on.
xci = pd.read_csv("../../_h/xci_status_hg19.txt", sep='\t').assign(
    ensemblID=lambda tbl: tbl["Gene ID"].str.replace("\\..*", "", regex=True)
)
xci.head(2)

Unnamed: 0,Gene name,Gene ID,Chr,Start position,End position,Transcript type,Combined XCI status,ensemblID
0,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape,ENSG00000182378
1,GTPBP6,ENSG00000178605.8,X,220025,230886,protein_coding,escape,ENSG00000178605


In [5]:
# Number of rows in the XCI table for each "Combined XCI status" value.
xci.groupby("Combined XCI status").size()

Combined XCI status
escape       99
inactive    431
variable    101
dtype: int64

In [6]:
# Keep only XCI "escape" genes, join them to the DE results on the
# version-less Ensembl ID, then show the rows with a positive t value.
escape_only = xci.loc[xci["Combined XCI status"] == "escape"]
tt = pd.merge(df, escape_only, on="ensemblID")
tt.loc[tt["t"] > 0]

Unnamed: 0,gene_id,seqname,Symbol,t,adj.P.Val,Tissue,ensemblID,Gene name,Gene ID,Chr,Start position,End position,Transcript type,Combined XCI status
55,ENSG00000182378.13_PAR_Y,chrY,PLCXD1,6.988151,4.239261e-09,caudate,ENSG00000182378,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape
56,ENSG00000182378.13_PAR_Y,chrY,PLCXD1,6.107645,9.826634e-07,dlpfc,ENSG00000182378,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape
57,ENSG00000182378.13_PAR_Y,chrY,PLCXD1,4.772588,0.000796611,hippocampus,ENSG00000182378,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape
72,ENSG00000002586.18_PAR_Y,chrY,CD99,4.264277,0.003645847,caudate,ENSG00000002586,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
73,ENSG00000002586.18_PAR_Y,chrY,CD99,3.901722,0.01477514,dlpfc,ENSG00000002586,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
74,ENSG00000002586.18_PAR_Y,chrY,CD99,4.073952,0.01481474,hippocampus,ENSG00000002586,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
80,ENSG00000169093.15_PAR_Y,chrY,ASMTL,3.831986,0.01701108,dlpfc,ENSG00000169093,ASMTL,ENSG00000169093.10,X,1522032,1572655,protein_coding,escape
83,ENSG00000178605.13_PAR_Y,chrY,GTPBP6,3.599197,0.02847852,dlpfc,ENSG00000178605,GTPBP6,ENSG00000178605.8,X,220025,230886,protein_coding,escape


#### These escape genes are also located in the pseudoautosomal regions (PARs) of the Y chromosome.

In [7]:
# Restrict to chrX rows, then split by the sign of t: positive t is labelled
# male, negative t female. Rows with t exactly 0 fall into neither subset.
on_chrx = df["seqname"] == "chrX"
xlinked = df.loc[on_chrx].copy()
xlinked_male = xlinked.loc[xlinked["t"] > 0].copy()
xlinked_female = xlinked.loc[xlinked["t"] < 0].copy()

In [8]:
# Number of chrX rows per tissue (both directions of effect).
xlinked.groupby("Tissue").size()

Tissue
caudate        45
dlpfc          60
hippocampus    31
dtype: int64

In [9]:
# Number of chrX rows with t > 0 per tissue.
xlinked_male.groupby("Tissue").size()

Tissue
caudate         3
dlpfc          18
hippocampus     1
dtype: int64

In [10]:
# Number of chrX rows with t < 0 per tissue.
xlinked_female.groupby("Tissue").size()

Tissue
caudate        42
dlpfc          42
hippocampus    30
dtype: int64

In [11]:
# Show the chrX rows with t > 0 (the subset named "male" in this notebook).
xlinked_male

Unnamed: 0,gene_id,seqname,Symbol,t,adj.P.Val,Tissue,ensemblID
126,ENSG00000213468.4,chrX,FIRRE,4.689886,0.000705,caudate,ENSG00000213468
132,ENSG00000186675.6,chrX,MAGEE2,4.613375,0.000956,caudate,ENSG00000186675
133,ENSG00000102001.12,chrX,CACNA1F,4.608831,0.000963,caudate,ENSG00000102001
97,ENSG00000172465.13,chrX,TCEAL1,4.402338,0.003347,dlpfc,ENSG00000172465
99,ENSG00000236064.1,chrX,,4.36458,0.003866,dlpfc,ENSG00000236064
125,ENSG00000277883.1,chrX,NLRP3P1,4.155232,0.007473,dlpfc,ENSG00000277883
130,ENSG00000184515.10,chrX,BEX5,4.126387,0.008105,dlpfc,ENSG00000184515
199,ENSG00000204071.10,chrX,TCEAL6,3.836015,0.017011,dlpfc,ENSG00000204071
232,ENSG00000147155.10,chrX,EBP,3.75115,0.02036,dlpfc,ENSG00000147155
277,ENSG00000232119.7,chrX,MCTS1,3.632172,0.026693,dlpfc,ENSG00000232119


In [12]:
# Annotate the chrX rows with t > 0 with their XCI status.
# `ensemblID` already exists on both frames: `xci` gets it right after the XCI
# table is loaded, and `xlinked_male` inherits it from `df`. It is therefore
# not recomputed here, which avoids mutating frames shown in earlier cells.
# NOTE: this is pandas' default inner join, so rows whose ensemblID has no
# entry in the XCI table are dropped from the output.
xlinked_male.merge(xci[["ensemblID", "Combined XCI status"]], on="ensemblID")

Unnamed: 0,gene_id,seqname,Symbol,t,adj.P.Val,Tissue,ensemblID,Combined XCI status
0,ENSG00000213468.4,chrX,FIRRE,4.689886,0.000705,caudate,ENSG00000213468,variable
1,ENSG00000102001.12,chrX,CACNA1F,4.608831,0.000963,caudate,ENSG00000102001,inactive
2,ENSG00000172465.13,chrX,TCEAL1,4.402338,0.003347,dlpfc,ENSG00000172465,inactive
3,ENSG00000236064.1,chrX,,4.36458,0.003866,dlpfc,ENSG00000236064,inactive
4,ENSG00000232119.7,chrX,MCTS1,3.632172,0.026693,dlpfc,ENSG00000232119,inactive
5,ENSG00000198932.12,chrX,GPRASP1,3.611593,0.027517,dlpfc,ENSG00000198932,inactive
6,ENSG00000102054.17,chrX,RBBP7,3.530376,0.03247,dlpfc,ENSG00000102054,variable
7,ENSG00000184905.8,chrX,TCEAL2,3.428241,0.039058,dlpfc,ENSG00000184905,inactive
8,ENSG00000184867.13,chrX,ARMCX2,3.417176,0.039832,dlpfc,ENSG00000184867,inactive
9,ENSG00000102401.19,chrX,ARMCX3,3.328593,0.045811,dlpfc,ENSG00000102401,inactive
