# Extract male-biased genes on the X chromosome

In [1]:
import session_info
import pandas as pd
from pyhere import here

In [2]:
def get_deg():
    """Load the significant gene-level rows of the sex DE summary table.

    Reads the tab-separated, gzip-compressed summary table located (via
    ``here``) under ``differential_expression/tissue_comparison/summary_table/_m``,
    keeps a fixed set of nine columns, and returns only the rows where
    ``Type`` equals ``"Gene"`` and ``adj.P.Val`` is below 0.05.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the filtered rows, with columns in the order
        listed in ``wanted_columns``.
    """
    wanted_columns = ["Tissue", "Feature", "ensemblID", "Symbol",
                      "seqnames", "Type", "t", "Chrom_Type", "adj.P.Val"]
    table_path = here("differential_expression/tissue_comparison/summary_table",
                      "_m/differential_expression_analysis_4features_sex.txt.gz")
    deg = pd.read_csv(table_path, sep='\t').loc[:, wanted_columns]
    # Gene-level features only, at the 5% adjusted p-value threshold.
    is_gene = deg["Type"] == "Gene"
    is_significant = deg["adj.P.Val"] < 0.05
    return deg[is_gene & is_significant].copy()

In [3]:
# Significant gene-level DE results (Type == "Gene", adj.P.Val < 0.05) as filtered by get_deg().
df = get_deg()

In [4]:
# Read the tab-separated XCI status table and add an `ensemblID` column: the
# "Gene ID" value with everything from the first "." onward removed, so it can
# be matched against the `ensemblID` column of the DE results.
xci = (
    pd.read_csv("../../_h/xci_status_hg19.txt", sep='\t')
      .assign(ensemblID=lambda tbl: tbl["Gene ID"].str.replace("\\..*", "", regex=True))
)
xci.head(2)

Unnamed: 0,Gene name,Gene ID,Chr,Start position,End position,Transcript type,Combined XCI status,ensemblID
0,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape,ENSG00000182378
1,GTPBP6,ENSG00000178605.8,X,220025,230886,protein_coding,escape,ENSG00000178605


In [5]:
# Number of rows in the XCI table for each "Combined XCI status" category.
xci.groupby("Combined XCI status").size()

Combined XCI status
escape       99
inactive    431
variable    101
dtype: int64

In [6]:
# Restrict the XCI table to genes annotated as "escape", inner-join them to the
# significant DE genes on `ensemblID`, then display the rows with a positive t.
escape_only = xci.loc[xci["Combined XCI status"] == "escape"]
tt = pd.merge(df, escape_only, on="ensemblID")
tt.loc[tt["t"] > 0]

Unnamed: 0,Tissue,Feature,ensemblID,Symbol,seqnames,Type,t,Chrom_Type,adj.P.Val,Gene name,Gene ID,Chr,Start position,End position,Transcript type,Combined XCI status
26,Caudate,CD99|ENSG00000002586.20,ENSG00000002586,CD99,chrX,Gene,20.718194,Allosome,9.927600000000001e-62,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
27,DLPFC,CD99|ENSG00000002586.20,ENSG00000002586,CD99,chrX,Gene,10.690853,Allosome,1.487166e-20,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
28,Hippocampus,CD99|ENSG00000002586.20,ENSG00000002586,CD99,chrX,Gene,10.917016,Allosome,1.493665e-21,CD99,ENSG00000002586.13,X,2609220,2659350,protein_coding,escape
34,Caudate,ZBED1|ENSG00000214717.12,ENSG00000214717,ZBED1,chrX,Gene,14.600959,Allosome,4.860361e-36,ZBED1,ENSG00000214717.5,X,2404455,2419008,protein_coding,escape
35,DLPFC,ZBED1|ENSG00000214717.12,ENSG00000214717,ZBED1,chrX,Gene,6.980985,Allosome,5.207188e-09,ZBED1,ENSG00000214717.5,X,2404455,2419008,protein_coding,escape
36,Hippocampus,ZBED1|ENSG00000214717.12,ENSG00000214717,ZBED1,chrX,Gene,13.595283,Allosome,1.234819e-31,ZBED1,ENSG00000214717.5,X,2404455,2419008,protein_coding,escape
37,Caudate,DHRSX|ENSG00000169084.15,ENSG00000169084,DHRSX,chrX,Gene,13.689327,Allosome,2.277083e-32,DHRSX,ENSG00000169084.8,X,2137557,2420846,protein_coding,escape
38,DLPFC,DHRSX|ENSG00000169084.15,ENSG00000169084,DHRSX,chrX,Gene,7.063517,Allosome,3.144477e-09,DHRSX,ENSG00000169084.8,X,2137557,2420846,protein_coding,escape
39,Hippocampus,DHRSX|ENSG00000169084.15,ENSG00000169084,DHRSX,chrX,Gene,10.147705,Allosome,7.500810999999999e-19,DHRSX,ENSG00000169084.8,X,2137557,2420846,protein_coding,escape
46,Caudate,PLCXD1|ENSG00000182378.15,ENSG00000182378,PLCXD1,chrX,Gene,11.285881,Allosome,4.067496e-23,PLCXD1,ENSG00000182378.8,X,192989,220023,protein_coding,escape


#### Escape genes are also located in the pseudoautosomal regions (PARs) of the Y chromosome.

In [7]:
# Split the significant DE genes by chromosome and by the sign of the t statistic.
# Significant genes annotated on chrX.
xlinked = df.loc[df["seqnames"].eq("chrX")].copy()
# Positive-t genes on either sex chromosome (chrX or chrY).
xx_male = df.loc[df["seqnames"].isin(["chrX", "chrY"]) & df["t"].gt(0)].copy()
# X-linked genes with positive t and with negative t, respectively.
xlinked_male = xlinked.loc[xlinked["t"].gt(0)].copy()
xlinked_female = xlinked.loc[xlinked["t"].lt(0)].copy()

In [8]:
# Number of significant chrX genes per tissue.
xlinked.groupby("Tissue").size()

Tissue
Caudate        72
DLPFC          36
Hippocampus    44
dtype: int64

In [9]:
# Number of significant chrX genes with t > 0 per tissue.
xlinked_male.groupby("Tissue").size()

Tissue
Caudate        22
DLPFC          14
Hippocampus    13
dtype: int64

In [10]:
# Number of significant chrX genes with t < 0 per tissue.
xlinked_female.groupby("Tissue").size()

Tissue
Caudate        50
DLPFC          22
Hippocampus    31
dtype: int64

In [11]:
# Display the significant chrX genes with t > 0.
xlinked_male

Unnamed: 0,Tissue,Feature,ensemblID,Symbol,seqnames,Type,t,Chrom_Type,adj.P.Val
53,Caudate,CD99|ENSG00000002586.20,ENSG00000002586,CD99,chrX,Gene,20.718194,Allosome,9.927600000000001e-62
56,Caudate,PRKCIP1|ENSG00000237682.2,ENSG00000237682,PRKCIP1,chrX,Gene,17.897618,Allosome,9.042190999999998e-50
62,Caudate,ZBED1|ENSG00000214717.12,ENSG00000214717,ZBED1,chrX,Gene,14.600959,Allosome,4.860361e-36
65,Caudate,DHRSX|ENSG00000169084.15,ENSG00000169084,DHRSX,chrX,Gene,13.689327,Allosome,2.277083e-32
66,Caudate,ENSG00000289007|ENSG00000289007.2,ENSG00000289007,ENSG00000289007,chrX,Gene,12.947296,Allosome,1.9787790000000001e-29
72,Caudate,PLCXD1|ENSG00000182378.15,ENSG00000182378,PLCXD1,chrX,Gene,11.285881,Allosome,4.067496e-23
77,Caudate,GTPBP6|ENSG00000178605.14,ENSG00000178605,GTPBP6,chrX,Gene,10.387204,Allosome,6.926781e-20
91,Caudate,ASMTL|ENSG00000169093.16,ENSG00000169093,ASMTL,chrX,Gene,7.896321,Allosome,9.027584e-12
94,Caudate,PPP2R3B|ENSG00000167393.18,ENSG00000167393,PPP2R3B,chrX,Gene,7.727152,Allosome,2.781165e-11
100,Caudate,CD99P1|ENSG00000223773.9,ENSG00000223773,CD99P1,chrX,Gene,6.853646,Allosome,7.729268e-09


In [12]:
# Annotate the chrX genes with t > 0 with their XCI status. The left join keeps
# genes that are absent from the XCI table; only their missing status is
# labelled "unknown". (A blanket DataFrame.fillna("unknown") would also
# overwrite missing values in every other column, e.g. a missing Symbol.)
xlinked_male.merge(
    xci[["ensemblID", "Combined XCI status"]], on="ensemblID", how="left"
).fillna({"Combined XCI status": "unknown"})

Unnamed: 0,Tissue,Feature,ensemblID,Symbol,seqnames,Type,t,Chrom_Type,adj.P.Val,Combined XCI status
0,Caudate,CD99|ENSG00000002586.20,ENSG00000002586,CD99,chrX,Gene,20.718194,Allosome,9.927600000000001e-62,escape
1,Caudate,PRKCIP1|ENSG00000237682.2,ENSG00000237682,PRKCIP1,chrX,Gene,17.897618,Allosome,9.042190999999998e-50,unknown
2,Caudate,ZBED1|ENSG00000214717.12,ENSG00000214717,ZBED1,chrX,Gene,14.600959,Allosome,4.860361e-36,escape
3,Caudate,DHRSX|ENSG00000169084.15,ENSG00000169084,DHRSX,chrX,Gene,13.689327,Allosome,2.277083e-32,escape
4,Caudate,ENSG00000289007|ENSG00000289007.2,ENSG00000289007,ENSG00000289007,chrX,Gene,12.947296,Allosome,1.9787790000000001e-29,unknown
5,Caudate,PLCXD1|ENSG00000182378.15,ENSG00000182378,PLCXD1,chrX,Gene,11.285881,Allosome,4.067496e-23,escape
6,Caudate,GTPBP6|ENSG00000178605.14,ENSG00000178605,GTPBP6,chrX,Gene,10.387204,Allosome,6.926781e-20,escape
7,Caudate,ASMTL|ENSG00000169093.16,ENSG00000169093,ASMTL,chrX,Gene,7.896321,Allosome,9.027584e-12,escape
8,Caudate,PPP2R3B|ENSG00000167393.18,ENSG00000167393,PPP2R3B,chrX,Gene,7.727152,Allosome,2.781165e-11,escape
9,Caudate,CD99P1|ENSG00000223773.9,ENSG00000223773,CD99P1,chrX,Gene,6.853646,Allosome,7.729268e-09,escape


In [13]:
# chrX genes with t > 0 that have no XCI annotation. The left join keeps every
# such gene; the fill is restricted to the status column so that missing values
# in other columns are not rewritten to "unknown" before the table is exported.
dx = xlinked_male.merge(
    xci[["ensemblID", "Combined XCI status"]], on="ensemblID", how="left"
).fillna({"Combined XCI status": "unknown"})
dx = dx[dx["Combined XCI status"] == "unknown"].copy()

In [14]:
# Combine the positive-t chrX/chrY genes that have an XCI annotation (inner
# join on `ensemblID`) with the unannotated chrX genes in `dx`, sort the
# result, and write it out as a tab-separated file without the index.
xci_status = xci[["ensemblID", "Combined XCI status"]]
annotated_male = xx_male.merge(xci_status, on="ensemblID")
male_biased = pd.concat([annotated_male, dx], axis=0)
male_biased = male_biased.sort_values(["Tissue",  "Combined XCI status", "seqnames"], ascending=True)
male_biased.to_csv("BrainSeq_male_biased_genes_XCI_status.tsv", sep='\t', index=False)

## Session information

In [15]:
# Display the session report from session_info (presumably package and
# interpreter versions — confirm against the session_info documentation).
session_info.show()