# Extract overlapping genes with CMC

In [None]:
import functools
import numpy as np
import pandas as pd
from gtfparse import read_gtf

In [None]:
@functools.lru_cache(maxsize=128)
def get_gtf(gtf_file):
    """Parse a GTF file with ``read_gtf`` and memoize the result.

    The parsed table is cached per ``gtf_file`` argument, so repeated calls
    with the same path reuse the already-parsed object instead of reading
    the file again.
    """
    parsed = read_gtf(gtf_file)
    return parsed


In [None]:
def gene_annotation(gtf_file, feature):
    """Return the core annotation columns for one feature type of a GTF.

    Parameters
    ----------
    gtf_file : str
        Path to the GTF annotation; it is loaded through the cached
        ``get_gtf`` helper.
    feature : str
        Value to match against the ``feature`` column (e.g. ``'gene'``).

    Returns
    -------
    DataFrame
        Rows whose ``feature`` equals ``feature``, restricted to the columns
        ``gene_id``, ``gene_name``, ``gene_type``, ``seqname``, ``start``,
        ``end`` and ``strand``. The result is an explicit copy, so callers
        may add or modify columns on it directly.
    """
    # NOTE(review): assumes get_gtf returns a pandas DataFrame -- confirm
    # against the installed gtfparse version.
    gtf0 = get_gtf(gtf_file)
    gtf = gtf0[gtf0["feature"] == feature]
    columns = ["gene_id", "gene_name", "gene_type",
               "seqname", "start", "end", "strand"]
    # Return an explicit copy: the selection is derived from a filtered
    # frame, and assigning a new column onto it (as the notebook does for
    # the Ensembl ID) otherwise triggers pandas' SettingWithCopyWarning.
    return gtf[columns].copy()

In [None]:
# Annotation GTF used for every comparison in this notebook.
gtf_file = '/ceph/genome/human/gencode25/gtf.CHR/_m/gencode.v25.annotation.gtf'

# Keep only the rows whose feature is 'gene'.
gtf_annot = gene_annotation(gtf_file, feature='gene')

# Derive an ID without the trailing ".<suffix>" part of gene_id; this is the
# key used to merge with the CMC tables below.
gtf_annot['ensemblID'] = gtf_annot['gene_id'].str.replace(
    "\\..*", "", regex=True
)

## Male specific

In [None]:
# Male-specific table, joined to the gene annotation on the shared
# 'ensemblID' column (inner join: only IDs present in both are kept).
cmc_file = "../../_m/cmc_all_deg_across_tissues_maleSpecific.csv"
df = pd.merge(gtf_annot, pd.read_csv(cmc_file), how='inner', on='ensemblID')
# Preview the first two merged rows.
df.head(n=2)

### CMC DLPFC overlapping Caudate

In [None]:
# Rows where both the 'CMC DLPFC' and 'Caudate' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['Caudate'].eq(1)]

### CMC DLPFC overlapping DLPFC

In [None]:
# Rows where both the 'CMC DLPFC' and 'DLPFC' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['DLPFC'].eq(1)]

### CMC DLPFC overlapping Hippocampus

In [None]:
# Rows where both the 'CMC DLPFC' and 'Hippocampus' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['Hippocampus'].eq(1)]

### CMC DLPFC overlapping Caudate & DLPFC

In [None]:
# Rows where 'CMC DLPFC', 'Caudate' and 'DLPFC' all equal 1.
df.loc[
    df['CMC DLPFC'].eq(1)
    & df['Caudate'].eq(1)
    & df['DLPFC'].eq(1)
]

## Female specific

In [None]:
# Female-specific table, joined to the gene annotation on the shared
# 'ensemblID' column (inner join: only IDs present in both are kept).
# Note that this rebinds `df`, replacing the male-specific table.
cmc_file = "../../_m/cmc_all_deg_across_tissues_femaleSpecific.csv"
df = pd.merge(gtf_annot, pd.read_csv(cmc_file), how='inner', on='ensemblID')
# Preview the first two merged rows.
df.head(n=2)

### CMC DLPFC overlapping Caudate

In [None]:
# Rows where both the 'CMC DLPFC' and 'Caudate' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['Caudate'].eq(1)]

### CMC DLPFC overlapping DLPFC

In [None]:
# Rows where both the 'CMC DLPFC' and 'DLPFC' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['DLPFC'].eq(1)]

### CMC DLPFC overlapping Hippocampus

In [None]:
# Rows where both the 'CMC DLPFC' and 'Hippocampus' columns equal 1.
df.loc[df['CMC DLPFC'].eq(1) & df['Hippocampus'].eq(1)]

### CMC DLPFC overlapping Caudate & DLPFC

In [None]:
# Rows where 'CMC DLPFC', 'Caudate' and 'DLPFC' all equal 1.
df.loc[
    df['CMC DLPFC'].eq(1)
    & df['Caudate'].eq(1)
    & df['DLPFC'].eq(1)
]