# Generate supplementary data for DEGs

In [1]:
import pandas as pd

## Function

In [2]:
# Output columns shared by every feature type, in the order they are returned.
_DEG_COLUMNS = ["Feature", "gencodeID", "ensemblID", "Symbol", "logFC",
                "AveExpr", "t", "P.Value", "adj.P.Val", "Type"]

# Tissue display name -> name of that tissue's results directory.
_TISSUE_DIRS = {"Caudate": "caudate", "Dentate Gyrus": "dentateGyrus",
                "DLPFC": "dlpfc", "Hippocampus": "hippocampus"}


def _load_feature_table(path, feature_type, drop=None, rename=None,
                        derive_ensembl=False):
    """Read one tab-separated results file and normalise its columns.

    Parameters
    ----------
    path : str
        File to read; its first column becomes the ``Feature`` column.
    feature_type : str
        Label stored in the ``Type`` column.
    drop : list of str, optional
        Columns removed before renaming (so a renamed column can take
        over the name of a dropped one).
    rename : dict, optional
        Old-name -> new-name mapping applied to the columns.
    derive_ensembl : bool, default False
        When true, build ``ensemblID`` from ``gencodeID`` by removing
        everything from the first ``.`` onwards.

    Returns
    -------
    pandas.DataFrame
        The ``_DEG_COLUMNS`` columns, on a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the file.
    """
    table = pd.read_csv(path, sep='\t', index_col=0)
    if drop:
        table = table.drop(drop, axis=1)
    if rename:
        table = table.rename(columns=rename)
    if derive_ensembl:
        table["ensemblID"] = table["gencodeID"].str.replace(r"\..*", "", regex=True)
    table["Feature"] = table.index
    table["Type"] = feature_type
    # drop=True: the index is already copied into "Feature"; re-inserting it
    # as a column would raise if the file's index column shares a name with
    # an existing column (e.g. an index column that is itself called "Feature").
    return table.loc[:, _DEG_COLUMNS].reset_index(drop=True)


def get_tissues_DEG(tissue, base_dir="../../.."):
    """Combine gene, transcript, exon and junction results for one tissue.

    Reads ``<base_dir>/<tissue dir>/_m/<feature dir>/diffExpr_EAvsAA_full.txt``
    for the ``genes``, ``transcripts``, ``exons`` and ``junctions`` feature
    directories and stacks the four tables.

    Parameters
    ----------
    tissue : str
        One of "Caudate", "Dentate Gyrus", "DLPFC" or "Hippocampus".
    base_dir : str, default "../../.."
        Directory that contains the per-tissue result directories.

    Returns
    -------
    pandas.DataFrame
        Columns ``_DEG_COLUMNS`` plus ``Tissue`` (set to ``tissue``). Rows
        are ordered Gene, Transcript, Exon, Junction; the index restarts at
        0 for each feature type.

    Raises
    ------
    KeyError
        If ``tissue`` is not a supported name, or an expected column is
        missing from one of the files.
    """
    try:
        tissue_dir = _TISSUE_DIRS[tissue]
    except KeyError:
        raise KeyError("Unknown tissue %r; expected one of %s"
                       % (tissue, sorted(_TISSUE_DIRS))) from None

    def result_path(feature_dir):
        return "%s/%s/_m/%s/diffExpr_EAvsAA_full.txt" % (base_dir, tissue_dir,
                                                         feature_dir)

    tables = [
        _load_feature_table(result_path("genes"), "Gene"),
        # Transcript files carry gene_id / gene_name and no ensemblID column.
        _load_feature_table(result_path("transcripts"), "Transcript",
                            rename={"gene_id": "gencodeID",
                                    "gene_name": "Symbol"},
                            derive_ensembl=True),
        _load_feature_table(result_path("exons"), "Exon"),
        # Junction files: the existing Symbol column is discarded in favour
        # of newGeneSymbol, and newGeneID supplies the gencodeID.
        _load_feature_table(result_path("junctions"), "Junction",
                            drop=["Symbol"],
                            rename={"newGeneID": "gencodeID",
                                    "newGeneSymbol": "Symbol"},
                            derive_ensembl=True),
    ]
    df = pd.concat(tables, axis=0)
    df["Tissue"] = tissue
    return df

## Main

In [3]:
# Build the combined feature table for each region, in the same order as
# the names on the left-hand side.
caudate, dlpfc, gyrus, hippo = (
    get_tissues_DEG(region)
    for region in ("Caudate", "DLPFC", "Dentate Gyrus", "Hippocampus")
)

In [None]:
# Stack the four per-region tables row-wise (concat's default axis); the
# cell's last expression shows the resulting (rows, columns) shape.
df = pd.concat([caudate, dlpfc, gyrus, hippo])
df.shape

In [None]:
# Number of rows for every (Tissue, Type) combination.
df.groupby(by=["Tissue", "Type"]).size()

### Save files

In [None]:
# Write the complete combined table as tab-separated text, without the index.
df.to_csv(
    "diffExpr_ancestry_full_4regions.tsv",
    sep="\t",
    index=False,
)

In [None]:
# Write only the rows whose adjusted P-value is below 0.05 (the "FDR05"
# subset, going by the output file name).
df.loc[df["adj.P.Val"] < 0.05].to_csv(
    "diffExpr_ancestry_FDR05_4regions.tsv",
    sep="\t",
    index=False,
)