# Examining the overlap between the antipsychotics analysis and the main DE analysis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2,venn3

## Load data

### Load DE genes results: BrainSEQ caudate

In [None]:
def get_degs(df, fdr):
    """Return the rows of *df* whose ``adj.P.Val`` is strictly below *fdr*.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains an ``adj.P.Val`` column.
    fdr : float
        Exclusive upper bound applied to ``adj.P.Val``.

    Returns
    -------
    pandas.DataFrame
        The subset of *df* passing the threshold; index and columns are
        left untouched.
    """
    is_significant = df["adj.P.Val"] < fdr
    return df.loc[is_significant]


def _load_degs(path, fdr=0.05):
    """Read a tab-separated DE results table and keep rows with adj.P.Val < fdr."""
    full_table = pd.read_csv(path, sep='\t', index_col=0)
    return get_degs(full_table, fdr)


# Significant genes (adj.P.Val < 0.05) for the three result tables used in
# this notebook: the AP-vs-control file, the noAP-vs-control file and the
# main SZ-vs-control file.
ap = _load_degs("../../../_m/genes/diffExpr_sz_APVctl_full.txt")
noap = _load_degs("../../../_m/genes/diffExpr_sz_noAPVctl_full.txt")
# NOTE(review): this path goes up four directory levels, the two above go up
# three -- presumably the main analysis lives one level higher; confirm.
degs = _load_degs("../../../../_m/genes/diffExpr_szVctl_full.txt")

### Load animal study results

In [None]:
# Default location of the mouse-to-human gene mapping table.
_MOUSE2HUMAN_FN = "/ceph/users/jbenja13/projects/aanri/racial_diff/input/celltypes/_h/cell_type/mouse2human_gene.txt"


def mouse2human(fn=_MOUSE2HUMAN_FN):
    """Load the mouse-to-human gene mapping table.

    Parameters
    ----------
    fn : str or path-like, optional
        Tab-separated file to read. Defaults to the absolute path that was
        previously hard-coded here, so ``mouse2human()`` behaves as before;
        pass another path to run the notebook outside that filesystem.

    Returns
    -------
    pandas.DataFrame
        The table exactly as parsed by ``pandas.read_csv(fn, sep='\\t')``.
        Callers in this notebook rely on its ``Symbol_mouse`` and
        ``Symbol_human`` columns.
    """
    return pd.read_csv(fn, sep='\t')

# Chong 2002 (rat): read as-is, no mouse-to-human mapping is applied.
chong_2002 = pd.read_csv("../../_m/Chong2002_rat.csv")

# Kim 2018 (mice): attach the mouse-to-human mapping (merge defaults to an
# inner join, so genes without a mapping row are dropped), then keep only
# the rows whose Tissue is "Striatum".
kim_2018 = (
    pd.read_csv("../../_m/Kim2018_mice.csv")
    .merge(mouse2human(), left_on="Gene symbol", right_on="Symbol_mouse")
    .loc[lambda tbl: tbl["Tissue"] == "Striatum"]
    .copy()
)

# Korostynski 2013 (mice): attach the mapping, keep rows with
# ANOVA_drug_FDR < 0.05, collapse to one row per Gene_symbol with
# groupby(...).first(), and order by ANOVA_drug_FDR.
korostynski_2013 = (
    pd.read_csv("../../_m/Korostynski2013_mice.csv")
    .merge(mouse2human(), left_on="Gene_symbol", right_on="Symbol_mouse")
    .loc[lambda tbl: tbl["ANOVA_drug_FDR"] < 0.05]
    .groupby("Gene_symbol")
    .first()
    .reset_index()
    .sort_values("ANOVA_drug_FDR")
)

## Study overlap

### Percentage of overlap

#### SZ vs control

In [None]:
# Overlap between the SZ-vs-control DEGs (degs) and the Chong 2002 gene list.
brainseq_genes = set(degs.Symbol)
chong_genes = set(chong_2002.loc[:, "Gene Name (Human)"])

a = len(brainseq_genes.intersection(chong_genes))  # number of shared symbols
b = a / len(chong_genes)                           # shared fraction of the Chong 2002 list
c = a / len(brainseq_genes)                        # shared fraction of the BrainSEQ DEGs
print("Chong 2002 total DE: %d\n" % len(chong_genes))
print("Overlap with Chong 2002 (#): %d\nPercent Overlap (Chong): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ-vs-control DEGs (degs) and the Korostynski 2013 gene list.
brainseq_genes = set(degs.Symbol)
korostynski_genes = set(korostynski_2013.Symbol_human)

a = len(brainseq_genes.intersection(korostynski_genes))  # number of shared symbols
b = a / len(korostynski_genes)                           # shared fraction of the Korostynski 2013 list
c = a / len(brainseq_genes)                              # shared fraction of the BrainSEQ DEGs
print("Korostynski 2013 total DE: %d\n" % len(korostynski_genes))
print("Overlap with Korostynski 2013 (#): %d\nPercent Overlap (Korostynski): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ-vs-control DEGs (degs) and the Kim 2018 gene list.
brainseq_genes = set(degs.Symbol)
kim_genes = set(kim_2018.Symbol_human)

a = len(brainseq_genes.intersection(kim_genes))  # number of shared symbols
b = a / len(kim_genes)                           # shared fraction of the Kim 2018 list
c = a / len(brainseq_genes)                      # shared fraction of the BrainSEQ DEGs
print("Kim 2018 total DE: %d\n" % len(kim_genes))
print("Overlap with Kim 2018 (#): %d\nPercent Overlap (Kim): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

#### SZ AP vs control

In [None]:
# Overlap between the SZ AP-vs-control DEGs (ap) and the Chong 2002 gene list.
brainseq_genes = set(ap.Symbol)
chong_genes = set(chong_2002.loc[:, "Gene Name (Human)"])

a = len(brainseq_genes.intersection(chong_genes))  # number of shared symbols
b = a / len(chong_genes)                           # shared fraction of the Chong 2002 list
c = a / len(brainseq_genes)                        # shared fraction of the BrainSEQ DEGs
print("Chong 2002 total DE: %d\n" % len(chong_genes))
print("Overlap with Chong 2002 (#): %d\nPercent Overlap (Chong): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ AP-vs-control DEGs (ap) and the Korostynski 2013 gene list.
brainseq_genes = set(ap.Symbol)
korostynski_genes = set(korostynski_2013.Symbol_human)

a = len(brainseq_genes.intersection(korostynski_genes))  # number of shared symbols
b = a / len(korostynski_genes)                           # shared fraction of the Korostynski 2013 list
c = a / len(brainseq_genes)                              # shared fraction of the BrainSEQ DEGs
print("Korostynski 2013 total DE: %d\n" % len(korostynski_genes))
print("Overlap with Korostynski 2013 (#): %d\nPercent Overlap (Korostynski): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ AP-vs-control DEGs (ap) and the Kim 2018 gene list.
brainseq_genes = set(ap.Symbol)
kim_genes = set(kim_2018.Symbol_human)

a = len(brainseq_genes.intersection(kim_genes))  # number of shared symbols
b = a / len(kim_genes)                           # shared fraction of the Kim 2018 list
c = a / len(brainseq_genes)                      # shared fraction of the BrainSEQ DEGs
print("Kim 2018 total DE: %d\n" % len(kim_genes))
print("Overlap with Kim 2018 (#): %d\nPercent Overlap (Kim): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

#### SZ noAP vs control

In [None]:
# Overlap between the SZ noAP-vs-control DEGs (noap) and the Chong 2002 gene list.
brainseq_genes = set(noap.Symbol)
chong_genes = set(chong_2002.loc[:, "Gene Name (Human)"])

a = len(brainseq_genes.intersection(chong_genes))  # number of shared symbols
b = a / len(chong_genes)                           # shared fraction of the Chong 2002 list
c = a / len(brainseq_genes)                        # shared fraction of the BrainSEQ DEGs
print("Chong 2002 total DE: %d\n" % len(chong_genes))
print("Overlap with Chong 2002 (#): %d\nPercent Overlap (Chong): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ noAP-vs-control DEGs (noap) and the Korostynski 2013 gene list.
brainseq_genes = set(noap.Symbol)
korostynski_genes = set(korostynski_2013.Symbol_human)

a = len(brainseq_genes.intersection(korostynski_genes))  # number of shared symbols
b = a / len(korostynski_genes)                           # shared fraction of the Korostynski 2013 list
c = a / len(brainseq_genes)                              # shared fraction of the BrainSEQ DEGs
print("Korostynski 2013 total DE: %d\n" % len(korostynski_genes))
print("Overlap with Korostynski 2013 (#): %d\nPercent Overlap (Korostynski): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

In [None]:
# Overlap between the SZ noAP-vs-control DEGs (noap) and the Kim 2018 gene list.
brainseq_genes = set(noap.Symbol)
kim_genes = set(kim_2018.Symbol_human)

a = len(brainseq_genes.intersection(kim_genes))  # number of shared symbols
b = a / len(kim_genes)                           # shared fraction of the Kim 2018 list
c = a / len(brainseq_genes)                      # shared fraction of the BrainSEQ DEGs
print("Kim 2018 total DE: %d\n" % len(kim_genes))
print("Overlap with Kim 2018 (#): %d\nPercent Overlap (Kim): %.1f%%\nPercent Overlap (BrainSEQ): %.1f%%" % (a, b*100, c*100))

### Plot venn diagrams

In [None]:
# Global matplotlib font settings applied to every figure drawn after this cell.
plt.rcParams['font.size'] = 22
plt.rcParams['font.weight'] = 'bold'

In [None]:
def plot_pairwise_venn(gene_names, dfx, label1, label2):
    """Draw and save a two-set Venn diagram of an external gene list vs. DEGs.

    Parameters
    ----------
    gene_names : set
        Gene symbols from the external study (left circle, drawn in red).
    dfx : pandas.DataFrame
        Table with a ``Symbol`` column; its unique symbols form the right
        circle (drawn in blue).
    label1 : str
        Label for the external study.
    label2 : str
        Label for the BrainSEQ comparison, shown as ``BrainSEQ (<label2>)``.

    Side effects
    ------------
    Creates a new 8x8 figure and writes ``venn_<label1>_<label2>.png`` and
    ``.pdf`` (spaces in the labels replaced by underscores) to the current
    working directory. Prints "There is no overlap!" when the intersection
    region is absent.
    """
    fn = "venn_%s_%s" % (label1.replace(" ", "_"), label2.replace(" ", "_"))
    plt.figure(figsize=(8,8))
    v = venn2([gene_names, set(dfx.Symbol)],
              set_labels = (label1, "BrainSEQ (%s)"%label2))
    # get_patch_by_id gives None for a region that is not drawn (no genes in
    # it). The original only guarded '11'; an empty '10' or '01' region
    # (one set empty or contained in the other) raised AttributeError.
    region_colors = (('10', 'red'), ('01', 'blue'), ('11', 'purple'))
    for region_id, color in region_colors:
        patch = v.get_patch_by_id(region_id)
        if patch is None:
            if region_id == '11':
                print("There is no overlap!")
            continue
        patch.set_color(color)
        patch.set_alpha(0.75)
    plt.savefig('%s.png' % fn)
    plt.savefig('%s.pdf' % fn)

#### SZ vs control

In [None]:
# One Venn diagram per animal study against the SZ-vs-control DEGs (degs).
animal_studies = (
    ("Chong 2002", set(chong_2002.loc[:, "Gene Name (Human)"])),
    ("Korostynski 2013", set(korostynski_2013.Symbol_human)),
    ("Kim 2018", set(kim_2018.Symbol_human)),
)
for study_label, study_genes in animal_studies:
    plot_pairwise_venn(study_genes, degs, study_label, "SZ")

#### SZ AP vs control

In [None]:
# One Venn diagram per animal study against the SZ AP-vs-control DEGs (ap).
animal_studies = (
    ("Chong 2002", set(chong_2002.loc[:, "Gene Name (Human)"])),
    ("Korostynski 2013", set(korostynski_2013.Symbol_human)),
    ("Kim 2018", set(kim_2018.Symbol_human)),
)
for study_label, study_genes in animal_studies:
    plot_pairwise_venn(study_genes, ap, study_label, "SZ AP")

#### SZ noAP vs control

In [None]:
# One Venn diagram per animal study against the SZ noAP-vs-control DEGs (noap).
animal_studies = (
    ("Chong 2002", set(chong_2002.loc[:, "Gene Name (Human)"])),
    ("Korostynski 2013", set(korostynski_2013.Symbol_human)),
    ("Kim 2018", set(kim_2018.Symbol_human)),
)
for study_label, study_genes in animal_studies:
    plot_pairwise_venn(study_genes, noap, study_label, "SZ noAP")