# Compare with Gene Sets

In [None]:
import json, pickle
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

## 1. Load Data and Set the Gene Sets

In [None]:
# Load the inputs for the comparison. Files are opened with context managers so the
# handles are closed as soon as each object has been read.
present_transcripts_df = pd.read_csv('../../data/expression_by_probe.csv', index_col=0)

with open('../../data/experimental_groups.json') as groups_file:
    experimental_groups = json.load(groups_file)

# NOTE: pickle.load can execute arbitrary code while unpickling; only load pickles that
# were produced by this project / a trusted source.
with open('../../data/msigdb_marzec_gene_sets.p', 'rb') as gene_sets_file:
    gene_sets_dict = pickle.load(gene_sets_file)

with open('../../results/diffexp_genes_fold_change_ph.p', 'rb') as diffexp_file:
    diffexp_genes_fold_change_ph = pickle.load(diffexp_file)

In [None]:
# Keeping track of what condition each gene set measures, since the names are confusing.
# Each label has three '_'-separated fields: the two conditions being compared and the
# direction ('up' / 'dn'). The plotting loop below only uses labels whose second field
# is 'Control'.
# The IL2_ACT-vs-IL15_ACT sets are labelled 'IL2_IL15_*' (not 'IL15_Control_*') so they are
# treated like the other cytokine-vs-cytokine sets ('IL15_IL21_*', 'IL2_IL21_*').
# NOTE(review): MSigDB presumably names 'A_VS_B_UP' as genes up in A relative to B --
# confirm the up/dn suffixes below match the intended direction for the STARVED_VS_* sets.
gene_sets = {
    'GSE8685_IL15_ACT_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL15_IL21_up',
    'GSE8685_IL15_ACT_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL15_IL21_dn',
    'GSE8685_IL2_ACT_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL2_IL15_dn',
    'GSE8685_IL2_ACT_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL2_IL15_up',
    'GSE8685_IL2_ACT_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL2_IL21_dn',
    'GSE8685_IL2_ACT_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL2_IL21_up',
    'GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL15_Control_dn',
    'GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL15_Control_up',
    'GSE8685_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL21_Control_dn',
    'GSE8685_IL2_STARVED_VS_IL21_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL21_Control_up',
    'GSE8685_IL2_STARVED_VS_IL2_ACT_IL2_STARVED_CD4_TCELL_DN': 'IL2_Control_dn',
    'GSE8685_IL2_STARVED_VS_IL2_ACT_IL2_STARVED_CD4_TCELL_UP': 'IL2_Control_up',
    'IL15_UP.V1_DN': 'IL15_Control_dn',
    'IL15_UP.V1_UP': 'IL15_Control_up',
    'IL21_UP.V1_DN': 'IL21_Control_dn',
    'IL21_UP.V1_UP': 'IL21_Control_up',
    'IL2_UP.V1_DN': 'IL2_Control_dn',
    'IL2_UP.V1_UP': 'IL2_Control_up',
    'MARZEC_IL2_SIGNALING_DN': 'IL2_Control_dn',
    'MARZEC_IL2_SIGNALING_UP': 'IL2_Control_up',
}

## 2. Plot the Gene Set Overlaps

In [None]:
# Draw one Venn diagram per control-referenced gene set, showing the overlap between the
# published gene set and our differentially expressed genes for the same condition.
for gene_set, label in gene_sets.items():
    # Labels have three '_'-separated fields, e.g. 'IL15_Control_up'
    treatment, baseline, up_or_dn = label.split('_')

    # Only comparing fold change from control
    if baseline != 'Control':
        continue

    # Get the relevant gene sets; our results are keyed as '<baseline>_<treatment>'
    gene_set_genes = set(gene_sets_dict[gene_set])
    diff_exp_genes = set(diffexp_genes_fold_change_ph[f'{baseline}_{treatment}'][up_or_dn])

    # Plot Venn Diagrams
    fig, ax = plt.subplots()
    venn2([gene_set_genes, diff_exp_genes], set_labels=[gene_set, 'our genes'], ax=ax)
    ax.set_title(f'{treatment} vs {baseline} ({up_or_dn})')

    # Render now and release the figure so figures do not accumulate across iterations
    plt.show()
    plt.close(fig)