# Analysis Part VII - Differentially Expressed Genes

In [None]:
%load_ext autoreload
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore')
import os
import scanpy as sc
import scirpy as ir
import anndata as ann
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mudata import MuData
import mudata

import tarfile
import warnings
from glob import glob

import anndata
import matplotlib.pyplot as plt
import muon as mu
import pandas as pd
import scanpy as sc
import scirpy as ir

%autoreload 2
import sys
sys.path.append('..')
import utility.annotation as utils_annotation
import utility.representation as utils_representation
import utility.visualisation as utils_vis

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Plot settings. All figure parameters go through ONE set_figure_params call:
# an argument that is left out falls back to its default, so the former second
# call (which did not pass dpi) silently reverted the dpi=150 set by the first.
sc.set_figure_params(dpi=150, vector_friendly=True, color_map='viridis', transparent=True)
sc.settings.verbosity = 3
sb.set_style('whitegrid')

# Custom colour map running from dark blue over yellow to dark red; it is the
# `cmap` passed to the dotplots further down.
colors = ['darkblue', 'blue', 'lightblue', '#FFEA00', 'orange', 'red', 'darkred']
cust = LinearSegmentedColormap.from_list('custom_cmap', colors)
colormap = cust  # second name for the same colormap object

## Get input data

In [None]:
# Load the multimodal dataset; the 'gex' modality is what every cell below works on.
h5mu_path = '/Users/mimi/Sina/data_specificity_annotated_final_pseudotime_cite.h5mu'
mdata = mu.read(h5mu_path)

## Differences between Day 0 naive and Day 365 naive cells

In [None]:
# Preview the subset: NS4B214-specific cells with FACS phenotype 'N' from d0 and d365.
gex_obs = mdata["gex"].obs
in_time = gex_obs['time'].isin(['d0', 'd365'])
is_specific = gex_obs['specific_new'].isin(['NS4B214'])
is_naive = gex_obs['FACS_Phenotype_v3'].isin(['N'])
adata_tmp = mdata["gex"][in_time & is_specific & is_naive]
adata_tmp

In [None]:
# DEG test: d0 against d365 (reference) within NS4B214-specific cells whose
# FACS phenotype is 'N'.
# .copy() makes the subset an independent AnnData. rank_genes_groups writes its
# results into .uns; on a view that only works through an implicit copy whose
# warning is silenced by the warning filters at the top of this notebook.
adata_tmp = mdata["gex"][(mdata["gex"].obs['time'].isin(['d0', 'd365'])) & (mdata["gex"].obs['specific_new'].isin(['NS4B214'])) & (mdata["gex"].obs['FACS_Phenotype_v3'].isin(['N']))].copy()

result_key = 'DEG_d0_d365_NS4B214_naive_results'
sc.tl.rank_genes_groups(adata_tmp, groupby='time', groups=['d0', 'd365'], reference='d365', key_added=result_key)

# Each statistic becomes a one-column frame (only one group is tested against
# the reference); the columns are renamed before the frames are joined.
result = adata_tmp.uns[result_key]
names = pd.DataFrame(result['names'])
scores = pd.DataFrame(result['scores'])
pvals_adj = pd.DataFrame(result['pvals_adj'])
logfoldchanges = pd.DataFrame(result['logfoldchanges'])

names.columns = ['names']
scores.columns = ['scores']
pvals_adj.columns = ['padj']
logfoldchanges.columns = ['logfc']

DEG = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

# Export genes with |logfc| > 1 and adjusted p-value < 0.05, split by direction.
up = DEG[(DEG['logfc']>1) & (DEG['padj']<0.05)]
up.to_csv('/Users/mimi/Sina/7_DEG/DEG_d0_over_d365_specific_naive_cells_genes_up.csv')

down = DEG[(DEG['logfc']<-1) & (DEG['padj']<0.05)]
down.to_csv('/Users/mimi/Sina/7_DEG/DEG_d0_over_d365_specific_naive_cells_genes_down.csv')

In [None]:
# Display the per-cell annotation table of the current subset.
adata_tmp.obs

In [None]:
# DEG test: d0 against d365 (reference) within NS4B214-specific cells that
# belong to Leiden cluster 4 or 10.
# .copy() makes the subset an independent AnnData. rank_genes_groups writes its
# results into .uns; on a view that only works through an implicit copy whose
# warning is silenced by the warning filters at the top of this notebook.
adata_tmp = mdata["gex"][(mdata["gex"].obs['time'].isin(['d0', 'd365'])) & (mdata["gex"].obs['specific_new'].isin(['NS4B214'])) & (mdata["gex"].obs['leiden'].isin(['4', '10']))].copy()

result_key = 'DEG_d0_d365_NS4B214_results'
sc.tl.rank_genes_groups(adata_tmp, groupby='time', groups=['d0', 'd365'], reference='d365', key_added=result_key)

# Each statistic becomes a one-column frame (only one group is tested against
# the reference); the columns are renamed before the frames are joined.
result = adata_tmp.uns[result_key]
names = pd.DataFrame(result['names'])
scores = pd.DataFrame(result['scores'])
pvals_adj = pd.DataFrame(result['pvals_adj'])
logfoldchanges = pd.DataFrame(result['logfoldchanges'])

names.columns = ['names']
scores.columns = ['scores']
pvals_adj.columns = ['padj']
logfoldchanges.columns = ['logfc']

DEG = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

# Export genes with |logfc| > 1 and adjusted p-value < 0.05, split by direction.
up = DEG[(DEG['logfc']>1) & (DEG['padj']<0.05)]
up.to_csv('/Users/mimi/Sina/7_DEG/DEG_d0_over_d365_specific_cells_genes_up.csv')

down = DEG[(DEG['logfc']<-1) & (DEG['padj']<0.05)]
down.to_csv('/Users/mimi/Sina/7_DEG/DEG_d0_over_d365_specific_cells_genes_down.csv')

## Differences between NS4B214-specific and non-binding cells per time point

In [None]:
time = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']

# For every time point: test NS4B214-specific cells against 'no_binding' cells
# (reference) and export the up-/down-regulated genes.
for t in time:
    # .copy() makes the subset an independent AnnData. rank_genes_groups writes
    # into .uns; on a view that only works through an implicit copy whose
    # warning is silenced by the warning filters at the top of this notebook.
    adata_tmp = mdata["gex"][mdata["gex"].obs['time']==t].copy()
    result_key = f'DEG_{t}_NS4B214_results'
    sc.tl.rank_genes_groups(adata_tmp, groupby='specific_new', groups=['NS4B214', 'no_binding'], reference='no_binding', key_added=result_key)

    # Each statistic becomes a one-column frame (only one group is tested
    # against the reference); rename the columns, then join the frames.
    result = adata_tmp.uns[result_key]
    names = pd.DataFrame(result['names'])
    scores = pd.DataFrame(result['scores'])
    pvals_adj = pd.DataFrame(result['pvals_adj'])
    logfoldchanges = pd.DataFrame(result['logfoldchanges'])

    names.columns = ['names']
    scores.columns = ['scores']
    pvals_adj.columns = ['padj']
    logfoldchanges.columns = ['logfc']

    DEG = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

    # Genes with |logfc| > 1 and adjusted p-value < 0.05, split by direction.
    # NOTE(review): these exports use a different base directory than the
    # d0-vs-d365 cells above (/Users/mimi/...); confirm which one is intended.
    up = DEG[(DEG['logfc']>1) & (DEG['padj']<0.05)]
    up.to_csv(f'/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_{t}_NS4B214_over_nobinding_genes_up.csv')

    down = DEG[(DEG['logfc']<-1) & (DEG['padj']<0.05)]
    down.to_csv(f'/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_{t}_NS4B214_over_nobinding_genes_down.csv')

In [None]:
# UMAP of the 'gex' modality coloured by Leiden cluster.
sc.pl.umap(mdata['gex'], color='leiden')

## Difference Cluster 4 and 10

In [None]:
# Rank genes for Leiden cluster 10 against cluster 4 (reference) on the full
# 'gex' modality; the result is stored under the key 'DEG_4_10_results'.
sc.tl.rank_genes_groups(
    mdata["gex"],
    groupby='leiden',
    groups=['4', '10'],
    reference='4',
    key_added='DEG_4_10_results',
)

In [None]:
# Assemble the cluster 4 vs 10 ranking into one table with the columns
# names / scores / padj / logfc.
res = mdata["gex"].uns['DEG_4_10_results']

names = pd.DataFrame(res['names'])
names.columns = ['names']

scores = pd.DataFrame(res['scores'])
scores.columns = ['scores']

pvals_adj = pd.DataFrame(res['pvals_adj'])
pvals_adj.columns = ['padj']

logfoldchanges = pd.DataFrame(res['logfoldchanges'])
logfoldchanges.columns = ['logfc']

DEG4_10 = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

In [None]:
# Export the up-regulated genes of the current table: logfc > 1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
up = DEG4_10[(DEG4_10['logfc'] > 1) & significant]
up.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_cluster_10_over_4_genes_up.csv')

In [None]:
# Export the down-regulated genes of the current table: logfc < -1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
down = DEG4_10[(DEG4_10['logfc'] < -1) & significant]
down.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_cluster_10_over_4_genes_down.csv')

## Difference Cluster 4 and 10 only specific cells

In [None]:
# NS4B214-specific cells only. .copy() turns the subset into an independent
# AnnData, so results written to it afterwards do not depend on anndata
# implicitly converting a view (a warning this notebook silences).
mdata_tmp = mdata["gex"][mdata['gex'].obs['specific_new']=='NS4B214'].copy()

In [None]:
# Rank genes for Leiden cluster 10 against cluster 4 (reference) within the
# current subset; the result is stored under 'DEG_4_10_specific_results'.
sc.tl.rank_genes_groups(
    mdata_tmp,
    groupby='leiden',
    groups=['4', '10'],
    reference='4',
    key_added='DEG_4_10_specific_results',
)

In [None]:
# Assemble the cluster 4 vs 10 ranking of the specific cells into one table
# with the columns names / scores / padj / logfc.
res = mdata_tmp.uns['DEG_4_10_specific_results']

names = pd.DataFrame(res['names'])
names.columns = ['names']

scores = pd.DataFrame(res['scores'])
scores.columns = ['scores']

pvals_adj = pd.DataFrame(res['pvals_adj'])
pvals_adj.columns = ['padj']

logfoldchanges = pd.DataFrame(res['logfoldchanges'])
logfoldchanges.columns = ['logfc']

DEG4_10 = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

In [None]:
# Export the up-regulated genes of the current table: logfc > 1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
up = DEG4_10[(DEG4_10['logfc'] > 1) & significant]
up.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_cluster_10_over_4_genes_up_specific_cells.csv')

In [None]:
# Export the down-regulated genes of the current table: logfc < -1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
down = DEG4_10[(DEG4_10['logfc'] < -1) & significant]
down.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_cluster_10_over_4_genes_down_specific_cells.csv')

## Difference specific cells, FACS naive cells from Day 0 versus other time points

In [None]:
# NS4B214-specific cells whose FACS phenotype is 'N'. .copy() makes the subset
# an independent AnnData so the new obs column is written to a real object and
# not to a view of mdata["gex"].
# NOTE(review): this cell filters on 'FACS_Phenotype', while the d0-vs-d365
# cells near the top use 'FACS_Phenotype_v3' -- confirm which column is intended.
mdata_tmp = mdata["gex"][(mdata['gex'].obs['specific_new']=='NS4B214')&(mdata['gex'].obs['FACS_Phenotype']=='N')].copy()

# Two-level time label: 'd0' for day-0 cells, 'dx' for every other time point.
# The second write goes through .loc in one step; the former chained form
# obs['help_time'][mask] = 'dx' assigns to an intermediate Series, which pandas
# does not guarantee to write back (and never does under copy-on-write).
mdata_tmp.obs['help_time'] = 'd0'
mdata_tmp.obs.loc[mdata_tmp.obs['time'] != 'd0', 'help_time'] = 'dx'

In [None]:
# Rank genes for the pooled later time points ('dx') against day 0 (reference)
# using the helper label; stored under 'DEG_d0_other_specific_results'.
sc.tl.rank_genes_groups(
    mdata_tmp,
    groupby='help_time',
    groups=['d0', 'dx'],
    reference='d0',
    key_added='DEG_d0_other_specific_results',
)

In [None]:
# Assemble the d0-vs-other ranking into one table with the columns
# names / scores / padj / logfc. The table keeps the variable name DEG4_10
# because the export cells that follow read it under that name.
res = mdata_tmp.uns['DEG_d0_other_specific_results']

names = pd.DataFrame(res['names'])
names.columns = ['names']

scores = pd.DataFrame(res['scores'])
scores.columns = ['scores']

pvals_adj = pd.DataFrame(res['pvals_adj'])
pvals_adj.columns = ['padj']

logfoldchanges = pd.DataFrame(res['logfoldchanges'])
logfoldchanges.columns = ['logfc']

DEG4_10 = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

In [None]:
# Export the up-regulated genes of the current table: logfc > 1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
up = DEG4_10[(DEG4_10['logfc'] > 1) & significant]
up.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_other_over_d0_genes_up_specific_cells.csv')

In [None]:
# Export the down-regulated genes of the current table: logfc < -1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
down = DEG4_10[(DEG4_10['logfc'] < -1) & significant]
down.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_other_over_d0_genes_down_specific_cells.csv')

## Difference of specific cells of Day 0 and Day x

In [None]:
# NS4B214-specific cells only. .copy() turns the subset into an independent
# AnnData, so results written to it afterwards do not depend on anndata
# implicitly converting a view (a warning this notebook silences).
mdata_tmp = mdata["gex"][(mdata['gex'].obs['specific_new']=='NS4B214')].copy()

In [None]:
# Rank genes for time point 'dx' against 'd0' (reference) within the current
# subset; the result is stored under 'DEG_d0_dx_specific_results'.
sc.tl.rank_genes_groups(
    mdata_tmp,
    groupby='time',
    groups=['d0', 'dx'],
    reference='d0',
    key_added='DEG_d0_dx_specific_results',
)

In [None]:
# Assemble the d0-vs-dx ranking into one table with the columns
# names / scores / padj / logfc. The table keeps the variable name DEG4_10
# because the export cells that follow read it under that name.
res = mdata_tmp.uns['DEG_d0_dx_specific_results']

names = pd.DataFrame(res['names'])
names.columns = ['names']

scores = pd.DataFrame(res['scores'])
scores.columns = ['scores']

pvals_adj = pd.DataFrame(res['pvals_adj'])
pvals_adj.columns = ['padj']

logfoldchanges = pd.DataFrame(res['logfoldchanges'])
logfoldchanges.columns = ['logfc']

DEG4_10 = pd.concat([names, scores, pvals_adj, logfoldchanges], axis=1)

In [None]:
# Export the up-regulated genes of the current table: logfc > 1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
up = DEG4_10[(DEG4_10['logfc'] > 1) & significant]
up.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_dx_over_d0_genes_up_specific_cells.csv')

In [None]:
# Export the down-regulated genes of the current table: logfc < -1, padj < 0.05.
significant = DEG4_10['padj'] < 0.05
down = DEG4_10[(DEG4_10['logfc'] < -1) & significant]
down.to_csv('/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/DEG_dx_over_d0_genes_down_specific_cells.csv')

## Dotplots of Marker Genes per Cluster

In [None]:
# Marker genes per Leiden cluster, read from a ranking already stored in the
# object (it is not computed in this notebook section).
marker = pd.DataFrame(mdata["gex"].uns['rank_genes_groups_leiden']['names'])

# De-duplicate while keeping first-seen order. list(set(...)) was used before,
# but the iteration order of a set of strings differs between interpreter
# sessions, which reshuffled the gene axis of the dotplot from run to run.
genes = list(dict.fromkeys(marker.values.flatten().tolist()))

In [None]:
# Dotplot of the collected marker genes across Leiden clusters: genes on the
# rows (swap_axes), each gene scaled across clusters, clusters ordered by the
# dendrogram.
sc.pl.dotplot(
    mdata["gex"],
    genes,
    groupby='leiden',
    title='Cluster marker genes by Leiden',
    standard_scale='var',
    swap_axes=True,
    dendrogram=True,
    show=True,
)

In [None]:
# Rank genes for every Leiden cluster, keeping the top 10 genes per cluster
# (stored under scanpy's default result key).
sc.tl.rank_genes_groups(
    mdata["gex"],
    groupby='leiden',
    n_genes=10,
)

In [None]:
# Dotplot of the top 5 ranked genes per cluster, each gene scaled across clusters.
sc.pl.rank_genes_groups_dotplot(
    mdata["gex"],
    n_genes=5,
    standard_scale='var',
    cmap=cust,
)

In [None]:
# Full marker panel per phenotype label. Not plotted in this cell; the reduced
# panel below is.
var_names = {'Naive': ['CCR7', 'RPL32', 'RPS13', 'RPL13', 'SELL'],
              'CM': ['IL7R', 'PABPC1', 'RPLP1', 'RPS12', 'FTH1', 'S100A11', 'RPL13', 'RPL8'],
              'EM': ['GZMK', 'CXCR3', 'GAPDH', 'CCL5', 'CST7', 'HLA-A', 'CLIC3', 'TRAV12-2'],
              'EF': ['CCL5', 'CST7', 'NKG7', 'APOBEC3G', 'CXCR3', 'GZMH', 'GNLY', 'GZMB'],
              'IFN EF': ['IFITM1', 'LY6E', 'ISG20', 'IFI6', 'MX1', 'NKG7', 'CCL5'],
              'Cycling': ['CD74', 'ACTG1', 'ACTB', 'GAPDH', 'PFN1'],
              'Resting memory': ['LTB', 'CCR7', 'EIF1', 'NFKB2', 'ZFAS1', 'BTG1', 'RELB', 'CREM'] }

# Reduced panel: the same groups without the genes that var_names lists under
# more than one phenotype.
markers_short = {'Naive': ['RPL32', 'RPS13', 'SELL'],
              'CM': ['IL7R', 'PABPC1', 'RPLP1', 'RPS12', 'FTH1', 'S100A11', 'RPL8'],
              'EM': ['GZMK', 'HLA-A', 'CLIC3', 'TRAV12-2'],
              'EF': ['APOBEC3G', 'GZMH', 'GNLY', 'GZMB'],
              'IFN EF': ['IFITM1', 'LY6E', 'ISG20', 'IFI6', 'MX1'],
              'Cycling': ['CD74', 'ACTG1', 'ACTB', 'PFN1'],
              'Resting memory': ['LTB', 'EIF1', 'NFKB2', 'ZFAS1', 'BTG1', 'RELB', 'CREM'] }

# show=False: under %matplotlib inline, showing the figure also closes it, so
# the plt.savefig() that follows would write an empty canvas. Save first; the
# notebook still renders the open figure when the cell finishes.
# NOTE(review): var_group_positions is passed without var_group_labels while the
# genes are already grouped by the dict keys -- confirm it has any effect.
sc.pl.dotplot(mdata["gex"], markers_short, groupby='leiden', dendrogram=True,
             standard_scale='var', var_group_positions=[(0,3)], cmap=cust, show=False)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_marker_genes_labelled.png")



## Interesting gene sets

#### Glycolysis genes regulated on RNA level according to Wu et al 2023

In [None]:
# Glycolysis gene set, plotted per Leiden cluster with genes on the rows.
var_names = {'Glycolysis': ['PFKL', 'ALDOA', 'GAPDH', 'ENO1', 'PKM', 'SLC2A3']}

# show=False: under %matplotlib inline, showing the figure also closes it, so
# the plt.savefig() that follows would write an empty canvas. Save first; the
# notebook still renders the open figure when the cell finishes.
sc.pl.dotplot(mdata["gex"], var_names, groupby='leiden', dendrogram=True,
             standard_scale='var', swap_axes=True, cmap=cust, show=False)
# NOTE(review): the file name spells 'gylcolysis'; left unchanged so that
# anything already pointing at this file keeps working.
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_gylcolysis_genes.png")

#### MTOR genes

In [None]:
# mTOR-related gene groups; MTOR, MLST8 and DEPTOR are listed under both
# complexes.
var_names = {'MTORC1': ['MTOR', 'RPTOR', 'MLST8', 'DEPTOR'],
             'MTORC2': ['MTOR', 'RICTOR', 'MLST8', 'DEPTOR'],
             'TSC': ['TSC1', 'TSC2'],
             'AMPK': ['PRKAA1', 'PRKAA2', 'PRKAB1', 'PRKAB2', 'PRKAG1', 'PRKAG2', 'PRKAG3']}

# show=False: under %matplotlib inline, showing the figure also closes it, so
# the plt.savefig() that follows would write an empty canvas. Save first; the
# notebook still renders the open figure when the cell finishes.
sc.pl.dotplot(mdata["gex"], var_names, groupby='leiden', dendrogram=True,
             standard_scale='var', swap_axes=True, cmap=cust, show=False)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_mtor_genes.png")

#### Interesting genes from Zehn et al 2022, Wu et al 2023

In [None]:
# Gene groups taken from the two publications named in the heading above.
var_names = {'Glycolysis': ['PFKL', 'ALDOA', 'GAPDH', 'ENO1', 'PKM', 'SLC2A3'],
             'Lipid synthesis': ['SREBF1', 'SREBF2', 'FASN', 'ACACA'],
             'AA Transporter': ['SLC1A5', 'SLC7A1', 'SLC38A1', 'SLC38A2', 'SLC7A5'],
             'Lactate Transporter': ['SLC16A1', 'SLC16A3'],
             'Regulator': ['MYC', 'NFATC1', 'NFATC2', 'NFKB1', 'NFKB2','JAK1', 'JAK2', 'JAK3'],
             'Survival': ['ID3', 'BCL2', 'TOX', 'EGR2'],
             'Differentiation': ['NR4A1', 'NR4A2', 'NR4A3', 'EOMES', 'MYB', 'BCL6', 'PRDM1', 'TCF7', 'IL7R']}

# show=False: under %matplotlib inline, showing the figure also closes it, so
# the plt.savefig() that follows would write an empty canvas. Save first; the
# notebook still renders the open figure when the cell finishes.
sc.pl.dotplot(mdata["gex"], var_names, groupby='leiden', dendrogram=True,
             standard_scale='var', swap_axes=False, cmap=cust, show=False)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_Zehn2022_Wu2023_genes.png")

#### Other genes

In [None]:
# Hand-picked gene sets. Only genes5 is plotted in this cell; genes3 is used by
# the per-cluster dotplots at the end of the notebook.
genes = ['FOXO1', 'FOXO3', 'RICTOR', 'RPTOR', 'FOXP1', 'PIK3CD', 'AKT1', 'PTEN', 'IL7R', 'KLF2', 'S1PR1',
        'EIF4G1', 'ACACA', 'ACACB', 'TSC1', 'TSC2', 'IRF4', 'MYC', 'SCAP', 'NR1H3', 'SREBF1', 'SREBF2',
        'SLC1A5', 'SLC7A5', 'SLC2A1', 'NFKB1', 'NFKB2', 'RHEB', 'GLS', 'MPC1', 'MPC2', 'PDK1', 'HIF1A',
        'VHL', 'LDHA', 'HK2', 'TFRC', 'CTLA4', 'TIGIT', 'PDCD1', 'CD69']

genes2 = ['FOXO1', 'RICTOR', 'RPTOR', 'FOXP1', 'PIK3CD', 'AKT1', 'PTEN', 'IL7R', 'KLF2', 'S1PR1',
        'IRF4', 'MYC', 'SREBF1', 'SLC1A5', 'SLC7A5', 'SLC2A1', 'NFKB1', 'NFKB2', 'RHEB', 'GLS', 'MPC1',
          'MPC2', 'PDK1', 'HIF1A', 'LDHA', 'TFRC', 'TIGIT', 'PDCD1', 'CD69', 'JUNB', 'GAPDH']

# NOTE(review): 'CD69' appears twice in genes3 -- confirm that is intended.
genes3 = ['FOXO1', 'RICTOR', 'RPTOR', 'FOXP1', 'PIK3CD', 'AKT1', 'PTEN', 'IL7R', 'KLF2', 'S1PR1',
        'IRF4', 'MYC', 'SREBF1', 'SLC1A5', 'SLC7A5', 'SLC2A1', 'NFKB1', 'NFKB2', 'RHEB', 'GLS', 'MPC1',
          'MPC2', 'PDK1', 'HIF1A', 'LDHA', 'TFRC', 'TIGIT', 'PDCD1', 'CD69', 'JUNB', 'GAPDH', 'B2M', 'ETS1',
         'LEF1', 'PBXIP1', 'JAK1', 'LBH', 'TLE5', 'MAGED2', 'ELF1', 'STK17B', 'SORL1', 'SQSTM1', 'TXNIP',
         'RIPOR2', 'CD247', 'ALDOA', 'PGK1', 'EEF1A1', 'CD40LG', 'RPS27A', 'LCP2', 'CD69', 'NR4A1', 'NR4A2',
          'CREB1', 'ATXN2L', 'RPL19', 'DDX5', 'TOP1', 'RBM39', 'SRSF2',
         'RBMX', 'RBM3', 'CLINT1', 'SLC2A3', 'MKI67']

genes4 = ['LCP2', 'CD69', 'NR4A1', 'NR4A2', 'CREB1', 'ATXN2L', 'RPL19', 'DDX5', 'TOP1', 'RBM39', 'SRSF2',
         'RBMX', 'RBM3', 'CLINT1', 'SLC2A3']

genes5 = ['BTG1', 'BTG2', 'PHLPP1', 'KLF3', 'KLF4', 'TCF7', 'BACH2', 'FOXO1', 'KLF2', 'SATB1', 'LEF1', 'RUNX1',
         'RUNX2', 'BATF', 'TBX21', 'IRF4', 'IFNG', 'H2AFX']

# Alias note: AES is listed as TLE5, FAM65B as RIPOR2.

# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"], genes5, show=False, groupby='leiden', dendrogram=True, title='Genes By Leiden', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_geneset5.png")

#### Mitochondria structure proteins

In [None]:
# Gene panel for the "mitochondria structure proteins" dotplot (92 genes, ten
# per row).
mito = [
    'AFG1L', 'PTCD2', 'TMEM135', 'PRKN', 'ADCK1', 'RAB32', 'VPS13C', 'DNM1L', 'SUPV3L1', 'HSD17B10',
    'WASF1', 'ATAD3B', 'CAV2', 'FEZ1', 'MTFR1', 'RHOT1', 'PLEC', 'LETMD1', 'OPA1', 'MFF',
    'FANCG', 'SLIRP', 'MARCKS', 'ATAD3A', 'ATAD3C', 'PINK1', 'PARP1', 'SELENON', 'SIRT5', 'MSTO1',
    'STOML2', 'FXN', 'MAN2A1', 'COL6A1', 'GSK3B', 'SHARPIN', 'FIS1', 'NUBPL', 'HTRA2', 'CEBPA',
    'MTX3', 'CERT1', 'TMEM11', 'DNAJA3', 'MTX2', 'ARMCX3', 'ATP7A', 'POLDIP2', 'RAB3A', 'BCS1L',
    'CLUH', 'MIEF2', 'SIRT4', 'CHCHD2', 'NIPSNAP2', 'MTFR2', 'MTM1', 'AGTPBP1', 'PANK2', 'MTX1',
    'CAMKMT', 'PPARGC1A', 'WDR81', 'TERT', 'CXADR', 'VPS54', 'YME1L1', 'PUM2', 'MYH14', 'PRDX3',
    'CHCHD10', 'P2RX7', 'LIPA', 'LONP1', 'PID1', 'PYROXD2', 'PHB2', 'PNPT1', 'LRRK2', 'RHOT2',
    'PARK7', 'CEP89', 'SLC25A36', 'SLC25A33', 'EPAS1', 'RAB29', 'RAB38', 'NOA1', 'EDN1', 'GABPB1',
    'NOS3', 'HMGCL',
]

# Left out on purpose (not found): CHCHD2P9, KIF28P, TAFAZZIN, MT-RNR2, PHB1

In [None]:
# Dotplot of the mitochondrial gene panel per Leiden cluster.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"], mito, show=False, groupby='leiden', dendrogram=True, title='Genes By Leiden', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_mitochondrial_structure_genes.png")

#### TSCM genes

In [None]:
# Gene panel for the TSCM dotplot.
tscm = [
    'CD27', 'CD28', 'IL7R', 'SELL', 'CCR7', 'CXCR3',
    'ITGAL', 'MKI67', 'ATXN1', 'IL2RB', 'WNT3A', 'CD58',
]

# Genes that decrease from naive to effector (Wang et al. 2022).
dec = [
    'TCF7', 'LEF1', 'EOMES',
    'FOXP1', 'CERS6', 'BCL2',
]

# Genes that increase from naive to effector (Wang et al. 2022).
inc = [
    'TBX21', 'PRDM1', 'GZMA', 'PRF1',
    'IL2', 'IFNG', 'KLRG1',
]

In [None]:
# Dotplot of the TSCM gene panel per Leiden cluster.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"], tscm, show=False, groupby='leiden', dendrogram=True, title='Genes By Leiden', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_tscm_genes.png")

In [None]:
# Dotplot of the `dec` gene list per Leiden cluster.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"], dec, show=False, groupby='leiden', dendrogram=True, title='Genes By Leiden', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_decreasing_N_to_EF_genes.png")

In [None]:
# Dotplot of the `inc` gene list per Leiden cluster.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"], inc, show=False, groupby='leiden', dendrogram=True, title='Genes By Leiden', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_increasing_N_to_EF_genes.png")

In [None]:
# Leiden cluster 4 only: compare NS4B214-specific with 'no_binding' cells on
# the genes3 panel.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"][(mdata["gex"].obs['leiden'].isin(['4'])) &
                                  mdata["gex"].obs['specific_new'].isin(['NS4B214', 'no_binding'])],
                     genes3, show=False, groupby='specific_new', title='Cluster 4', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_cluster4_specific_geneset3.png")

In [None]:
# Leiden cluster 10 only: compare NS4B214-specific with 'no_binding' cells on
# the genes3 panel.
# show=False (was show=True): under %matplotlib inline, showing the figure also
# closes it, so the plt.savefig() that follows would write an empty canvas.
# Save first; the notebook still renders the open figure when the cell finishes.
plot = sc.pl.dotplot(mdata["gex"][(mdata["gex"].obs['leiden'].isin(['10'])) &
                                  mdata["gex"].obs['specific_new'].isin(['NS4B214', 'no_binding'])],
                     genes3, show=False, groupby='specific_new', title='Cluster 10', cmap=cust)
plt.savefig("/media/agschober/HDD12/3_scRNA-Seq_Sina/7_DEG/Dotplot_cluster10_specific_geneset3.png")