In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import os
import sys
import scanpy as sc

from tqdm import tqdm
from sklearn.feature_selection import f_classif, mutual_info_classif
from scipy.stats import spearmanr, pearsonr, false_discovery_control

from kneed import DataGenerator, KneeLocator
from pyprojroot import here
import session_info

import Spectra as spc

from Spectra import K_est as kst
from upsetplot import from_contents, UpSet

# Sig 
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
import math

sys.path.insert(1, str(here('bin')))
#from customPalette import diseases_palette
from customPythonFunctions import generateID2SymbolDF

import warnings
warnings.filterwarnings('ignore')

from itertools import product

import decoupler as dc

# Load data

In [4]:
# Value interpolated into the SPECTRA result file names read below
# (e.g. `..._SPECTRA_lam{LAM}_...`); presumably the lambda setting of the run -- confirm.
LAM = 1e-3

In [5]:
# Resolve the SPECTRA analysis folder with `here` and keep it as a plain string,
# because later cells build file paths from it by string formatting/concatenation.
spectra_dir = here('03_downstream_analysis/05_SPECTRA/')
project_dir = str(spectra_dir)

In [6]:
# Read the AnnData file for the configured LAM value and display its summary.
spectra_h5ad = f"{project_dir}/results/04_MAIN_SEAcell_scANVInorm_SPECTRA_lam{LAM}_overlap0.3_v2.h5ad"
adataSPECTRA = sc.read_h5ad(spectra_h5ad)
adataSPECTRA

AnnData object with n_obs × n_vars = 71108 × 8253
    obs: 'spectra_categories'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'mt', 'ribo', 'hb', 'plt'
    uns: 'SPECTRA_L', 'SPECTRA_factors', 'SPECTRA_markers', 'SPECTRA_overlap', 'log1p'
    obsm: 'SPECTRA_cell_scores'

In [7]:
# The factor names are taken from the row labels of the overlap table stored in `.uns`.
overlap_table = adataSPECTRA.uns["SPECTRA_overlap"]
factors = overlap_table.index
factors

Index(['0-X-global-X-global_all_IL6-JAK-STAT3_signaling',
       '1-X-global-X-B_IFN_Type_1_2_Lambda',
       '2-X-global-X-global_all_type-I and II-ifn-response',
       '3-X-global-X-global_all_TNF-via-NFkB_signaling',
       '4-X-global-X-global_all_MHC-II-I presentation',
       '5-X-global-X-global_all_complement_production', '6-X-global-X-6',
       '7-X-B-X-effector', '8-X-B-X-B_chemokines',
       '9-X-B-X-Plasma_chemokine_receptors',
       ...
       '125-X-pDC-X-T_CD4_NonNaive_chemokines',
       '126-X-pDC-X-T_CD8_Naive_chemokine_receptors',
       '127-X-pDC-X-T_CD4_Naive_cytokine_and_receptors_proinflammatory',
       '128-X-pDC-X-T_CD8_Naive_cytokine_and receptors_ antiinflammatory',
       '129-X-pDC-X-pDC_IFN_Type_1_2_Lambda',
       '130-X-pDC-X-T_CD8_Naive_IFN_response',
       '131-X-pDC-X-T_CD8_NonNaive_TNF_receptors_ligands',
       '132-X-pDC-X-Plasma_adhesion_molecules',
       '133-X-pDC-X-pDC_antigen_presentation_molecules', '134-X-pDC-X-134'],
      dtype='ob

In [8]:
# Load the ULM result table saved for this LAM value (first CSV column becomes the index).
ulm_csv = f"{project_dir}/results/ULMresDF_{LAM}.csv"
ULMres = pd.read_csv(ulm_csv, index_col=0)
ULMres

Unnamed: 0,factor,gene_set,estimate,pValues,Level1,adj_pV,annotated
0,0-X-global-X-global_all_IL6-JAK-STAT3_signaling,global_all_IL6-JAK-STAT3_signaling,73.236250,0.000000e+00,global,0.000000e+00,True
1,1-X-global-X-B_IFN_Type_1_2_Lambda,global_all_IL6-JAK-STAT3_signaling,16.788246,3.183197e-62,global,4.130667e-61,True
2,2-X-global-X-global_all_type-I and II-ifn-resp...,global_all_IL6-JAK-STAT3_signaling,0.532302,5.945314e-01,global,9.684836e-01,False
3,3-X-global-X-global_all_TNF-via-NFkB_signaling,global_all_IL6-JAK-STAT3_signaling,3.502776,4.628742e-04,global,2.205400e-03,True
4,4-X-global-X-global_all_MHC-II-I presentation,global_all_IL6-JAK-STAT3_signaling,-0.437972,6.614182e-01,global,9.684836e-01,False
...,...,...,...,...,...,...,...
145,130-X-pDC-X-T_CD8_Naive_IFN_response,pDC_cytokine_and_receptors_proinflammatory,-0.290128,7.717257e-01,pDC,9.684836e-01,False
146,131-X-pDC-X-T_CD8_NonNaive_TNF_receptors_ligands,pDC_cytokine_and_receptors_proinflammatory,-0.203296,8.389088e-01,pDC,9.684836e-01,False
147,132-X-pDC-X-Plasma_adhesion_molecules,pDC_cytokine_and_receptors_proinflammatory,-0.376227,7.067580e-01,pDC,9.684836e-01,False
148,133-X-pDC-X-pDC_antigen_presentation_molecules,pDC_cytokine_and_receptors_proinflammatory,-0.630489,5.283924e-01,pDC,9.684836e-01,False


In [9]:
# Load the factor -> best-matching gene set table saved for this LAM value
# (first CSV column becomes the index).
best_match_csv = f"{project_dir}/results/factorBestMatchDF_{LAM}.csv"
factorBestMatch = pd.read_csv(best_match_csv, index_col=0)
factorBestMatch

Unnamed: 0,factor,gene_set,estimate,pValues,Level1,adj_pV,annotated
0,0-X-global-X-global_all_IL6-JAK-STAT3_signaling,global_all_IL6-JAK-STAT3_signaling,73.236250,0.000000e+00,global,0.000000e+00,True
8,1-X-global-X-B_IFN_Type_1_2_Lambda,global_all_JAK-STAT_signaling,83.666985,0.000000e+00,global,0.000000e+00,True
18,4-X-global-X-global_all_MHC-II-I presentation,global_all_MHC-II-I presentation,78.628580,0.000000e+00,global,0.000000e+00,True
24,3-X-global-X-global_all_TNF-via-NFkB_signaling,global_all_TNF-via-NFkB_signaling,108.212700,0.000000e+00,global,0.000000e+00,True
33,5-X-global-X-global_all_complement_production,global_all_complement_production,48.866062,0.000000e+00,global,0.000000e+00,True
...,...,...,...,...,...,...,...
108,133-X-pDC-X-pDC_antigen_presentation_molecules,pDC_antigen_presentation_molecules,120.828735,0.000000e+00,pDC,0.000000e+00,True
111,126-X-pDC-X-T_CD8_Naive_chemokine_receptors,pDC_chemokine_receptors,50.870296,0.000000e+00,pDC,0.000000e+00,True
120,125-X-pDC-X-T_CD4_NonNaive_chemokines,pDC_chemokines,56.246185,0.000000e+00,pDC,0.000000e+00,True
133,128-X-pDC-X-T_CD8_Naive_cytokine_and receptors...,pDC_cytokine_and receptors_ antiinflammatory,38.498188,2.332368e-298,pDC,4.397609e-297,True


# Rename factors

## Rename factors, BestMatch

In [10]:
# Preview the first rows. Leaving the frame as the cell's last expression lets the
# notebook render it as a table instead of the wrapped plain-text dump `print` produces.
factorBestMatch.head()

                                             factor  \
0   0-X-global-X-global_all_IL6-JAK-STAT3_signaling   
8                1-X-global-X-B_IFN_Type_1_2_Lambda   
18    4-X-global-X-global_all_MHC-II-I presentation   
24   3-X-global-X-global_all_TNF-via-NFkB_signaling   
33    5-X-global-X-global_all_complement_production   

                              gene_set    estimate  pValues  Level1  adj_pV  \
0   global_all_IL6-JAK-STAT3_signaling   73.236250      0.0  global     0.0   
8        global_all_JAK-STAT_signaling   83.666985      0.0  global     0.0   
18    global_all_MHC-II-I presentation   78.628580      0.0  global     0.0   
24   global_all_TNF-via-NFkB_signaling  108.212700      0.0  global     0.0   
33    global_all_complement_production   48.866062      0.0  global     0.0   

    annotated  
0        True  
8        True  
18       True  
24       True  
33       True  


In [13]:
# Worked example of the renaming on a single factor: look up its best-matching
# gene set, drop the cell-type prefix, and show the result with spaces turned into '_'.
factor = '2-X-global-X-global_all_type-I and II-ifn-response'
factor_celltype = factor.split('-X-', 2)[1]
group = factorBestMatch.loc[factorBestMatch['factor'] == factor]
best_gene_set = group['gene_set'].iloc[0]
new_name = best_gene_set.replace(f"{factor_celltype}_", '')
new_name.replace(' ', '_')

'all_type-I_and_II-ifn-response'

In [22]:
factor_reannotation = {}

# Function to rename factors
def rename_factor(factor, best_match=None):
    """Return the re-annotated name of a SPECTRA factor, or ``'Removed'``.

    The factor name is expected to look like ``<number>-X-<celltype>-X-<label>``.
    The new name keeps ``<number>`` and ``<celltype>`` and replaces ``<label>``
    with the gene set found in the factor's first row of the best-match table,
    after dropping the leading cell-type prefix from that gene-set name and
    removing spaces.

    Parameters
    ----------
    factor : str
        Original factor name.
    best_match : pandas.DataFrame, optional
        Table with ``'factor'`` and ``'gene_set'`` columns. When omitted, the
        notebook-level ``factorBestMatch`` table is used.

    Returns
    -------
    str
        ``'<number>-X-<celltype>-X-<new label>'``, or ``'Removed'`` when the
        factor has no row in the best-match table.

    Raises
    ------
    IndexError
        If a matched factor name contains no ``'-X-'`` separator.
    """
    if best_match is None:
        best_match = factorBestMatch

    if factor not in best_match['factor'].values:
        return 'Removed'

    group = best_match[best_match['factor'] == factor]
    name_parts = factor.split('-X-')
    factor_number = name_parts[0]
    factor_celltype = name_parts[1]

    # For "DC" factors the prefix stripped from the gene-set name is "cDC_";
    # every other cell type uses "<celltype>_".
    prefix = "cDC_" if factor_celltype == "DC" else factor_celltype + '_'

    new_name = group['gene_set'].iloc[0]
    # Strip the prefix only where it leads the name. A plain str.replace would
    # also delete matches inside the name: with celltype "B", the "B_" inside
    # "...-NFkB_signaling" would be removed as well.
    if new_name.startswith(prefix):
        new_name = new_name[len(prefix):]

    # NOTE(review): spaces are removed, not replaced by '_', so
    # "MHC-II-I presentation" becomes "MHC-II-Ipresentation". Kept unchanged
    # because the files written by this notebook use these names.
    new_name = new_name.replace(" ", '')
    return f"{factor_number}-X-{factor_celltype}-X-{new_name}"

# Fill the mapping: original factor name -> new name (or 'Removed')
for factor in factors:
    factor_reannotation[factor] = rename_factor(factor)

# Rebuild the mapping with its entries ordered by the integer that leads each original name
ordered_items = sorted(
    factor_reannotation.items(),
    key=lambda entry: int(entry[0].partition('-X-')[0]),
)
sorted_factor_reannotation = dict(ordered_items)

# Collect the third '-X-' field (the function label) of every kept factor
# and print the distinct labels
functions = [
    renamed.split('-X-')[2]
    for renamed in sorted_factor_reannotation.values()
    if renamed != "Removed"
]
print(set(functions))

{'all_TNF-via-NFkB_signaling', 'Tregs_FoxP3_stabilization', 'CD4T_TH17_UP', 'IFN_Type_1_2_Lambda', 'IL4-IL13_response', 'CD4T_TFH_UP', 'all_IL6-JAK-STAT3_signaling', 'CD4T_TH2_UP', 'effector', 'IFN_response', 'CD8T_tcr_activation', 'cytokine_andreceptors_antiinflammatory', 'antigen_presentation_molecules', 'all_complement_production', 'global_all_TNF-via-NFkB_signaling', 'CD4T_TH1_UP', 'chemokines', 'all_JAK-STAT_signaling', 'TNF_receptors_ligands', 'all_MHC-II-Ipresentation', 'cytokine_and_receptors_proinflammatory', 'CD8T_exhaustion', 'IFNG_response', 'DC_antigen-crosspresentation', 'chemokine_receptors', 'all_type-IandII-ifn-response', 'adhesion_molecules'}


In [23]:
# How many factors were mapped to the "Removed" placeholder
removed_count = list(sorted_factor_reannotation.values()).count("Removed")
removed_count

10

In [24]:
# Write the ordered old-name -> new-name mapping to the results folder as a pickle.
path = project_dir + '/results/'
reannotation_pkl = path + 'reannotated_factors_dict.pkl'
with open(reannotation_pkl, 'wb') as pkl_handle:
    pickle.dump(sorted_factor_reannotation, pkl_handle)

In [25]:
# Show the full path of the pickle file
f"{path}reannotated_factors_dict.pkl"

'/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas/03_downstream_analysis/05_SPECTRA/results/reannotated_factors_dict.pkl'

# Rename SPECTRA output

## Factor_markers

In [26]:
# Load the per-gene marker table whose columns are the original factor names.
# The file name is built from LAM, like the other inputs of this notebook, so that
# changing LAM cannot silently mix results from different runs.
factor_markers = pd.read_csv(
    here(f'03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Processed_LAM{LAM}_S0.6_NonCellIdentitymarkersDF.csv'),
    index_col=0,
)
factor_markers

Unnamed: 0_level_0,0-X-global-X-global_all_IL6-JAK-STAT3_signaling,1-X-global-X-B_IFN_Type_1_2_Lambda,2-X-global-X-global_all_type-I and II-ifn-response,3-X-global-X-global_all_TNF-via-NFkB_signaling,4-X-global-X-global_all_MHC-II-I presentation,5-X-global-X-global_all_complement_production,7-X-B-X-effector,8-X-B-X-B_chemokines,9-X-B-X-Plasma_chemokine_receptors,10-X-B-X-UTC_cytokine_and_receptors_proinflammatory,...,123-X-UTC-X-cDC_antigen_presentation_molecules,125-X-pDC-X-T_CD4_NonNaive_chemokines,126-X-pDC-X-T_CD8_Naive_chemokine_receptors,127-X-pDC-X-T_CD4_Naive_cytokine_and_receptors_proinflammatory,128-X-pDC-X-T_CD8_Naive_cytokine_and receptors_ antiinflammatory,129-X-pDC-X-pDC_IFN_Type_1_2_Lambda,130-X-pDC-X-T_CD8_Naive_IFN_response,131-X-pDC-X-T_CD8_NonNaive_TNF_receptors_ligands,132-X-pDC-X-Plasma_adhesion_molecules,133-X-pDC-X-pDC_antigen_presentation_molecules
ensembl_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000001167,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000002549,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000002586,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
ENSG00000004468,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000005339,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000276600,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000277443,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000277632,False,False,False,False,False,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
ENSG00000277734,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [27]:
# Relabel the factor columns with the re-annotated names; labels missing from the
# mapping are left untouched by `rename`.
factor_markers_renamed = factor_markers.rename(sorted_factor_reannotation, axis="columns")
factor_markers_renamed

Unnamed: 0_level_0,0-X-global-X-all_IL6-JAK-STAT3_signaling,1-X-global-X-all_JAK-STAT_signaling,2-X-global-X-all_type-IandII-ifn-response,3-X-global-X-all_TNF-via-NFkB_signaling,4-X-global-X-all_MHC-II-Ipresentation,5-X-global-X-all_complement_production,7-X-B-X-effector,8-X-B-X-chemokines,9-X-B-X-chemokine_receptors,10-X-B-X-cytokine_and_receptors_proinflammatory,...,123-X-UTC-X-antigen_presentation_molecules,125-X-pDC-X-chemokines,126-X-pDC-X-chemokine_receptors,127-X-pDC-X-cytokine_and_receptors_proinflammatory,128-X-pDC-X-cytokine_andreceptors_antiinflammatory,129-X-pDC-X-IFN_Type_1_2_Lambda,130-X-pDC-X-IFN_response,131-X-pDC-X-TNF_receptors_ligands,132-X-pDC-X-adhesion_molecules,133-X-pDC-X-antigen_presentation_molecules
ensembl_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000001167,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000002549,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000002586,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
ENSG00000004468,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000005339,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000276600,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000277443,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ENSG00000277632,False,False,False,False,False,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
ENSG00000277734,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [28]:
# Save the renamed marker table. The file name is built from LAM instead of a
# hard-coded "0.001" so the output always matches the run that was loaded.
factor_markers_renamed.to_csv(
    here(f'03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Renamed_Processed_LAM{LAM}_S0.6_markersDF.csv'),
    index=True,
)

In [29]:
# Show where the renamed marker table is written; the name follows LAM rather than
# a hard-coded "0.001".
here(f'03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Renamed_Processed_LAM{LAM}_S0.6_markersDF.csv')

PosixPath('/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas/03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Renamed_Processed_LAM0.001_S0.6_markersDF.csv')

## GeneWeights

In [30]:
# Load the gene-weight table whose row labels are the original factor names.
# The file name is built from LAM, like the other inputs of this notebook, so that
# changing LAM cannot silently mix results from different runs.
gene_weights = pd.read_csv(
    here(f'03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Processed_LAM{LAM}_S0.6_GeneWeights.csv'),
    index_col=0,
)
gene_weights

Unnamed: 0,ENSG00000000003,ENSG00000000457,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,ENSG00000001460,ENSG00000001461,ENSG00000001561,...,ENSG00000278619,ENSG00000278637,ENSG00000278662,ENSG00000278677,ENSG00000278705,ENSG00000278817,ENSG00000278828,ENSG00000280670,ENSG00000280789,ENSG00000282608
0-X-global-X-global_all_IL6-JAK-STAT3_signaling,1.662220e-15,3.255362e-14,7.108393e-15,4.331535e-17,5.497816e-16,3.206445e-15,1.057057e-15,4.152293e-15,1.354309e-15,7.388704e-15,...,8.076534e-17,1.604406e-15,2.332613e-15,1.923066e-15,1.979497e-15,3.506886e-16,5.575245e-15,8.837729e-17,2.977278e-15,1.527173e-16
1-X-global-X-B_IFN_Type_1_2_Lambda,1.108171e-15,2.299332e-14,4.758343e-15,3.819025e-17,3.985897e-16,2.137640e-15,3.022476e-17,3.077916e-15,1.422393e-15,6.019337e-15,...,6.932764e-17,1.214989e-15,1.637209e-15,1.386650e-15,1.631433e-15,2.437689e-16,4.031500e-15,6.686839e-17,2.288526e-15,9.870781e-17
2-X-global-X-global_all_type-I and II-ifn-response,2.737120e-15,5.744951e-14,1.259858e-14,5.163268e-17,1.113090e-15,5.587422e-15,4.612209e-20,7.008616e-15,3.292712e-15,1.330521e-14,...,1.408319e-16,2.763175e-15,3.994905e-15,3.144978e-15,3.659637e-15,5.858709e-16,6.734683e-15,1.542614e-16,4.606112e-15,2.549332e-16
3-X-global-X-global_all_TNF-via-NFkB_signaling,8.518205e-16,1.570927e-14,3.516911e-15,3.303363e-17,3.045630e-16,1.666658e-15,9.245895e-20,2.090478e-15,8.147142e-16,4.175936e-15,...,4.329238e-17,9.039299e-16,1.197679e-15,1.025573e-15,1.188532e-15,1.864964e-16,2.036101e-15,4.384817e-17,1.668691e-15,6.243284e-17
4-X-global-X-global_all_MHC-II-I presentation,2.003873e-15,5.072165e-14,1.220851e-14,4.364942e-17,1.004335e-15,4.993081e-15,1.373877e-03,5.995152e-15,1.772227e-15,1.293963e-14,...,9.679482e-17,2.463584e-15,3.413538e-15,2.854015e-15,1.960062e-15,5.124434e-16,7.823581e-15,1.331931e-16,5.220355e-15,1.964017e-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129-X-pDC-X-pDC_IFN_Type_1_2_Lambda,8.647568e-15,2.760989e-13,5.921884e-14,1.127890e-14,2.858028e-13,2.170342e-13,9.314475e-14,3.654577e-14,5.714121e-14,1.665816e-13,...,9.171595e-14,5.396469e-15,8.568845e-15,2.340803e-14,1.232031e-14,1.160916e-14,1.881297e-14,4.408738e-14,1.260044e-13,6.733195e-14
130-X-pDC-X-T_CD8_Naive_IFN_response,1.642870e-15,1.237488e-13,6.161063e-14,3.628381e-15,4.964424e-14,1.430628e-13,5.573538e-13,5.765918e-14,6.848309e-13,6.952614e-13,...,3.649419e-14,2.348518e-14,7.914992e-15,3.597958e-14,3.741970e-15,8.533358e-15,2.318098e-14,2.053319e-13,9.498873e-14,3.126309e-14
131-X-pDC-X-T_CD8_NonNaive_TNF_receptors_ligands,3.877077e-15,6.343989e-13,2.317601e-14,8.636264e-15,8.570168e-14,5.230477e-13,5.435202e-13,5.499726e-14,1.329280e-13,8.631236e-14,...,6.168955e-14,1.804984e-14,5.721282e-15,2.139786e-13,4.331456e-15,4.673451e-15,1.162242e-13,1.941819e-14,1.823400e-13,8.509506e-15
132-X-pDC-X-Plasma_adhesion_molecules,3.803800e-15,1.252868e-13,2.177605e-14,3.508399e-15,4.711810e-14,2.333114e-13,1.270576e-13,2.725763e-14,7.623577e-14,6.206206e-14,...,4.229873e-14,1.773174e-14,6.955283e-15,5.442488e-14,1.644905e-14,7.408637e-15,1.273251e-13,1.211540e-13,1.501877e-13,2.532733e-14


In [31]:
# Relabel the factor rows with the re-annotated names and show the resulting index.
gene_weights_renamed = gene_weights.rename(sorted_factor_reannotation, axis="index")
gene_weights_renamed.index

Index(['0-X-global-X-all_IL6-JAK-STAT3_signaling',
       '1-X-global-X-all_JAK-STAT_signaling',
       '2-X-global-X-all_type-IandII-ifn-response',
       '3-X-global-X-all_TNF-via-NFkB_signaling',
       '4-X-global-X-all_MHC-II-Ipresentation',
       '5-X-global-X-all_complement_production', '7-X-B-X-effector',
       '8-X-B-X-chemokines', '9-X-B-X-chemokine_receptors',
       '10-X-B-X-cytokine_and_receptors_proinflammatory',
       ...
       '123-X-UTC-X-antigen_presentation_molecules', '125-X-pDC-X-chemokines',
       '126-X-pDC-X-chemokine_receptors',
       '127-X-pDC-X-cytokine_and_receptors_proinflammatory',
       '128-X-pDC-X-cytokine_andreceptors_antiinflammatory',
       '129-X-pDC-X-IFN_Type_1_2_Lambda', '130-X-pDC-X-IFN_response',
       '131-X-pDC-X-TNF_receptors_ligands', '132-X-pDC-X-adhesion_molecules',
       '133-X-pDC-X-antigen_presentation_molecules'],
      dtype='object', length=125)

In [32]:
# Save the renamed gene-weight table. The file name is built from LAM instead of a
# hard-coded "0.001" so the output always matches the run that was loaded.
gene_weights_renamed.to_csv(
    here(f'03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Renamed_Processed_LAM{LAM}_S0.6_GeneWeights.csv'),
    index=True,
)