In [3]:
import pandas as pd
import numpy as np
import pickle
import os
import sys
import scanpy as sc
import anndata as an
from pyprojroot import here

import warnings
warnings.filterwarnings('ignore')

# Load objects

**Table S3-A: Input GeneSets**

In [18]:
# Deserialize the SPECTRA input gene sets from the project-relative pickle.
# NOTE(review): pickle.load can execute arbitrary code; this assumes the file
# was produced by this project's own pipeline and is trusted.
with open(
    here('03_downstream_analysis/05_SPECTRA/results/SPECTRA_GeneSet_symbol_v2.pickle'),
    mode='rb',
) as f:
    spectra_gene_set_dict = pickle.load(f)

**Table S3-B: Factor Weights**

In [9]:
# Factor weight table; the first CSV column becomes the row index.
# The index is reused below as the factor labels and the columns are renamed
# through ensg_to_symbol (presumably rows = factors, columns = gene IDs).
gene_weights = pd.read_csv(
    here('03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Renamed_Processed_LAM0.001_S0.6_GeneWeights.csv'),
    index_col=0,
)

**Table S3-C: Factor Markers** 

In [10]:
# Factor marker table; the first CSV column becomes the row index.
gene_factor_df = pd.read_csv(
    here('03_downstream_analysis/05_SPECTRA/results/SPECTRAFactor_Processed_LAM0.001_S0.6_NonCellIdentitymarkersDF.csv'),
    index_col=0,
)
# Relabel the columns positionally with the row labels of gene_weights.
# NOTE(review): assumes the CSV columns are in the same order as the rows of
# gene_weights -- a length mismatch raises, but a reordering would not.
gene_factor_df = gene_factor_df.set_axis(gene_weights.index, axis=1)

# Generate Tables

In [29]:
# Per-gene annotation table, indexed by its first CSV column.
varDF = pd.read_csv(
    here('03_downstream_analysis/05_SPECTRA/results/varDF.csv'),
    index_col=0,
)
# Lookup from varDF's index (presumably Ensembl gene IDs, per the variable
# name) to the value in its 'symbol' column.
ensg_to_symbol = varDF.loc[:, 'symbol'].to_dict()

## Input GeneSets

In [19]:
def dict_to_dataframe(data_dict):
    """Turn a mapping of name -> collection of values into a column-wise DataFrame.

    Each key becomes one column holding that key's values in iteration order.
    Columns shorter than the longest one are padded at the bottom with NaN so
    that all columns have the same length.

    Parameters
    ----------
    data_dict : dict
        Maps a column name to an iterable of values. Values are materialised
        with ``list()``, so tuples, sets and arrays are accepted as well as
        lists.

    Returns
    -------
    pandas.DataFrame
        One column per key, in the mapping's key order. An empty DataFrame is
        returned when ``data_dict`` is empty.
    """
    # Materialise every value once: ``tuple + list`` / ``set + list`` raise
    # TypeError, and ``ndarray + list`` would broadcast instead of concatenating.
    columns = {name: list(values) for name, values in data_dict.items()}

    # default=0 keeps an empty mapping from raising ValueError inside max().
    max_len = max((len(values) for values in columns.values()), default=0)

    # Pad each column with NaN up to the longest column's length.
    filled_dict = {
        name: pd.Series(values + [np.nan] * (max_len - len(values)))
        for name, values in columns.items()
    }

    return pd.DataFrame(filled_dict)

In [22]:
# Export the input gene sets to one workbook: each top-level key of
# spectra_gene_set_dict becomes a worksheet, and each inner key a column.
excel_file = here('03_downstream_analysis/05_SPECTRA/results/SPECTRA_input_GeneSets.xlsx')
with pd.ExcelWriter(excel_file) as writer:
    for sheet_name in spectra_gene_set_dict:
        data_dict = spectra_gene_set_dict[sheet_name]
        df = dict_to_dataframe(data_dict)
        # index=False: the positional row index carries no information here
        df.to_excel(writer, index=False, sheet_name=sheet_name)

## GeneWeights and GeneFactors

In [39]:
# GeneWeights: relabel the columns through ensg_to_symbol.
# Column labels with no entry in the mapping are left unchanged by rename.
gene_weights_symbol = gene_weights.rename(mapper=ensg_to_symbol, axis='columns')

In [40]:
# GeneFactors: relabel the row index through ensg_to_symbol.
# rename() is used instead of Index.map() because map() turns every label that
# is missing from the mapping into NaN, silently dropping that row's
# identifier in the exported sheet. rename() keeps unmapped labels as they
# are, which matches how the GeneWeights columns are relabelled.
# rename() returns a new DataFrame, so gene_factor_df itself is not modified.
gene_factor_symbol = gene_factor_df.rename(index=ensg_to_symbol)

In [42]:
# Export both symbol-labelled tables to one workbook, one worksheet each.
# The row index is written as well (to_excel default), since it carries the labels.
excel_file = here('03_downstream_analysis/05_SPECTRA/results/SPECTRA_output.xlsx')
with pd.ExcelWriter(excel_file) as writer:
    for output_sheet_name, output_table in (
        ('GeneWeights', gene_weights_symbol),
        ('FactorGenes', gene_factor_symbol),
    ):
        output_table.to_excel(writer, sheet_name=output_sheet_name)