#### Notebook for creating a sunburst plot of the genes from de novo gene program 11 identified with `NicheCompass`
- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute of Systems Immunology
- **Creation Date:** 25th of May 2024

In [10]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [78]:
# Directory the exported figures are written to.
# The default is the original hard-coded location; set the SUNBURST_FIG_DIR
# environment variable to redirect the output without editing the notebook.
fig_dir = os.environ.get(
    'SUNBURST_FIG_DIR',
    '/mnt/LaCIE/annaM/gut_project/Processed_data/Gut_data/10X_Xenium_data/NicheCompass/Xenium_add_on/NicheCompass/artifacts/single_sample/05042024_120038/figures',
)

In [142]:
# Top-level category of every curated gene, in the same order as `genes` and
# `subcategories` (entry i of each list describes the same gene).
# The list is expressed as consecutive runs so the size of each group is
# visible at a glance instead of having to count repeated string literals.
_category_runs = [
    ("Immune System", 13),
    ("Regulation of Gene Expression", 13),
    ("Signalling", 5),
    ("Development and Differentiation", 4),
    ("Metabolism", 7),
    ("Cell Structure and Adhesion", 7),
    ("Signalling", 2),
    ("Immune System", 1),
    ("Metabolism", 1),
    ("Immune System", 1),
    ("Cell Structure and Adhesion", 1),
]

categories = [label for label, run_length in _category_runs for _ in range(run_length)]

# One category per curated gene; 55 is the length reported by the length check below.
assert len(categories) == 55, f"expected 55 category labels, got {len(categories)}"

In [143]:
# Second-level label of every curated gene (middle ring of the sunburst).
# Parallel to `categories` and `genes`: the three lists are combined
# column-wise into one DataFrame, so entry i here belongs to genes[i].
# The same label may appear under more than one category
# (e.g. "Cell Adhesion", "Other Immune Genes").
subcategories = [
    "Immune Receptors and Signaling", "Immune Receptors and Signaling", "Immune Receptors and Signaling", 
    "Immune Receptors and Signaling", "Cytokines and Chemokines", "Cytokines and Chemokines", 
    "T Cell Markers", "T Cell Markers", "NK Cell Receptors", "Regulatory Proteins", 
    "Regulatory Proteins", "Other Immune Genes", "Other Immune Genes", 
    "Transcription Factors", "Transcription Factors", 
    "CREB Family", "DNA Repair", "Nuclear Receptors", "Homeobox Genes", "Homeobox Genes", "Homeobox Genes", 
    "Transcriptional Regulators", "Transcriptional Regulators", "Transcriptional Regulators", 
    "Transcriptional Regulators", "Transcriptional Regulators", "Growth Factor Receptors", 
    "GPCRs", "GPCRs", 
    "Cell Adhesion", "Cell Adhesion", "G-Protein Subunits", 
    "Development Regulators", "Development Regulators", 
    "Development Regulators", "Nitric Oxide metabolism", "Glucose metabolism",
    "Metal Binding Proteins", "Metal Binding Proteins", "Enzymes", 
    "Enzymes", "Enzymes", "Structural Proteins",  "Structural Proteins", "Structural Proteins", 
    "Cell Adhesion", "Cell Adhesion", "Mesothelin Family", "Structural Proteins", 
    "Cell Signaling", "Ion Channel Regulators", 
    "Other Immune Genes", "Heat Shock Proteins", "Other Immune Genes", 
    "Structural Proteins"
]

In [144]:
# Curated gene symbols shown as the outer ring of the sunburst.
# Parallel to `categories` and `subcategories`: genes[i] is annotated with
# categories[i] and subcategories[i] when the DataFrame is built.
# NOTE(review): the pairing is purely positional, and a spot check suggests some
# entries may be shifted (e.g. PDGFRB appears to line up with
# "Transcriptional Regulators" and GNA11 with "Cell Adhesion") — TODO confirm
# that the three lists are aligned as intended.
genes = [
    "LILRA4", "CD5", "CLEC9A", "FOXP3", "CXCR1", "CXCR4", "CD8A", "CD8B", 
    "KLRB1", "IRF8", "ITK", "GPR183", "RETNLB", "HES6", "ASCL2", "CREB3L1","BRCA2", 
    "RORA", "ISL1", "NKX2-2", "PAX4", "LEF1", "ZEB2", "CTNNB1", "RFX6", 
    "PDGFRB", "FFAR4", "ROBO2", "KIT", "GNA11", "RUNX1T1", "NET1", "ARX", 
    "CTLA4", "TBC1D4", "NOSIP", "MT1A", "SELENOM", "PLCE1", "FKBP11", "CA2", 
    "IL4I1", "MCEMP1", "KRT8", "TUBB", "CDHR5", "ANK2", "MSLN", "HEPACAM2", 
    "REP15", "BEST2", "VPREB3", "DNAJC12", "PDZK1IP1", "MLPH",
]

In [145]:
# The three annotation lists are paired by position, so they must have the same
# length. Fail loudly here instead of relying on a visual comparison of the tuple.
annotation_lengths = (len(categories), len(subcategories), len(genes))
assert len(set(annotation_lengths)) == 1, (
    f"categories / subcategories / genes differ in length: {annotation_lengths}"
)
annotation_lengths

(55, 55, 55)

In [146]:
# Assemble the annotation table: one row per curated gene, with its
# category and subcategory taken from the parallel lists defined above.
annotation_columns = dict(
    Category=categories,
    Subcategory=subcategories,
    Gene=genes,
)
df = pd.DataFrame(annotation_columns)

In [84]:
# First look at the hierarchy: rings run Category -> Subcategory -> Gene,
# and sectors are coloured by their top-level category.
hierarchy_levels = ['Category', 'Subcategory', 'Gene']
fig = px.sunburst(data_frame=df, path=hierarchy_levels, color='Category')
fig.show()

+ Add gene importance weights

In [147]:
# Gene symbols that have an importance weight. Paired by position with
# `all_weights` (both lists become the columns of `gene_importance_dataframe`).
# Presumably the full gene list of the gene program named in the notebook
# title — TODO confirm the source of this list.
all_genes = ['LILRA4', 'RPS4Y1', 'CD5', 'HES6', 'RUNX1T1', 'FFAR4', 'MCEMP1', 'ASCL2', 'KLRC2', 'CREB3L1', 'TBC1D4', 'GNA11', 'PKHD1L1', 'CLEC9A', 'NET1', 'ROBO2', 'NOSIP', 'MT1A', 'SELENOM', 'ARX', 'PDGFRB', 'NKX2-2', 'CTNNB1', 'RETNLB', 'FOXP3', 'CD8A', 'IRF8', 'ITK', 'RFX6', 'BRCA2', 'VPREB3', 'DNAJC12', 'CHI3L2', 'IL4I1', 'PDZK1IP1', 'ISL1', 'MLPH', 'RORA', 'CTLA4', 'REP15', 'TUBB', 'BEST2', 'CD8B', 'PAX4', 'KRT8', 'MSLN', 'GPR183', 'L1TD1', 'HEPACAM2', 'CXCR1', 'ANK2', 'KLRB1', 'SCG3', 'PLCE1', 'FKBP11', 'CA2', 'CDHR5', 'KIT', 'LEF1', 'ZEB2', 'PSTPIP2', 'CXCR4', 'PLVAP', 'TCL1A', 'DUOX2', 'CCDC80', 'CEP126', 'ODF2L', 'MYH14', 'LGR5', 'RORC', 'PIGR', 'F3', 'MMP3', 'IL1RAPL1', 'ACTA2', 'IL7R', 'PDGFRA', 'TUBA1A', 'SOCS3', 'C7', 'TRAC', 'OLFM4', 'WFDC2', 'EPHB3', 'DOCK10', 'IL1RL1', 'CD3D', 'PBK', 'GZMA', 'BANK1', 'SNCA', 'C2orf88', 'C1QBP', 'EBPL', 'SPIB', 'TRGV4', 'MS4A7', 'CD36', 'CA1', 'ETV1', 'CYB5R3', 'HRCT1', 'RUNX1', 'BATF', 'SELENBP1', 'LAG3', 'SCGN', 'KRT1', 'PDCD1', 'CDH19', 'SMOC2', 'SMIM14', 'KRTCAP3', 'NKG7', 'CTSE', 'SEC11C', 'SLC26A2', 'SOX6', 'SULT1B1', 'CD3G', 'PROX1', 'SELENOK', 'GREM2', 'VWA5B2', 'PCDH11X', 'AFAP1L2', 'CDCA7', 'CDK15', 'TRBC2', 'CES1', 'TAGLN', 'SLC29A4', 'AKR7A3', 'TMIGD1', 'FCRLA', 'PI3', 'DNASE1L3', 'GALNT5', 'IER5', 'CADPS', 'PRPH', 'TOP2A', 'GPRC5C', 'SMIM6', 'IDO1', 'CDK6', 'MS4A8', 'TCF21', 'CES2', 'CCR7', 'FEV', 'TRBC1', 'TRAT1', 'CD79A', 'CA7', 'SOX4', 'IMPDH2', 'RNASE1', 'IFI27', 'KLK1', 'DPYSL3', 'MS4A2', 'RNF43', 'ARHGAP24', 'GALNT8', 'ABCC8', 'PRDX4', 'CLCA4', 'LCT', 'TMEM61', 'PCLAF', 'CST7', 'LRMP', 'AVIL', 'INSM1', 'RHOV', 'TNFRSF17', 'SH2D7', 'TRDV1', 'MUC12', 'CHRM3', 'HTR3E', 'CDKN2B', 'CRYBA2', 'NUSAP1', 'FRZB', 'ETS1', 'TRPM5', 'RAB26', 'PTGER4', 'SCG5', 'RGS13', 'LEPROTL1', 'FCRL1', 'AQP1', 'SPDEF', 'GNLY', 'CTSB', 'SH2D6', 'GNPTAB', 'ICOS', 'DERL3', 'MB', 'NEUROD1', 'ANO7', 'MAF', 'TK1', 'SIT1', 'ACACB', 'CMBL', 'MS4A1', 'PRF1', 'CPA3', 'HES1', 'FZD7', 'GPRIN3', 'AQP8', 'STMN1', 'SFRP4', 'CD163', 
'HMGB2', 'GATA2', 'PTPRB', 'CPE', 'HPGDS', 'SCNN1A', 'CYBB', 'PAX5', 'BMX', 'LYVE1', 'AGTR1', 'ACKR1', 'SOX9', 'FYB1', 'SLC12A2', 'EGFR', 'CXCR5', 'CXCR3', 'KIF5C', 'AKR1C3', 'SOD3', 'CHGA', 'ITLN1', 'HHIP', 'RBFOX3', 'PLXND1', 'SFXN1', 'ANXA13', 'RAP1GAP', 'SCG2', 'CXCR2', 'DEPP1', 'CFTR', 'RGMB', 'MEIS2', 'FABP2', 'FOXA3', 'GZMK', 'RRM2', 'KLRD1', 'BCAS1', 'HAVCR2', 'REG4', 'UGT2B17', 'GPX2', 'UBE2C', 'CHGB', 'KRT86', 'SLC18A2', 'CD83', 'SVOPL', 'BEST4', 'C1QC', 'TOX', 'RAB3B', 'PDE4C', 'STXBP6', 'ID2', 'UGT2A3', 'TNFRSF25', 'GIMAP7', 'CHP2', 'TYMS', 'MKI67', 'SPOCK2', 'TNFAIP3', 'CALB2', 'ATOH1', 'CKAP4', 'LMCD1', 'PTTG1', 'CLCA1', 'CA4', 'PPP1R1B', 'MBP', 'FERMT1', 'DMBT1', 'NOTCH3', 'SDCBP2', 'ABCA8', 'NOVA1', 'TKT', 'IL17RB', 'RIIAD1', 'SLC26A3', 'MS4A12', 'CR2', 'PLPP2', 'ADRA2A', 'NXPE4', 'S100P', 'OTOP2', 'RGS5', 'B3GNT6', 'KCNAB1', 'PLN', 'CMA1', 'LGALS2', 'CD3E']

In [148]:
# Importance weight of each gene in `all_genes`, paired by position
# (all_weights[i] belongs to all_genes[i]). Values are listed in
# non-increasing order, from 0.012 down to 0.0.
all_weights = [0.012, 0.0081, 0.007, 0.0067, 0.0062, 0.006, 0.0059, 0.0059, 0.0058, 0.0055, 0.0055, 0.0055, 0.0053, 0.0052, 0.0052, 0.0051, 0.0051, 0.0051, 0.0048, 0.0046, 0.0046, 0.0046, 0.0044, 0.0043, 0.0042, 0.0042, 0.0042, 0.0042, 0.0042, 0.0042, 0.0041, 0.0041, 0.0041, 0.0041, 0.0039, 0.0039, 0.0039, 0.0039, 0.0037, 0.0037, 0.0037, 0.0037, 0.0037, 0.0036, 0.0036, 0.0036, 0.0035, 0.0035, 0.0034, 0.0034, 0.0034, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0032, 0.0032, 0.0031, 0.0031, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.0029, 0.0029, 0.0029, 0.0029, 0.0029, 0.0029, 0.0028, 0.0028, 0.0028, 0.0027, 0.0027, 0.0027, 0.0027, 0.0026, 0.0026, 0.0026, 0.0026, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0024, 0.0024, 0.0024, 0.0024, 0.0024, 0.0024, 0.0024, 0.0024, 0.0023, 0.0023, 0.0023, 0.0023, 0.0023, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0021, 0.0021, 0.0021, 0.0021, 0.0021, 0.0021, 0.0021, 0.0021, 0.0021, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.0019, 0.0019, 0.0019, 0.0019, 0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0012, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 
0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 0.0, 0.0, 0.0]

In [149]:
# Lookup table of importance weights: one row per gene, built from the
# two position-paired lists defined above.
importance_columns = dict(Gene=all_genes, Weight=all_weights)
gene_importance_dataframe = pd.DataFrame(importance_columns)

In [150]:
# Attach the importance weight to every curated gene (join on the 'Gene' column).
# - Any 'Weight' column left over from a previous run of this cell is dropped
#   first, so re-running does not produce 'Weight_x' / 'Weight_y' columns and
#   break the plotting cells that read df['Weight'].
# - A left join keeps every curated gene; genes without a weight are reported
#   explicitly instead of silently disappearing from the figure.
n_curated_genes = len(df)
df = (
    df.drop(columns='Weight', errors='ignore')
      .merge(gene_importance_dataframe, on='Gene', how='left')
)

# A gene listed more than once in the weight table would duplicate rows here.
assert len(df) == n_curated_genes, (
    "merge changed the number of rows - check gene_importance_dataframe for duplicated genes"
)
genes_without_weight = df.loc[df['Weight'].isna(), 'Gene'].tolist()
assert not genes_without_weight, f"no importance weight found for: {genes_without_weight}"

In [151]:
# Sunburst of the curated genes (Category -> Subcategory -> Gene), coloured by
# the gene importance weight on a reversed magma scale.
fig = px.sunburst(df, 
                  path=['Category', 'Subcategory', 'Gene'], 
                  color='Weight',
                  color_continuous_scale='magma_r')

# Label only the two ends of the colour bar: smallest and largest plotted weight.
weight_min, weight_max = df['Weight'].min(), df['Weight'].max()
fig.update_layout(
    width=1000,
    height=1000,
    coloraxis_colorbar=dict(
        title="Gene Importance Weight",
        tickvals=[weight_min, weight_max],
    ),
)

# Show the sector label only, and request the same font size for all labels
# drawn inside sectors.
fig.update_traces(
    textinfo="label",
    insidetextfont=dict(size=15)
)

fig.show()

# Save the figure as a high-quality image.
# Create the output directory first: write_image fails if it does not exist.
os.makedirs(fig_dir, exist_ok=True)
fig.write_image(f'{fig_dir}/sunburst_plot.png', scale=3)


In [62]:
# Sunburst with a different colouring rule for each ring:
#   ring 0 (Category)    -> qualitative palette, one colour per category
#   ring 1 (Subcategory) -> qualitative palette, one colour per subcategory label
#   ring 2 (Gene)        -> magma_r swatch chosen by the gene's importance weight
# Requires the 'Weight' column added to `df` by the merge cell.
fig = px.sunburst(df, path=['Category', 'Subcategory', 'Gene'], color='Category')

# px.sunburst returns ONE Sunburst trace that holds the sectors of all rings,
# so the colours have to be assigned per sector, not per trace.
sunburst_trace = fig.data[0]

# Define the color scales
color_scale = px.colors.sequential.Magma_r
palette = px.colors.qualitative.Plotly

category_colors = {category: palette[i % len(palette)]
                   for i, category in enumerate(df['Category'].unique())}
subcategory_colors = {subcategory: palette[i % len(palette)]
                      for i, subcategory in enumerate(df['Subcategory'].unique())}

gene_weights = dict(zip(df['Gene'], df['Weight']))
max_weight = df['Weight'].max()


def weight_to_color(weight):
    """Return the magma_r swatch for `weight`, scaled against the largest weight.

    The index is clamped to the last swatch: for weight == max_weight the raw
    index equals len(color_scale), which would be out of range.
    """
    if not max_weight > 0:
        # All weights are zero (or undefined): use the first swatch for every gene.
        return color_scale[0]
    position = int(weight / max_weight * len(color_scale))
    return color_scale[min(position, len(color_scale) - 1)]


# Map each sector id to its parent id; sectors of the innermost ring have an
# empty parent, so the ring index is the number of steps up to such a sector.
parent_of = dict(zip(sunburst_trace.ids, sunburst_trace.parents))


def ring_of(sector_id):
    """Return 0 for a category sector, 1 for a subcategory sector, 2 for a gene sector."""
    ring = 0
    while parent_of.get(sector_id):
        sector_id = parent_of[sector_id]
        ring += 1
    return ring


# One colouring rule per ring, indexed by ring number.
color_for_ring = (
    lambda label: category_colors[label],
    lambda label: subcategory_colors[label],
    lambda label: weight_to_color(gene_weights[label]),
)

sunburst_trace.marker.colors = [
    color_for_ring[ring_of(sector_id)](label)
    for sector_id, label in zip(sunburst_trace.ids, sunburst_trace.labels)
]
# NOTE: the sectors now carry explicit colour strings, so no colour bar is drawn
# for the weights in this figure.

fig.show()