<a href="https://colab.research.google.com/github/LielUziahu/L.Uziahu_Lab_Notebook-Mass_Lab/blob/master/Generate_Sunburst_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 1. Data Loading and Cleaning
# Loads the categorized gene table into `df` and removes rows that cannot be
# plotted. If the CSV is absent, a small dummy table with the same columns is
# substituted so the remaining steps can still be exercised.
file_path = 'Categorized_Gene_Table_HF_NF_with_LFC_no_unknown.csv'
# Columns that must be populated for a row to be kept.
required_columns = ['Morph', 'Category', 'Sub-Category', 'Gene Symbol', 'LFC', 'Description']
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    # Build the message from file_path so it cannot drift from the path actually read.
    print(f"File not found. Please ensure '{file_path}' is in the correct directory.")
    # Demonstration-only fallback: mirrors the column layout of the real table.
    df = pd.DataFrame({
        'Morph': ['HF', 'HF', 'NF', 'NF', 'HF'],
        'Category': ['Adhesion', 'Adhesion', 'Immunity', 'Immunity', 'Signaling'],
        'Sub-Category': ['General', 'Receptors', 'Cytokines', 'Receptors', 'Kinases'],
        'Gene Symbol': ['GENE1', 'GENE2', 'GENE3', 'GENE4', 'GENE5'],
        'LFC': [2.5, -1.8, 3.1, -2.0, 1.5],
        'Description': ['Description for GENE1', 'Description for GENE2', 'Description for GENE3', 'Description for GENE4', 'Description for GENE5'],
        'Biological Function & Explanation': ['Func1', 'Func2', 'Func3', 'Func4', 'Func5'],
        'Reference': ['Ref1', 'Ref2', 'Ref3', 'Ref4', 'Ref5']
    })
    print("Created dummy DataFrame for demonstration.")

# LFC is later passed to abs() and formatted with ':.2f'. A non-numeric entry
# would only fail there, far from its cause, so coerce it here and let such
# rows be dropped together with the other incomplete rows.
lfc_numeric = pd.to_numeric(df['LFC'], errors='coerce')
invalid_lfc_count = int((lfc_numeric.isna() & df['LFC'].notna()).sum())
if invalid_lfc_count:
    print(f"Dropping {invalid_lfc_count} row(s) with non-numeric LFC values.")
df['LFC'] = lfc_numeric

df = df.dropna(subset=required_columns)
print("Data loaded and cleaned.")

# 2. Prepare Data for go.Sunburst
# Flattens `df` into the parallel id/label/parent/value lists that go.Sunburst
# takes. The hierarchy is Morph -> Category -> Sub-Category -> Gene Symbol.
# Only gene (leaf) nodes carry a value (|LFC|) and hover text; the three
# upper levels are added with value 0 and empty hover text.
sunburst_ids = []
sunburst_labels = []
sunburst_parents = []
sunburst_values = []
sunburst_hover_lfc_text = []
sunburst_hover_desc_text = []
sunburst_node_category = []

unique_morphs = df['Morph'].unique()

# Ids are built by joining labels with '_', which is not guaranteed unique:
# the same gene can be listed twice under one sub-category, and labels may
# themselves contain '_'. Plotly needs distinct ids, so every id handed out is
# recorded here and clashes are disambiguated.
_used_sunburst_ids = set()


def _add_sunburst_node(node_id, label, parent_id, value=0, lfc_text="", desc_text="", category=None):
    """Append one node to the parallel sunburst lists.

    Returns the id actually stored. It equals ``node_id`` unless that id was
    already taken, in which case a ``#2``, ``#3``, ... suffix is appended.
    Callers must use the returned id as the parent of the node's children.
    """
    unique_id = node_id
    suffix = 2
    while unique_id in _used_sunburst_ids:
        unique_id = f"{node_id}#{suffix}"
        suffix += 1
    _used_sunburst_ids.add(unique_id)

    sunburst_ids.append(unique_id)
    sunburst_labels.append(label)
    sunburst_parents.append(parent_id)
    sunburst_values.append(value)
    sunburst_hover_lfc_text.append(lfc_text)
    sunburst_hover_desc_text.append(desc_text)
    sunburst_node_category.append(category)
    return unique_id


for morph in unique_morphs:
    # Root node: empty parent, no category (None marks it for neutral colouring).
    morph_id = _add_sunburst_node(morph, morph, '')
    df_morph = df[df['Morph'] == morph]

    for category in df_morph['Category'].unique():
        category_id = _add_sunburst_node(
            f"{morph_id}_{category}", category, morph_id, category=category
        )
        df_category = df_morph[df_morph['Category'] == category]

        for subcategory in df_category['Sub-Category'].unique():
            # Sub-categories are tagged with their parent category.
            subcategory_id = _add_sunburst_node(
                f"{category_id}_{subcategory}", subcategory, category_id, category=category
            )
            df_subcategory = df_category[df_category['Sub-Category'] == subcategory]

            # Only three columns are needed per gene, so iterate them directly
            # instead of materialising a full row Series with iterrows().
            for gene_symbol, lfc, description in zip(
                df_subcategory['Gene Symbol'],
                df_subcategory['LFC'],
                df_subcategory['Description'],
            ):
                _add_sunburst_node(
                    f"{subcategory_id}_{gene_symbol}",
                    gene_symbol,
                    subcategory_id,
                    value=abs(lfc),  # size by magnitude; the sign is kept in the hover text
                    lfc_text=f"LFC: {lfc:.2f}",
                    desc_text=f"Description: {description}",
                    category=category,
                )

sunburst_data_df = pd.DataFrame({
    'ids': sunburst_ids,
    'labels': sunburst_labels,
    'parents': sunburst_parents,
    'values': sunburst_values,
    'hover_lfc_text': sunburst_hover_lfc_text,
    'hover_desc_text': sunburst_hover_desc_text,
    'node_category': sunburst_node_category
})
print("Sunburst data DataFrame created.")

# 3. Generate Individual go.Sunburst Charts
# Builds one standalone go.Figure per root node (morph) of `sunburst_data_df`
# and stores it in `morph_go_charts`, keyed by the root's id.
morph_go_charts = {}

# Root nodes are the rows whose parent is the empty string.
unique_morphs_from_data = sunburst_data_df['ids'][sunburst_data_df['parents'] == ''].unique()

# Each category gets one colour shared by every chart. The palette is reused
# cyclically when there are more categories than colours.
all_unique_categories = df['Category'].unique()
palette = px.colors.qualitative.Plotly
category_color_map = {
    category: palette[position % len(palette)]
    for position, category in enumerate(all_unique_categories)
}

# Assign every node to its root by following parent links. Matching on an
# id prefix such as "HF_" would also capture a different morph named e.g.
# "HF_late" and requires every id to be a string.
parent_by_id = dict(zip(sunburst_data_df['ids'], sunburst_data_df['parents']))


def _root_id(node_id):
    """Follow parent links upward from ``node_id`` and return the root's id."""
    seen = {node_id}  # guards against looping forever on malformed parent links
    parent_id = parent_by_id.get(node_id, '')
    while parent_id != '' and parent_id not in seen:
        node_id = parent_id
        seen.add(node_id)
        parent_id = parent_by_id.get(node_id, '')
    return node_id


node_root_ids = sunburst_data_df['ids'].map(_root_id)

# The hover layout is identical for every chart, so build it once.
hovertemplate = '<b>%{label}</b><br>' + \
                '%{customdata[0]}<br>' + \
                '%{customdata[1]}<extra></extra>'

for morph_name in unique_morphs_from_data:
    df_filtered_for_morph = sunburst_data_df[node_root_ids == morph_name].copy()

    # Nodes without a category (the morph roots) are drawn grey. pd.isna is
    # used rather than `is None` because pandas may store the missing value as
    # NaN/NA, depending on the dtype it infers for the column.
    node_colors = [
        '#cccccc' if pd.isna(category_val) else category_color_map.get(category_val, '#000000')
        for category_val in df_filtered_for_morph['node_category']
    ]

    # customdata[0] / customdata[1] feed the two hover lines in the template.
    customdata_list = df_filtered_for_morph[['hover_lfc_text', 'hover_desc_text']].values.tolist()

    sunburst_trace = go.Sunburst(
        ids=df_filtered_for_morph['ids'],
        labels=df_filtered_for_morph['labels'],
        parents=df_filtered_for_morph['parents'],
        values=df_filtered_for_morph['values'],
        customdata=customdata_list,
        hovertemplate=hovertemplate,
        marker=dict(colors=node_colors),
        insidetextfont=dict(size=22)
    )

    fig = go.Figure(sunburst_trace)
    fig.update_layout(title_text=f'{morph_name} Gene Expression Sunburst Chart')
    morph_go_charts[morph_name] = fig
print(f"Generated {len(morph_go_charts)} individual go.Sunburst charts.")

# 4. Combine and Display Sunburst Charts
# Places the first trace of every per-morph figure side by side in a single
# one-row figure, applies the shared layout, and shows the result.
num_charts = len(morph_go_charts)
subplot_titles = ['' for _ in range(num_charts)]  # blank strings: no per-chart titles

# Sunburst traces are placed in 'domain'-type subplot cells.
domain_row = [{'type': 'domain'} for _ in range(num_charts)]
fig = make_subplots(rows=1, cols=num_charts, specs=[domain_row], subplot_titles=subplot_titles)

# Subplot columns are 1-based, so start the counter at 1.
for column, morph_figure in enumerate(morph_go_charts.values(), start=1):
    fig.add_trace(morph_figure.data[0], row=1, col=column)

combined_layout = {
    'title_text': 'Gene Expression in <i>S. pistillata</i> planulae morphs',
    'title_x': 0.5,  # centre the title horizontally
    'title_font_size': 36,
    'width': 1920,
    'height': 1080,
    'font_family': 'serif',
}
fig.update_layout(**combined_layout)

fig.show()
print("Combined Sunburst charts displayed.")

# 5. Save Combined Sunburst Charts to HTML
# Writes the combined figure held in `fig` to an HTML file and reports it.
output_html = 'combined_sunburst_charts_custom.html'
fig.write_html(output_html)
print("Combined Sunburst chart saved as combined_sunburst_charts_custom.html")

Data loaded and cleaned.
Sunburst data DataFrame created.
Generated 2 individual go.Sunburst charts.


Combined Sunburst charts displayed.
Combined Sunburst chart saved as combined_sunburst_charts_custom.html
