# Sunburst Plots for ASVs (16S and 18S):

**Updated on:** 2023-05-30 15:15 CET

In this Jupyter Notebook, we use the "ASV_SunburstInfo" files generated by the "StackedBarPlot_ASV(16s,18s)_sunburstInfo.ipynb" notebooks to create sunburst charts.

**Authors**: Abzer Kelminal (abzer.shah@uni-tuebingen.de) <br>
**Input file format**: .csv files <br>
**Outputs**: .svg images  <br>
**Dependencies**: numpy, pandas, plotly

---
**Necessary input files**:
 The "230530_CCE_ASVs_Phylum_SunburstInfo_Cycle_1_day1"-style csv files for every cycle and day (Cycle 1 to Cycle 4), for each depth range: full depth, surface (0-20 m) and deep (20-100 m). The plots were produced for both ASV16S and ASV18SV9 data.
   
---

# 1. Loading packages and setting working directory

In [None]:
# installing necessary packages (omitted for now)
! pip install plotly

In [None]:
! pip install -U kaleido #to save the plotly images as svg

In [2]:
# importing necessary modules
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
import kaleido
import datetime

In [None]:
# pip show kaleido             #to check if a particular dependency is already installed, here, kaleido

In [126]:
# Setting working directory.
# The pasted path is cleaned of surrounding whitespace and quotes first: paths
# copied from a file manager often carry them, and os.chdir would then fail.
directory = input("Enter the path of the folder in the output cell:\n").strip().strip('"\'').strip()
os.chdir(directory)

Enter the path of the folder in the output cell:
 G:\My Drive\CCE DATA\P1706_ASV\Stacked_Plots_and_Sunbursts\20230530_ASV18S_Phyla_level_grouping\deep_20-100m


In [127]:
# Read back the current working directory and display it, to confirm that the
# os.chdir call above switched to the intended folder.
path= os.getcwd()
path

'G:\\My Drive\\CCE DATA\\P1706_ASV\\Stacked_Plots_and_Sunbursts\\20230530_ASV18S_Phyla_level_grouping\\deep_20-100m'

# 2. Load the input files

In [128]:
# Lists all the csv files in the working directory and stores them in 'names'.
#
# os.listdir() returns entries in arbitrary order, but later cells pair
# names[i] with Cycle_names[i] and file_name[i]. Sorting makes the
# Cycle_<n>_day<m> files line up with those labels regardless of platform.
names = sorted(entry for entry in os.listdir() if entry.endswith(".csv"))

for csv_name in names:
    print(csv_name)  # prints only the csv files present in the folder

230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_1_day1_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_1_day2_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_1_day3_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_2_day1_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_2_day2_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_2_day3_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_2_day4_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_3_day1_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_3_day2_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_3_day3_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_4_day1_deep_20_100m.csv
230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_4_day2_deep_20_100m.csv


In [129]:
# Display the first collected file name as a quick sanity check
names[0]

'230530_CCE_ASV18S_Phylum_SunburstInfo_Cycle_1_day1_deep_20_100m.csv'

# 3. Read the input files
Here, we read selected columns of the csv file and replace NaN values with a blank string (' '). Finally, we add a column 'middle' holding the cycle name of the file. When 'middle' is included as the first entry of `path`, it becomes the main branch (the centre part) of the sunburst chart.

In [130]:
pd.read_csv(names[0]).head() # preview the first five rows of the 1st csv file, with all of its columns

Unnamed: 0,CCE_P1706_39.mzxml,CCE_P1706_41.mzxml,CCE_P1706_43.mzxml,CCE_P1706_45.mzxml,CCE_P1706_40.mzxml,CCE_P1706_42.mzxml,CCE_P1706_44.mzxml,CCE_P1706_46.mzxml,Avg,Domain,Kingdom,InterestedTaxon,Class,Order,Family,Genus,Species,Confidence
0,0,0,0,0,0,0,0,0,0.0,Eukaryota,Rhizaria,Radiolaria,Acantharea,Acantharea_X,Acantharea_XX,Acantharea_XXX,Acantharea_XXX_sp.,0.999176
1,0,0,0,0,0,0,0,0,0.0,Eukaryota,Archaeplastida,Chlorophyta,Chlorophyceae,,,,,0.852033
2,0,0,0,0,0,0,0,0,0.0,Eukaryota,Archaeplastida,Chlorophyta,Pyramimonadales,Pyramimonadales_X,Pyramimonadales_XX,Halosphaera,Halosphaera_sp.,0.998185
3,23,0,0,0,23,0,0,0,5.75,Eukaryota,Stramenopiles,Opalozoa,MAST-3,MAST-3I,MAST-3I_X,MAST-3I_XX,MAST-3I_XX_sp.,0.999984
4,0,0,0,10,0,0,0,10,2.5,Eukaryota,Opisthokonta,Metazoa,Arthropoda,Crustacea,Maxillopoda,,,0.981696


In [131]:
# For asv 16s 
#pd.read_csv(names[0])[["Domain", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Species", "Avg"]].replace(np.nan,' ')

In [132]:
# Taxonomy levels used for the ASV18S sunburst, plus 'Avg' (the row mean of the sample columns)
selected_columns = ["Domain", "Kingdom", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Avg"]

# Load the first file, keep only the selected columns and blank out missing taxonomy entries
first_file_table = pd.read_csv(names[0])
df = first_file_table[selected_columns].replace(np.nan, ' ')

# Same cycle label on every row
df['middle'] = np.array(['Cycle 1 Day 1'] * len(df))
df.head()

Unnamed: 0,Domain,Kingdom,InterestedTaxon,Class,Order,Family,Genus,Avg,middle
0,Eukaryota,Rhizaria,Radiolaria,Acantharea,Acantharea_X,Acantharea_XX,Acantharea_XXX,0.0,Cycle 1 Day 1
1,Eukaryota,Archaeplastida,Chlorophyta,Chlorophyceae,,,,0.0,Cycle 1 Day 1
2,Eukaryota,Archaeplastida,Chlorophyta,Pyramimonadales,Pyramimonadales_X,Pyramimonadales_XX,Halosphaera,0.0,Cycle 1 Day 1
3,Eukaryota,Stramenopiles,Opalozoa,MAST-3,MAST-3I,MAST-3I_X,MAST-3I_XX,5.75,Cycle 1 Day 1
4,Eukaryota,Opisthokonta,Metazoa,Arthropoda,Crustacea,Maxillopoda,,2.5,Cycle 1 Day 1


In [133]:
df.shape # gives the number of rows and columns

(7424, 9)

# 4. Sunburst Chart visualization:

## i. Visualize the 1st sunburst chart:

In [134]:
# Hierarchy of the chart, from the root to the leaves (asv18s layout).
# For asv16s use: ["middle","Domain", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Species"]
taxonomy_levels = ["Domain", "Kingdom", "InterestedTaxon", "Class", "Order", "Family", "Genus"]

fig = px.sunburst(
    df,
    path=taxonomy_levels,
    values="Avg",              # sector sizes come from the 'Avg' column
    color='InterestedTaxon',   # sectors are coloured by the 'InterestedTaxon' column
    width=1000,
    height=1000,
)
# To turn off the labels, uncomment:
# fig.update_traces(labels=['',] * len(fig.data[0]['labels']))
fig.show(renderer='browser')  # result shows in a browser window

## ii. Changing the colors of Phylum levels in sunburst charts:

In [135]:
# Count the rows per 'InterestedTaxon' value; the index of the result holds the distinct taxon names
Taxon_names = df["InterestedTaxon"].value_counts(dropna=False).to_frame()

# Copy the taxon names (row labels) into an array, sorted in alphabetical order
class_list = np.sort(np.array(Taxon_names.index.values))
print(class_list)

['Alveolata_X' 'Apicomplexa' 'Apusomonadidae' 'Centroheliozoa' 'Cercozoa'
 'Chlorophyta' 'Choanoflagellida' 'Ciliophora' 'Conosa' 'Cryptophyta'
 'Dinoflagellata' 'Discoba' 'Eukaryota_XX' 'Foraminifera' 'Fungi'
 'Haptophyta' 'Hilomonadea' 'Katablepharidophyta' 'Lobosa' 'Mesomycetozoa'
 'Metazoa' 'Ochrophyta' 'Opalozoa' 'Opisthokonta_X' 'Picozoa'
 'Pseudofungi' 'Radiolaria' 'Rhodophyta' 'Sagenista' 'Stramenopiles_X'
 'Streptophyta' 'Telonemia']


In [136]:
# Number of distinct taxa that need a colour
len(class_list)

32

---
Create a color_list and assign those colors to the phyla level:

In [137]:
# Palette of hex colours; colours are handed out to the taxa in list order
color_list = [
    "#7db086", "#8e17a4", "#9fd430", "#ad5ce5", "#00ad35", "#e56bf0", "#488e00", "#e331ac", "#01b15d", "#fc2887",
    "#00854a", "#ff6ac9", "#acb000", "#015ed5", "#e3c526", "#6a7fff", "#ffb933", "#0162c4", "#868e00", "#56449c",
    "#99d682", "#ff5aaa", "#2f5e03", "#d39bff", "#5a5700", "#b0b8ff", "#b31a00", "#02c3e1", "#ff4f4a", "#6ed9b3",
    "#a30655", "#a3d395", "#9a176a", "#bfcc8a", "#624680", "#ffa151", "#0077b9", "#ff7c45", "#87c6ff", "#a11c24",
    "#006b4d", "#ff76a3", "#7c4506", "#e7b6ee", "#8f360f", "#8b81b5", "#ff8b83", "#cc93bb", "#863b43",
]

# Pair each taxon in 'class_list' with the colour at the same position in 'color_list'.
# zip stops at the shorter of the two, so surplus colours are simply unused.
color_dict = {taxon: colour for taxon, colour in zip(class_list, color_list)}
color_dict

{'Alveolata_X': '#7db086',
 'Apicomplexa': '#8e17a4',
 'Apusomonadidae': '#9fd430',
 'Centroheliozoa': '#ad5ce5',
 'Cercozoa': '#00ad35',
 'Chlorophyta': '#e56bf0',
 'Choanoflagellida': '#488e00',
 'Ciliophora': '#e331ac',
 'Conosa': '#01b15d',
 'Cryptophyta': '#fc2887',
 'Dinoflagellata': '#00854a',
 'Discoba': '#ff6ac9',
 'Eukaryota_XX': '#acb000',
 'Foraminifera': '#015ed5',
 'Fungi': '#e3c526',
 'Haptophyta': '#6a7fff',
 'Hilomonadea': '#ffb933',
 'Katablepharidophyta': '#0162c4',
 'Lobosa': '#868e00',
 'Mesomycetozoa': '#56449c',
 'Metazoa': '#99d682',
 'Ochrophyta': '#ff5aaa',
 'Opalozoa': '#2f5e03',
 'Opisthokonta_X': '#d39bff',
 'Picozoa': '#5a5700',
 'Pseudofungi': '#b0b8ff',
 'Radiolaria': '#b31a00',
 'Rhodophyta': '#02c3e1',
 'Sagenista': '#ff4f4a',
 'Stramenopiles_X': '#6ed9b3',
 'Streptophyta': '#a30655',
 'Telonemia': '#a3d395'}

---
Add the corresponding new colors into a 'hex' column of the 'df' dataframe and visualize the sunburst chart with the new colors:

In [138]:
# Display the taxon -> hex colour mapping again for reference
color_dict

{'Alveolata_X': '#7db086',
 'Apicomplexa': '#8e17a4',
 'Apusomonadidae': '#9fd430',
 'Centroheliozoa': '#ad5ce5',
 'Cercozoa': '#00ad35',
 'Chlorophyta': '#e56bf0',
 'Choanoflagellida': '#488e00',
 'Ciliophora': '#e331ac',
 'Conosa': '#01b15d',
 'Cryptophyta': '#fc2887',
 'Dinoflagellata': '#00854a',
 'Discoba': '#ff6ac9',
 'Eukaryota_XX': '#acb000',
 'Foraminifera': '#015ed5',
 'Fungi': '#e3c526',
 'Haptophyta': '#6a7fff',
 'Hilomonadea': '#ffb933',
 'Katablepharidophyta': '#0162c4',
 'Lobosa': '#868e00',
 'Mesomycetozoa': '#56449c',
 'Metazoa': '#99d682',
 'Ochrophyta': '#ff5aaa',
 'Opalozoa': '#2f5e03',
 'Opisthokonta_X': '#d39bff',
 'Picozoa': '#5a5700',
 'Pseudofungi': '#b0b8ff',
 'Radiolaria': '#b31a00',
 'Rhodophyta': '#02c3e1',
 'Sagenista': '#ff4f4a',
 'Stramenopiles_X': '#6ed9b3',
 'Streptophyta': '#a30655',
 'Telonemia': '#a3d395'}

In [139]:
# Add a column 'hex' holding the colour assigned to each row's 'InterestedTaxon'
# (a taxon that is missing from color_dict raises a KeyError here)
row_colours = [color_dict[taxon] for taxon in df['InterestedTaxon']]
df['hex'] = np.array(row_colours)

# Identity dictionary (hex -> hex): passed as color_discrete_map when plotting
# with color='hex', so each hex code is drawn as that very colour
colorMapSubset = dict(zip(df.hex, df.hex))

df.head()

Unnamed: 0,Domain,Kingdom,InterestedTaxon,Class,Order,Family,Genus,Avg,middle,hex
0,Eukaryota,Rhizaria,Radiolaria,Acantharea,Acantharea_X,Acantharea_XX,Acantharea_XXX,0.0,Cycle 1 Day 1,#b31a00
1,Eukaryota,Archaeplastida,Chlorophyta,Chlorophyceae,,,,0.0,Cycle 1 Day 1,#e56bf0
2,Eukaryota,Archaeplastida,Chlorophyta,Pyramimonadales,Pyramimonadales_X,Pyramimonadales_XX,Halosphaera,0.0,Cycle 1 Day 1,#e56bf0
3,Eukaryota,Stramenopiles,Opalozoa,MAST-3,MAST-3I,MAST-3I_X,MAST-3I_XX,5.75,Cycle 1 Day 1,#2f5e03
4,Eukaryota,Opisthokonta,Metazoa,Arthropoda,Crustacea,Maxillopoda,,2.5,Cycle 1 Day 1,#99d682


In [140]:
# Hierarchy of the chart, from the root to the leaves (asv18s layout).
# For asv16s use: ["middle","Domain", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Species"]
sunburst_path = ["Domain", "Kingdom", "InterestedTaxon", "Class", "Order", "Family", "Genus"]

fig = px.sunburst(
    df,
    path=sunburst_path,
    values='Avg',
    color='hex',                        # column that holds the colour of each row
    color_discrete_map=colorMapSubset,  # maps every hex code onto itself
    width=1000,
    height=1000,
)
fig.show(renderer='browser')

In [141]:
# Make sure an 'images' folder exists inside the working directory
images_folder_missing = not os.path.exists("images")
if images_folder_missing:
    os.mkdir("images")

In [119]:
# Static export of the current figure into the 'images' folder
# (NOTE: presumably relies on the kaleido package installed at the top of the notebook)
fig.write_image("images/c1d1.svg") #writing the figure as SVG

## iii. FOR loop to get all Sunburst plots

Let's create a list with names for the output svg file:

In [142]:
# Today's date, used as the prefix of the output file names
Date = datetime.date.today()
# ISO format of a date is YYYY-MM-DD
date_string = Date.isoformat()

In [143]:
# Number of sampling days in each cycle: Cycle 1 -> 3, Cycle 2 -> 4, Cycle 3 -> 3, Cycle 4 -> 2
days_in_cycle = {1: 3, 2: 4, 3: 3, 4: 2}

# One 'Cycle<n>_Day<m>' tag per cycle/day combination, in chronological order
Figure_names = [
    f"Cycle{cycle}_Day{day}"
    for cycle, day_count in days_in_cycle.items()
    for day in range(1, day_count + 1)
]

In [144]:
# Build one output svg file name per figure: <date>_<cycle/day tag>_<dataset and depth suffix>
file_name = [
    f"{date_string}_{figure_tag}_asv18s_deep_20-100m_Phyla_Sunburst.svg"
    for figure_tag in Figure_names
]

file_name

['2023-05-30_Cycle1_Day1_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle1_Day2_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle1_Day3_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle2_Day1_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle2_Day2_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle2_Day3_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle2_Day4_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle3_Day1_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle3_Day2_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle3_Day3_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle4_Day1_asv18s_deep_20-100m_Phyla_Sunburst.svg',
 '2023-05-30_Cycle4_Day2_asv18s_deep_20-100m_Phyla_Sunburst.svg']

Also, defining cycle names to appear in the middle of each sunburst chart:

In [145]:
# Number of sampling days in each cycle: Cycle 1 -> 3, Cycle 2 -> 4, Cycle 3 -> 3, Cycle 4 -> 2
cycle_day_counts = {1: 3, 2: 4, 3: 3, 4: 2}

# Human-readable 'Cycle <n> Day <m>' labels, in the same order as the output file names
Cycle_names = [
    f"Cycle {cycle} Day {day}"
    for cycle, day_count in cycle_day_counts.items()
    for day in range(1, day_count + 1)
]

Finally, we create all the sunburst charts in a for loop and automatically save them in the images folder:

### Figures without labels:

In [146]:
# Create and save one unlabeled sunburst chart per input csv file.
#
# names[i], Cycle_names[i] and file_name[i] are used in parallel: the i-th csv
# file is plotted, tagged with the i-th cycle label and saved under the i-th
# output file name.

# Hierarchy of the chart, from the root to the leaves (asv18s layout).
# For asv16s use: ["middle","Domain", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Species"]
taxonomy_path = ["Domain", "Kingdom", "InterestedTaxon", "Class", "Order", "Family", "Genus"]

# color_dict was built from the taxa of the first file only, so a later file
# may contain a taxon without an assigned colour; such taxa are drawn in grey.
fallback_color = "#808080"

for i, csv_name in enumerate(names):
    # Read the file that belongs to THIS iteration. Indexing with names[0]
    # here would make every saved figure show the data of the first file.
    df = pd.read_csv(csv_name)[taxonomy_path + ["Avg"]].replace(np.nan, ' ')
    df['middle'] = np.array([Cycle_names[i]] * len(df))

    # Add a column 'hex' with the colour assigned to each row's taxon
    hex_codes = df['InterestedTaxon'].map(color_dict)  # taxa not in color_dict become NaN
    uncoloured = sorted(df.loc[hex_codes.isna(), 'InterestedTaxon'].unique())
    if uncoloured:
        print(f"{csv_name}: no colour defined for {uncoloured}; using {fallback_color}")
    df['hex'] = hex_codes.fillna(fallback_color)

    # Identity dictionary (hex -> hex) so each hex code is drawn as that very colour
    colorMapSubset = dict(zip(df.hex, df.hex))

    fig = px.sunburst(df,
                      path=taxonomy_path,
                      values='Avg',
                      color='hex',  # specifying the column for coloring
                      color_discrete_map=colorMapSubset,
                      width=1000,
                      height=1000)

    # Replace every label with an empty string to turn off the labels
    fig.update_traces(labels=['',] * len(fig.data[0]['labels']))

    fig.show(renderer='browser')
    fig.write_image(f"images/{file_name[i]}")

### Figures with labels:

In [147]:
# Create and save one labeled sunburst chart per input csv file.
#
# names[i], Cycle_names[i] and file_name[i] are used in parallel: the i-th csv
# file is plotted, tagged with the i-th cycle label and saved under a name
# derived from the i-th output file name.

# Hierarchy of the chart, from the root to the leaves (asv18s layout).
# For asv16s use: ["middle","Domain", "InterestedTaxon", "Class", "Order", "Family", "Genus", "Species"]
taxonomy_path = ["Domain", "Kingdom", "InterestedTaxon", "Class", "Order", "Family", "Genus"]

# color_dict was built from the taxa of the first file only, so a later file
# may contain a taxon without an assigned colour; such taxa are drawn in grey.
fallback_color = "#808080"

for i, csv_name in enumerate(names):
    # Read the file that belongs to THIS iteration. Indexing with names[0]
    # here would make every saved figure show the data of the first file.
    df = pd.read_csv(csv_name)[taxonomy_path + ["Avg"]].replace(np.nan, ' ')
    df['middle'] = np.array([Cycle_names[i]] * len(df))

    # Add a column 'hex' with the colour assigned to each row's taxon
    hex_codes = df['InterestedTaxon'].map(color_dict)  # taxa not in color_dict become NaN
    uncoloured = sorted(df.loc[hex_codes.isna(), 'InterestedTaxon'].unique())
    if uncoloured:
        print(f"{csv_name}: no colour defined for {uncoloured}; using {fallback_color}")
    df['hex'] = hex_codes.fillna(fallback_color)

    # Identity dictionary (hex -> hex) so each hex code is drawn as that very colour
    colorMapSubset = dict(zip(df.hex, df.hex))

    fig = px.sunburst(df,
                      path=taxonomy_path,
                      values='Avg',
                      color='hex',
                      color_discrete_map=colorMapSubset,
                      width=1000, height=1000)

    fig.show(renderer='browser')

    # Save under a distinct name: writing to images/{file_name[i]} would
    # overwrite the unlabeled figure of the same cycle/day.
    stem, extension = os.path.splitext(file_name[i])
    fig.write_image(f"images/{stem}_with_labels{extension}")