# Figures for "Voice Analysis for Neurological Disorder Recognition - a Systematic Review and Perspective on Emerging Trends"

In [1]:
import os
import pandas as pd 
import plotly.express as px
import plotly.subplots as sp

In [2]:
# Load the exported data-extraction table
all_df = pd.read_csv(
    os.path.join('data', 'data_extraction-v03_1-export-sorted.csv')
)

# Distinct values found in the 'Disorder' column
disease_list = list(all_df['Disorder'].unique())

def get_tasks(df, col_name):
    """
    Collect the comma-separated entries of one column as a flat list.

    Parameters
    ----------
    df : DataFrame
        DataFrame holding the column to be split.
    col_name : str
        Name of the column whose cells contain comma-separated entries.
        Missing cells (NaN/None) are skipped.

    Returns
    -------
    list of str
        One item per entry, with surrounding whitespace and '*' markers
        removed. Entries that are empty after cleaning are dropped.
    """
    task_list = []
    for cell in df[col_name].dropna().tolist():
        for item in cell.split(','):
            # Strip whitespace on both sides of the '*' markers, so that
            # e.g. ' task *' becomes 'task' and not 'task '.
            item = item.strip().strip('*').strip()
            # Skip empty fragments caused by stray or trailing commas
            if item:
                task_list.append(item)

    return task_list

def get_sunburst_df(df, col_name):
    """
    Build the long-format DataFrame needed for the 2-level sunburst plots.

    Parameters
    ----------
    df : DataFrame
        DataFrame with a 'Disorder' column and the column `col_name`.
    col_name : str
        Name of the column with comma-separated entries to be counted per
        disorder.

    Returns
    -------
    DataFrame
        Columns `col_name` (the entry), 'count' (number of occurrences of
        the entry for the disorder) and 'disease' (the disorder).
    """
    frames = []
    # Work on the DataFrame that was passed in, not on a module-level one,
    # so the function also gives correct results for subsets of the data.
    for ds in df['Disorder'].dropna().unique():
        task_list = get_tasks(df[df['Disorder'] == ds], col_name)
        if not task_list:
            # Nothing to count for this disorder
            continue
        counts = pd.Series(task_list).value_counts()
        # Build the columns explicitly: the column names produced by
        # value_counts().to_frame().reset_index() differ between pandas
        # versions.
        frames.append(pd.DataFrame({
            col_name: counts.index.tolist(),
            'count': counts.tolist(),
            'disease': ds,
        }))

    if not frames:
        return pd.DataFrame(columns=[col_name, 'count', 'disease'])

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    all_ds_df = pd.concat(frames)

    # Replace the long disorder label with a shorter one for the plots
    all_ds_df = all_ds_df.replace('Aphasia, dysarthria and dysphonia', 'Speech impairments')
    return all_ds_df

In [3]:
def two_level_sunburst(df, lev1, lev2='disease', subplots=False, swap_levels=False, color_map=None):
    """
    Plot and save sunburst plots from the DataFrame with the extracted data.

    The figure is written as PNG and PDF into the 'figures' directory
    (created if missing) and then displayed. The file name is
    'sunburst_<lev1>' or, with subplots, 'sunburst_<lev1>_subplots'.

    Parameters
    ----------
    df : DataFrame
        Preprocessed DataFrame with extracted data from literature review.
    lev1 : str
        Column name of the first (outer) level in the sunburst plot.
    lev2 : str
        Column name of the second (inner) level in the sunburst plot.
        'disease' by default.
    subplots : bool
        Switch if a subplot should be generated with lev1 inside and lev2
        outside on the left (A), and lev2 inside and lev1 outside on the
        right (B). Only the right plot uses `color_map`.
    swap_levels : bool
        Switch if lev1 and lev2 should be swapped, i.e. lev1 inside and
        lev2 outside. Ignored if `subplots` is True.
    color_map : dict
        Optional dictionary to map the disorder name to the color to be
        assigned in the sunburst plot.
    """

    ### Preprocess the raw DataFrame for the sunburst plots
    df_sb = get_sunburst_df(df, lev1)

    if subplots:
        fig1 = px.sunburst(df_sb, path=[lev1, lev2], values='count', color=lev1)
        # In final version: only use color map for plot with disorder inside
        # to have consistent colors with other plots
        fig2 = px.sunburst(df_sb, path=[lev2, lev1], values='count', color=lev2, color_discrete_map=color_map)

        fig = sp.make_subplots(
            rows=1,
            cols=2,
            specs=[
                [{"type": "sunburst"}, {"type": "sunburst"}]
            ],
            subplot_titles=('A', 'B'),
            horizontal_spacing=0.05,
        )

        fig.add_trace(fig1.data[0], row=1, col=1)
        fig.add_trace(fig2.data[0], row=1, col=2)
        fig.update_layout(
            font_size=10,
            width=600,
            height=300,
            margin=dict(l=5, r=5, b=10, t=20)
        )
        file_stem = f'sunburst_{lev1}_subplots'

    else:
        if swap_levels:
            fig = px.sunburst(df_sb, path=[lev1, lev2], values='count', color=lev1, color_discrete_map=color_map)
        else:
            fig = px.sunburst(df_sb, path=[lev2, lev1], values='count', color=lev2, color_discrete_map=color_map)
        fig.update_layout(
            font_size=10,
            width=300,
            height=300,
            margin=dict(l=5, r=5, b=10, t=10)
        )
        file_stem = f'sunburst_{lev1}'

    # Make sure the output directory exists before exporting
    os.makedirs('figures', exist_ok=True)
    for ext in ('png', 'pdf'):
        fig.write_image(os.path.join('figures', f'{file_stem}.{ext}'))

    fig.show()

In [4]:
# Manually map the color palette of plotly to the disorders of the paper to
# ensure that each disorder has its own color across all plots

# Get list with all occurring disorders
df_cleaned = get_sunburst_df(all_df, 'Tasks')
disorders = list(df_cleaned.disease.unique())

# Get the plotly color palette
# https://community.plotly.com/t/plotly-colours-list/11730
plotly_colors = [
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf',  # blue-teal
    '#e377c2',  # raspberry yogurt pink # looks ugly, put to the back
]

# Map the disorders to the colors; zip() stops at the shorter list, so
# disorders beyond the palette length get no entry in the map
color_map = dict(zip(disorders, plotly_colors))

In [5]:
# Export one figure up front. This keeps a box reading
# 'Loading [MathJax]/extensions/MathMenu.js' from being rendered into the
# exported pdf.
# https://github.com/plotly/plotly.py/issues/3469
two_level_sunburst(
    df=all_df,
    lev1='assessment_scale',
    subplots=False,
    swap_levels=False,
    color_map=color_map,
)

In [6]:
# Sunburst plot with speech tasks outside and disorder inside

two_level_sunburst(all_df, 'Tasks', subplots=False, swap_levels=False, color_map=color_map)
# Rename output figure, since need 1 version with normal and 1 with swapped levels.
# os.replace overwrites an existing target on every platform, so the cell
# can be re-run (os.rename raises on Windows if the target already exists).
old_file_png = os.path.join('figures', 'sunburst_Tasks.png')
new_file_png = os.path.join('figures', 'sunburst_Tasks-Disorder_in.png')
old_file_pdf = os.path.join('figures', 'sunburst_Tasks.pdf')
new_file_pdf = os.path.join('figures', 'sunburst_Tasks-Disorder_in.pdf')
os.replace(old_file_png, new_file_png)
os.replace(old_file_pdf, new_file_pdf)

In [7]:
# Sunburst plot with speech tasks inside and disorder outside

two_level_sunburst(all_df, 'Tasks', subplots=False, swap_levels=True)
# Rename output figure, since need 1 version with normal and 1 with swapped levels.
# os.replace overwrites an existing target on every platform, so the cell
# can be re-run (os.rename raises on Windows if the target already exists).
old_file_png = os.path.join('figures', 'sunburst_Tasks.png')
new_file_png = os.path.join('figures', 'sunburst_Tasks-Disorder_out.png')
old_file_pdf = os.path.join('figures', 'sunburst_Tasks.pdf')
new_file_pdf = os.path.join('figures', 'sunburst_Tasks-Disorder_out.pdf')
os.replace(old_file_png, new_file_png)
os.replace(old_file_pdf, new_file_pdf)

In [8]:
# Feature extraction tools on the outer ring, disorder on the inner ring

two_level_sunburst(
    df=all_df,
    lev1='feature_extraction_aggregated',
    subplots=False,
    swap_levels=False,
    color_map=color_map,
)

In [9]:
# Predictive modelling approach on the outer ring, disorder on the inner ring

two_level_sunburst(
    df=all_df,
    lev1='ml_aggregated',
    subplots=False,
    swap_levels=False,
    color_map=color_map,
)

# Subplots

For submission, each figure must already contain its subplots &rarr; generate the figures with subplots here.

In [10]:
# Left (A): disorder on the outer ring, speech tasks on the inner ring
# Right (B): speech tasks on the outer ring, disorder on the inner ring

two_level_sunburst(
    df=all_df,
    lev1='Tasks',
    subplots=True,
    swap_levels=False,
    color_map=color_map,
)

In [11]:
def two_level_sunburst_subfigs(df, lev1_p1, level1_p2, lev2='disease', color_map=None):
    """
    Extended function of two_level_sunburst() to allow subplots with two
    different columns for 'lev1'.

    The figure is written as PNG and PDF into the 'figures' directory
    (created if missing) as 'sunburst-<lev1_p1>-<level1_p2>-subplots' and
    then displayed.

    Parameters
    ----------
    df : DataFrame
        Preprocessed DataFrame with extracted data from literature review.
    lev1_p1 : str
        Column name of the outer level of subplot 1 (left, 'A').
    level1_p2 : str
        Column name of the outer level of subplot 2 (right, 'B').
    lev2 : str
        Column name of the inner level of both subplots.
        'disease' by default.
    color_map : dict
        Optional dictionary to map the values of `lev2` to the color to be
        assigned in both sunburst plots.
    """

    ### plot two level sunburst with subplots
    df_sb_p1 = get_sunburst_df(df, lev1_p1)
    df_sb_p2 = get_sunburst_df(df, level1_p2)

    fig1 = px.sunburst(df_sb_p1, path=[lev2, lev1_p1], values='count', color=lev2, color_discrete_map=color_map)
    fig2 = px.sunburst(df_sb_p2, path=[lev2, level1_p2], values='count', color=lev2, color_discrete_map=color_map)

    fig = sp.make_subplots(
        rows=1,
        cols=2,
        specs=[
            [{"type": "sunburst"}, {"type": "sunburst"}]
        ],
        subplot_titles=('A', 'B'),
        horizontal_spacing=0.05,
    )

    fig.add_trace(fig1.data[0], row=1, col=1)
    fig.add_trace(fig2.data[0], row=1, col=2)
    fig.update_layout(
        font_size=10,
        width=600,
        height=300,
        margin=dict(l=5, r=5, b=10, t=20)
    )

    # Make sure the output directory exists before exporting
    os.makedirs('figures', exist_ok=True)
    file_stem = f'sunburst-{lev1_p1}-{level1_p2}-subplots'
    for ext in ('png', 'pdf'):
        fig.write_image(os.path.join('figures', f'{file_stem}.{ext}'))

    fig.show()

In [12]:
# Left (A): feature extraction tools on the outer ring, disorder on the inner ring
# Right (B): predictive modelling approach on the outer ring, disorder on the inner ring

two_level_sunburst_subfigs(
    df=all_df,
    lev1_p1='feature_extraction_aggregated',
    level1_p2='ml_aggregated',
    color_map=color_map,
)