In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob
import csv
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
from plotly.subplots import make_subplots


### Functions

In [5]:
def build_hierarchical_dataframe(df, levels, value_column, color_columns=None):
    """
    Build a hierarchy of levels for Sunburst or Treemap charts.

    Levels are given starting from the bottom to the top of the hierarchy,
    ie the last level corresponds to the root.
    """
    df_all_trees = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
    for i, level in enumerate(levels):
        df_tree = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
        dfg = df.groupby(levels[i:]).sum()
        dfg = dfg.reset_index()
        df_tree['id'] = dfg[level].copy()
        if i < len(levels) - 1:
            df_tree['parent'] = dfg[levels[i+1]].copy()
        else:
            df_tree['parent'] = 'total'
        df_tree['value'] = dfg[value_column]
        df_tree['color'] = dfg[color_columns[0]] / dfg[color_columns[1]]
        df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
    total = pd.Series(dict(id='total', parent='',
                              value=df[value_column].sum(),
                              color=df[color_columns[0]].sum() / df[color_columns[1]].sum()))
    df_all_trees = df_all_trees.append(total, ignore_index=True)
    return df_all_trees

In [3]:
folder = "../../../data/surveys/"
file = "df_survey_result.csv"

df = pd.read_csv(F"{folder}{file}")

In [6]:
df["Total"] = "Total"
# round(df.groupby(["Total", "Dataset", "Main Trait", "Result"])["Species"].count() / df.groupby(["Main Trait"])["Species"].count() * 100, 2).to_frame()
df_sun = df.groupby(["Total", "Dataset", "Main Trait", "Result"])["Species"]\
    .count()\
    .to_frame()\
    .reset_index()\
    .rename(columns={"Species": "Trait count"})

df_temp = df.groupby(["Main Trait"])["Species"]\
    .count()\
    .to_frame()\
    .reset_index()\
    .rename(columns={"Species": "Trait total"})

df_sun = pd.merge(df_sun, df_temp, on="Main Trait")

# df_sun = build_hierarchical_dataframe(df_sun, ["Main Trait", "Dataset", "Result"], "Trait count", ["Trait count", "Trait total"])
df_all_trees = build_hierarchical_dataframe(df_sun, ["Main Trait", "Dataset", "Result"], "Trait count", ["Trait count", "Trait total"])
average_score = df_sun['Trait count'].sum() / df_sun['Trait total'].sum()

  df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
  df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
  df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
  df_all_trees = df_all_trees.append(total, ignore_index=True)


### Plotly

#### Subplots

In [8]:
df_plot = df.groupby(["SIM", "Result"]).count()["Species"]
df_plot.name = "Data"
df_plot

labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']
colors = [
     '#d62728', 
     '#1f77b4',
     'goldenrod',
     '#2ca02c', 
     ]

In [15]:


# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=labels, 
                     values=df_plot.loc["Bert", labels].values,
                     name="Bert Similarity",
                     sort=False,
                     marker_colors=colors),
              1, 1)
fig.add_trace(go.Pie(labels=labels, 
                     values=df_plot.loc["Jacc", labels].values,
                     name="Jaccard Similarity",
                     sort=False,
                     marker_colors=colors),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+name",)

fig.update_layout(
    title_text="Trait Annotations Results Per Smililarity", title_x=0.22,
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='BERT', x=0.17, y=0.5, font_size=20, showarrow=False),
                 dict(text='JACC', x=0.83, y=0.5, font_size=20, showarrow=False)],
               #   legend={'traceorder':'normal'}
              #  margin = dict(t=0, l=0, r=0, b=0),
               width=800, height=400,
                 )
fig.show()


folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerSimilarity.html"
# fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerSimilarity.pdf"
fig.write_image(F"{folder_figures}{file_name}")

In [16]:

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}],
                                           [{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Add pie charts to each subplot
for i, sim in enumerate(df["SIM"].unique()):
    for j, dataset in enumerate(df["Dataset"].unique()):
        df_sub = df[(df["SIM"] == sim) & (df["Dataset"] == dataset)]
        fig.add_trace(go.Pie(labels=labels, 
                             values=df_sub.groupby("Result").count().loc[labels, 'Species'].values,
                             name=sim + ' ' + dataset,
                             sort=False,
                             marker_colors=colors),
                      i+1, j+1)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    # title_text="Trait Annotation",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='PlantNet Dataset Bert',  x=0.050, y=1.1, font_size=15, showarrow=False),
                 dict(text='Palm Dataset Bert',      x=0.500, y=1.1, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset Bert', x=0.950, y=1.1, font_size=15, showarrow=False),
                 dict(text='PlantNet Dataset Jacc',  x=0.050, y=-0.12, font_size=15, showarrow=False),
                 dict(text='Palm Dataset Jacc',      x=0.500, y=-0.12, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset Jacc', x=0.950, y=-0.12, font_size=15, showarrow=False)],
            #    margin = dict(t=0, l=0, r=0, b=0),
               width=1200, height=400,

)

fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerDatasetPerSimilarity.html"
fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerDatasetPerSimilarity.pdf"
fig.write_image(F"{folder_figures}{file_name}")

In [20]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Add pie charts to each subplot
for i, dataset in enumerate(df["Dataset"].unique()):
    df_sub = df[df["Dataset"] == dataset]
    fig.add_trace(go.Pie(labels=labels, 
                            values=df_sub.groupby("Result").count().loc[labels, 'Species'].values,
                            name=dataset,
                            direction ='clockwise',
                            sort=False,
                            marker_colors=colors),                   
                    col=i+1, row=1,)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+value")

fig.update_layout(
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='PlantNet Dataset',  x=0.07, y=0.5, font_size=15, showarrow=False),
                 dict(text='Palm Dataset',      x=0.500, y=0.5, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset', x=0.935, y=0.5, font_size=15, showarrow=False)],
    #  margin = dict(t=0, l=0, r=0, b=0),
    width=1200, height=400,
                 )

fig.show()

folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerDataset.html"
# fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerDataset.pdf"
fig.write_image(F"{folder_figures}{file_name}")

#### Percentage

In [16]:
df["Total"] = "Total"
df_percentage = round(df.groupby(["Total", "Dataset", "Main Trait", "Result"])["Species"].count() / df.groupby(["Main Trait"])["Species"].count() * 100, 2).to_frame()
df_percentage = df_percentage\
    .rename(columns={"Species": "Percentage"})\
    .reset_index()

df_percentage['Color'] = pd.Series(color_dict.keys())

In [17]:
df_percentage

Unnamed: 0,Total,Dataset,Main Trait,Result,Percentage,Color
0,Total,Caribbean,Fruit colour,Can infer correct Entity,31.15,None of the above
1,Total,Caribbean,Fruit colour,Can infer correct Quality,13.11,Can infer correct Entity
2,Total,Caribbean,Fruit colour,Can infer correct Value,27.87,Can infer correct Quality
3,Total,Caribbean,Fruit colour,None of the above,27.87,Can infer correct Value
4,Total,Caribbean,Fruit shape,Can infer correct Entity,40.00,Caribbean
...,...,...,...,...,...,...
147,Total,PlantNet,trunk and root,None of the above,40.00,
148,Total,PlantNet,vein,Can infer correct Entity,22.86,
149,Total,PlantNet,vein,Can infer correct Quality,14.29,
150,Total,PlantNet,vein,Can infer correct Value,2.86,


In [29]:
df["Total"] = "Total"
df_percentage = round(df.groupby(["Total", "Dataset", "Main Trait", "Result"])["Species"].count() / df.groupby(["Main Trait"])["Species"].count() * 100, 2).to_frame()
df_percentage = df_percentage\
    .rename(columns={"Species": "Percentage"})\
    .reset_index()


color_dict = {label: color for color, label in zip(colors, labels)}
color_dict.update({dataset: 'darkblue' for dataset in df_percentage["Dataset"].unique()})
color_dict["Total"] = 'red'

extra_colors = {"Result": df_percentage["Result"].to_list() + list(color_dict.keys())}
df_percentage = df_percentage.append(pd.DataFrame(extra_colors))

# color_dict['(?)'] = 'black'

fig = px.sunburst(df_percentage, 
                  path=['Total', 'Dataset', 'Main Trait', 'Result'], 
                  values='Percentage', 
                  color='Result',
                  branchvalues="total",
                  color_discrete_map=color_dict,
                  )

fig.update_layout(
    margin = dict(t=0, l=0, r=0, b=0),
    uniformtext=dict(minsize=8, mode='hide')
)

# Manual edites
# fig['data'][0]['values'][-4:] = 4000

fig.show()

# print(fig)


# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Percentage.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Percentage.pdf"
# fig.write_image(F"{folder_figures}{file_name}")


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



ValueError: ('None entries cannot have not-None children', Total                              nan
Dataset                            nan
Main Trait                         nan
Result        Can infer correct Entity
Name: 152, dtype: object)

In [30]:
df_percentage

Unnamed: 0,Total,Dataset,Main Trait,Result,Percentage
0,Total,Caribbean,Fruit colour,Can infer correct Entity,31.15
1,Total,Caribbean,Fruit colour,Can infer correct Quality,13.11
2,Total,Caribbean,Fruit colour,Can infer correct Value,27.87
3,Total,Caribbean,Fruit colour,None of the above,27.87
4,Total,Caribbean,Fruit shape,Can infer correct Entity,40.00
...,...,...,...,...,...
155,,,,Can infer correct Value,
156,,,,Caribbean,
157,,,,Palm,
158,,,,PlantNet,


In [84]:
fig = go.Figure(go.Sunburst(
    labels=df_all_trees['id'],
    parents=df_all_trees['parent'],
    values=df_all_trees['value'],
#     branchvalues='total',
#     marker=dict(
#         colors=df_all_trees['color'],
#         colorscale='RdBu',
#         cmid=average_score),
#     hovertemplate='<b>%{label} </b> <br> Sales: %{value}<br> Success rate: %{color:.2f}',
#     name=''
    )
)
fig.show()

#### Normal

In [None]:
# labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

# colors = [
#     'rgb(175, 49, 35)', 
#     'rgb(151, 179, 100)',
#     'rgb(33, 75, 99)',
#     'rgb(79, 129, 102)',
#       ]

# color_dict = {label: color for color, label in zip(colors, labels)}
# color_dict['(?)'] = 'black'

# fig = px.sunburst(df_melt, 
#                   path=['Main Trait', 'variable'], 
#                   values='value', 
#                   color='variable',
#                   color_discrete_map=color_dict)

# fig.update_layout(
#     margin = dict(t=0, l=0, r=0, b=0),
#     uniformtext=dict(minsize=8, mode='hide')
# )

# fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Grouped.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Grouped.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

#### Dataset/Group

In [6]:
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

colors = [
    'rgb(175, 49, 35)', 
    'rgb(151, 179, 100)',
    'rgb(33, 75, 99)',
    'rgb(79, 129, 102)',
      ]

color_dict = {label: color for color, label in zip(colors, labels)}
color_dict['(?)'] = 'black'
color_dict['Total'] = 'blue'

fig = px.sunburst(df_melt, 
                  path=['Dataset', 'Main Trait', 'variable'], 
                  values='value', 
                  color='variable',
                  color_discrete_map=color_dict,
                  # branchvalues='remainder',
                  # maxdepth=-1,
                  # hover_data=['Dataset', 'Main Trait', 'variable'],
                  # hover_name=['Dataset', 'Main Trait', 'variable'],
                  )

fig.update_layout(
    margin = dict(t=0, l=0, r=0, b=0),
    # uniformtext=dict(minsize=4, mode='hide', )
)

fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerDataset_PerTrait_Grouped.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_PerTrait_Grouped.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

NameError: name 'df_melt' is not defined