In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob
import csv
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
from plotly.subplots import make_subplots


In [2]:
# Locate every response export, then build the path of the survey file that
# belongs to each response file.
folder = "../../../data/surveys/responses/"
response_list = glob.glob(folder + "*")

folder = "../../../data/surveys/surveys/"
# NOTE(review): the offset 59 is a hard-coded character count; the relative
# response folder above is only 32 characters long, so this slice also drops
# the beginning of each file name - confirm that this is the intended mapping.
survey_list = [folder + path[59:] for path in response_list]

# Single-file preview: first survey and first response.
df_survey = pd.read_csv(survey_list[0], sep='\t')
df_survey = df_survey.rename(columns={df_survey.columns[0]: "Index"})

# Responses are stored row-wise: drop the first column and transpose so that
# each row pairs a sentence with its result.
df_response = pd.read_csv(response_list[0], header=None).iloc[:, 1:].T
df_response = df_response.rename(
    columns={
        df_response.columns[0]: "Sentence",
        df_response.columns[1]: "Result",
    }
)


# Surveys: stack all survey files into one frame.
survey_frames = [pd.read_csv(path, sep='\t') for path in survey_list]
df_surveys = pd.concat(survey_frames, ignore_index=True)
df_surveys = df_surveys.rename(columns={df_surveys.columns[0]: "Index"})

# Responses: stack all transposed response files.
response_frames = [
    pd.read_csv(path, header=None).iloc[:, 1:].T for path in response_list
]
df_responses = pd.concat(response_frames)
df_responses = df_responses.rename(
    columns={
        df_responses.columns[0]: "Sentence",
        df_responses.columns[1]: "Result",
    }
)

# Melt: one row per sentence slot ("1".."5"), without empty slots or
# duplicated rows.
df_melt = (
    df_surveys
    .melt(
        id_vars=["Species", "Main Trait", "SIM", "Dataset"],
        value_vars=["1", "2", "3", "4", "5"],
        value_name="Sentence",
    )
    .dropna()
    .drop_duplicates()
)

# Responses are de-duplicated as well before joining.
df_responses = df_responses.drop_duplicates()

# Join the survey sentences with their responses on the 'Sentence' column and
# discard the slot number produced by the melt.
df = df_melt.merge(df_responses, on='Sentence').drop(columns=["variable"])

In [3]:
# Load the stored survey result table; this rebinds `df`, replacing the frame
# merged in the previous cell.
folder = "../../../data/surveys/"
file = "df_survey_result.csv"

df = pd.read_csv(folder + file)

### Plotly

In [15]:
# Number of answers per (similarity measure, result category).
df_plot = df.groupby(["SIM", "Result"]).count()["Species"]
df_plot.name = "Data"

# Result categories and their colours, in plotting order.
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']
colors = [
     '#d62728',
     '#1f77b4',
     'goldenrod',
     '#2ca02c',
     ]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])

# One donut per similarity measure: (key in the "SIM" column, trace name).
similarity_traces = [("Bert", "Bert Similarity"), ("Jacc", "Jaccard Similarity")]
for col, (sim_key, trace_name) in enumerate(similarity_traces, start=1):
    # reindex keeps the label order and fills categories that never occur for
    # this similarity with 0 instead of raising a KeyError.
    counts = df_plot.loc[sim_key].reindex(labels, fill_value=0)
    fig.add_trace(go.Pie(labels=labels,
                         values=counts.values,
                         name=trace_name,
                         sort=False,
                         marker_colors=colors),
                  1, col)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+name",)

fig.update_layout(
    title_text="Trait Annotations Results Per Similarity", title_x=0.22,
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='BERT', x=0.17, y=0.5, font_size=20, showarrow=False),
                 dict(text='JACC', x=0.83, y=0.5, font_size=20, showarrow=False)],
    width=800, height=400,
)
fig.show()


folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerSimilarity.html"
# fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerSimilarity.pdf"
fig.write_image(F"{folder_figures}{file_name}")

In [16]:

# Create subplots: use 'domain' type for Pie subplot.
# Grid layout: one row per similarity measure, one column per dataset.
fig = make_subplots(rows=2, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}],
                                           [{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Hand-tuned caption positions: x per column, y per row (first row captions
# sit above the pies, second row captions below).
annotation_x = [0.050, 0.500, 0.950]
annotation_y = [1.1, -0.12]
annotations = []

# Add pie charts to each subplot
for i, sim in enumerate(df["SIM"].unique()):
    for j, dataset in enumerate(df["Dataset"].unique()):
        df_sub = df[(df["SIM"] == sim) & (df["Dataset"] == dataset)]
        # reindex keeps the label order and yields 0 for categories that do
        # not occur in this subset instead of raising a KeyError.
        counts = df_sub.groupby("Result")["Species"].count().reindex(labels, fill_value=0)
        fig.add_trace(go.Pie(labels=labels,
                             values=counts.values,
                             name=sim + ' ' + dataset,
                             sort=False,
                             marker_colors=colors),
                      i+1, j+1)
        # Build the caption from the values actually plotted in this cell of
        # the grid, so it cannot get out of sync with the order of unique().
        annotations.append(dict(text=F"{dataset} Dataset {sim}",
                                x=annotation_x[j], y=annotation_y[i],
                                font_size=15, showarrow=False))

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    annotations=annotations,
    width=1200, height=400,
)

fig.show()

# Define the output folder here so the cell does not rely on another cell
# having been run first.
folder_figures = "../../../reports/figures/"
# The HTML export gets its own file name; reusing the previous `file_name`
# would write HTML over the PDF produced by the per-similarity cell.
file_name = "PxPlot_HTML_SurveysPerDatasetPerSimilarity.html"
fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerDatasetPerSimilarity.pdf"
fig.write_image(F"{folder_figures}{file_name}")

In [20]:
# Create subplots: use 'domain' type for Pie subplot (one column per dataset).
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Hand-tuned x position of the caption placed in the centre of each donut.
annotation_x = [0.07, 0.500, 0.935]
annotations = []

# Add pie charts to each subplot
for i, dataset in enumerate(df["Dataset"].unique()):
    df_sub = df[df["Dataset"] == dataset]
    # reindex keeps the label order and yields 0 for categories that do not
    # occur for this dataset instead of raising a KeyError.
    counts = df_sub.groupby("Result")["Species"].count().reindex(labels, fill_value=0)
    fig.add_trace(go.Pie(labels=labels,
                         values=counts.values,
                         name=dataset,
                         direction='clockwise',
                         sort=False,
                         marker_colors=colors),
                  col=i+1, row=1)
    # Caption is derived from the dataset plotted in this column, so it always
    # matches the pie regardless of the order returned by unique().
    annotations.append(dict(text=F"{dataset} Dataset",
                            x=annotation_x[i], y=0.5,
                            font_size=15, showarrow=False))

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+value")

fig.update_layout(
    # Captions in the center of the donut pies.
    annotations=annotations,
    width=1200, height=400,
)

fig.show()

folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerDataset.html"
# fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerDataset.pdf"
fig.write_image(F"{folder_figures}{file_name}")

#### Percentage

In [52]:
# Display the current label/dataset -> colour mapping.
# NOTE(review): `color_dict` is only assigned in cells further down in this
# notebook, so on a fresh top-to-bottom run this cell raises a NameError.
color_dict

{'None of the above': '#d62728',
 'Can infer correct Entity': '#1f77b4',
 'Can infer correct Quality': 'goldenrod',
 'Can infer correct Value': '#2ca02c',
 'Caribbean': 'darkblue',
 'Palm': 'darkblue',
 'PlantNet': 'darkblue'}

In [21]:
# Constant column used as the root level of the sunburst path.
df["Total"] = "Total"

# Species count per (Total, Dataset, Main Trait, Result), divided by the
# Species count per Main Trait and expressed as a percentage with two decimals.
df_percentage = (
    (
        df.groupby(["Total", "Dataset", "Main Trait", "Result"])["Species"].count()
        / df.groupby(["Main Trait"])["Species"].count()
        * 100
    )
    .round(2)
    .to_frame()
    .rename(columns={"Species": "Percentage"})
    .reset_index()
)


# Colour lookup: one colour per result label, a shared colour for every
# dataset name, and a dedicated colour for the root entry.
color_dict = dict(zip(labels, colors))
color_dict.update(dict.fromkeys(df_percentage["Dataset"].unique(), 'darkblue'))
color_dict["Total"] = 'red'


# color_dict['(?)'] = 'black'

fig = px.sunburst(
    df_percentage,
    path=['Total', 'Dataset', 'Main Trait', 'Result'],
    values='Percentage',
    branchvalues="total",
    color='Result',
    color_discrete_map=color_dict,
)

# No outer margins; text smaller than the minimum size is hidden.
fig.update_layout(
    uniformtext=dict(minsize=8, mode='hide'),
    margin=dict(t=0, l=0, r=0, b=0),
)

# Manual edits
# fig['data'][0]['values'][-4:] = 4000

fig.show()

# print(fig)


# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Percentage.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Percentage.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

In [115]:
# Manual override: assign 103 to the last four entries of the first trace's
# `values`.
# NOTE(review): both 103 and the "-4:" slice are hand-picked; presumably they
# target the figure built in the percentage sunburst cell - TODO confirm and
# replace with values computed from the data.
fig['data'][0]['values'][-4:] = 103

In [120]:
# Inspect the layout settings of the figure currently bound to `fig`.
fig['layout']

Layout({
    'legend': {'tracegroupgap': 0},
    'margin': {'b': 0, 'l': 0, 'r': 0, 't': 0},
    'template': '...',
    'uniformtext': {'minsize': 8, 'mode': 'hide'}
})

#### Normal

In [None]:
# labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

# colors = [
#     'rgb(175, 49, 35)', 
#     'rgb(151, 179, 100)',
#     'rgb(33, 75, 99)',
#     'rgb(79, 129, 102)',
#       ]

# color_dict = {label: color for color, label in zip(colors, labels)}
# color_dict['(?)'] = 'black'

# fig = px.sunburst(df_melt, 
#                   path=['Main Trait', 'variable'], 
#                   values='value', 
#                   color='variable',
#                   color_discrete_map=color_dict)

# fig.update_layout(
#     margin = dict(t=0, l=0, r=0, b=0),
#     uniformtext=dict(minsize=8, mode='hide')
# )

# fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Grouped.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Grouped.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

#### Dataset/Group

In [None]:
# Result categories and the colour used for each of them in this figure.
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

colors = [
    'rgb(175, 49, 35)',
    'rgb(151, 179, 100)',
    'rgb(33, 75, 99)',
    'rgb(79, 129, 102)',
      ]

color_dict = {label: color for color, label in zip(colors, labels)}
color_dict['(?)'] = 'black'
color_dict['Total'] = 'blue'

# Build the count table for the sunburst from `df`.
# The `df_melt` frame created earlier in this notebook has no 'value' column
# (its melt uses value_name="Sentence") and its 'variable' column holds the
# sentence slots "1".."5" rather than result labels, so it cannot feed a
# sunburst that is sized by counts and coloured by result label.
# NOTE(review): assumes the figure is meant to show the number of answers per
# (Dataset, Main Trait, Result) - confirm against the intended figure.
df_counts = (
    df.groupby(["Dataset", "Main Trait", "Result"])["Species"]
    .count()
    .rename("Count")
    .reset_index()
)

fig = px.sunburst(df_counts,
                  path=['Dataset', 'Main Trait', 'Result'],
                  values='Count',
                  color='Result',
                  color_discrete_map=color_dict,
                  )

fig.update_layout(
    margin = dict(t=0, l=0, r=0, b=0),
)

fig.show()

folder_figures = "../../../reports/figures/"
file_name = "PxPlot_HTML_SurveysPerDataset_PerTrait_Grouped.html"
fig.write_html(F"{folder_figures}{file_name}")
# NOTE(review): the PDF name says "PerTrait_PerTrait" while the HTML name says
# "PerDataset_PerTrait"; kept unchanged because other documents may reference
# this file - confirm whether it is a copy-paste slip.
file_name = "PxPlot_PDF_SurveysPerTrait_PerTrait_Grouped.pdf"
fig.write_image(F"{folder_figures}{file_name}")