In [23]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob
import csv
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
from plotly.subplots import make_subplots


In [11]:
folder = "../../../data/surveys/responses/"
response_list = glob.glob(F"{folder}*")

folder = "../../../data/surveys/surveys/"
survey_list = [F"{folder}{lst[59:]}" for lst in response_list]

df_survey = pd.read_csv(survey_list[0], sep='\t')
df_survey.rename(columns={ df_survey.columns[0]: "Index" }, inplace = True)
df_response = pd.read_csv(response_list[0], header=None).iloc[: , 1:].T
df_response.rename(columns={ df_response.columns[0]: "Sentence", 
                            df_response.columns[1]: "Result" }, 
                            inplace = True)


# Surveys
df_surveys = pd.concat((pd.read_csv(f, sep='\t') for f in survey_list), ignore_index=True)
df_surveys.rename(columns={ df_surveys.columns[0]: "Index" }, inplace = True)

# Response
df_responses = pd.concat((pd.read_csv(f, header=None).iloc[: , 1:].T for f in response_list))
df_responses.rename(columns={ df_responses.columns[0]: "Sentence", 
                            df_responses.columns[1]: "Result" }, 
                            inplace = True)
# Melt
df_melt = pd.melt(df_surveys, id_vars=["Species",
                                       "Main Trait",
                                       "SIM",
                                       "Dataset"],
                             value_vars=["1", "2", "3", "4", "5",],
                             value_name="Sentence"
                
)

df_melt = df_melt.dropna()

# Drop duplicates in each dataframe
df_responses = df_responses.drop_duplicates()
df_melt = df_melt.drop_duplicates()

# Merge the dataframes based on the 'Sentence' column
df = pd.merge(df_melt, df_responses, on='Sentence')
df = df.drop(columns=["variable"])

### Plotly

In [61]:
df_plot = df.groupby(["SIM", "Result"]).count()["Species"]
df_plot.name = "Data"
df_plot

labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']
colors = [
     '#d62728', 
     '#1f77b4',
     'goldenrod',
     '#2ca02c', 
     ]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=labels, 
                     values=df_plot.loc["Bert", labels].values,
                     name="Bert Similarity",
                     marker_colors=colors),
              1, 1)
fig.add_trace(go.Pie(labels=labels, 
                     values=df_plot.loc["Jacc", labels].values,
                     name="Jaccard Similarity",
                     marker_colors=colors),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+name",)

fig.update_layout(
    title_text="Trait Annotation",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='BERT', x=0.2, y=0.5, font_size=20, showarrow=False),
                 dict(text='JACC', x=0.8, y=0.5, font_size=20, showarrow=False)],
                 legend={'traceorder':'normal'})
fig.show()


In [73]:
df_sub.groupby("Result").count().loc[labels, 'Species'].values

array([80, 46, 11, 77])

In [75]:

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}],
                                           [{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Add pie charts to each subplot
for i, sim in enumerate(df["SIM"].unique()):
    for j, dataset in enumerate(df["Dataset"].unique()):
        df_sub = df[(df["SIM"] == sim) & (df["Dataset"] == dataset)]
        fig.add_trace(go.Pie(labels=labels, 
                             values=df_sub.groupby("Result").count().loc[labels, 'Species'].values,
                             name=sim + ' ' + dataset,
                             sort=False,
                             marker_colors=colors),
                      i+1, j+1)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Trait Annotation",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='PlantNet Dataset Bert',  x=0.050, y=1.1, font_size=15, showarrow=False),
                 dict(text='Palm Dataset Bert',      x=0.500, y=1.1, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset Bert', x=0.950, y=1.1, font_size=15, showarrow=False),
                 dict(text='PlantNet Dataset Jacc',  x=0.050, y=-0.12, font_size=15, showarrow=False),
                 dict(text='Palm Dataset Jacc',      x=0.500, y=-0.12, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset Jacc', x=0.950, y=-0.12, font_size=15, showarrow=False)]
)

fig.show()

In [76]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# Add pie charts to each subplot
for i, dataset in enumerate(df["Dataset"].unique()):
    df_sub = df[df["Dataset"] == dataset]
    fig.add_trace(go.Pie(labels=labels, 
                            values=df_sub.groupby("Result").count().loc[labels, 'Species'].values,
                            name=dataset,
                            direction ='clockwise',
                            sort=False,
                            marker_colors=colors),                   
                    col=i+1, row=1,)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.6, hoverinfo="label+percent+value")

fig.update_layout(
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='PlantNet Dataset',  x=0.09, y=0.5, font_size=15, showarrow=False),
                 dict(text='Palm Dataset',      x=0.500, y=0.5, font_size=15, showarrow=False),
                 dict(text='Caribbean Dataset', x=0.915, y=0.5, font_size=15, showarrow=False)]
                 )

fig.show()

folder_figures = "../../../reports/figures/"
file_name = "PxPlot_HTML_SurveysPerDataset.html"
fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerDataset.pdf"
fig.write_image(F"{folder_figures}{file_name}")

In [96]:
df.groupby(["Main Trait", "Result", ])["Species"].count()

Main Trait      Result                   
Crown           Can infer correct Quality     4
                Can infer correct Value       4
                None of the above             3
Fruit Colour    Can infer correct Entity      1
                Can infer correct Quality     2
                                             ..
trunk and root  None of the above             4
vein            Can infer correct Entity      8
                Can infer correct Quality     5
                Can infer correct Value       4
                None of the above            21
Name: Species, Length: 161, dtype: int64

In [97]:
round(df["Main Trait", "Result"] / df.groupby(["Main Trait", "Result"])['Species'].transform('sum') * 100, 2)

KeyError: ('Main Trait', 'Result')

In [36]:
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']
df_melt = pd.melt(df_plot,
                  id_vars=['Main Trait', 'Dataset'],
                #   value_name='Choice',
                  value_vars=labels)

df_melt["Percentage"] = round(df_melt['value'] / df_melt.groupby(['Main Trait'])['value'].transform('sum') * 100, 2)
df_melt['Total'] = 'Total'

In [37]:
df_melt

Unnamed: 0,Main Trait,Dataset,variable,value,Percentage,Total
0,leaf apex,PlantNet,None of the above,1,5.88,Total
1,petiole,PlantNet,None of the above,1,33.33,Total
2,vein,PlantNet,None of the above,1,14.29,Total
3,inflorescences,PlantNet,None of the above,1,10.00,Total
4,inflorescences,PlantNet,None of the above,1,10.00,Total
...,...,...,...,...,...,...
1179,leaf blade,PlantNet,Can infer correct Value,1,33.33,Total
1180,plant type,PlantNet,Can infer correct Value,1,50.00,Total
1181,vein,PlantNet,Can infer correct Value,0,0.00,Total
1182,leaf apex,PlantNet,Can infer correct Value,0,0.00,Total


In [42]:
df1 = px.data.tips()
df1

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [65]:
color_dict

{'None of the above': 'rgb(175, 49, 35)',
 'Can infer correct Entity': 'rgb(151, 179, 100)',
 'Can infer correct Quality': 'rgb(33, 75, 99)',
 'Can infer correct Value': 'rgb(79, 129, 102)',
 '(?)': 'black'}

#### Percentage

In [69]:
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

colors = [
    'rgb(175, 49, 35)', 
    'rgb(151, 179, 100)',
    'rgb(33, 75, 99)',
    'rgb(79, 129, 102)',
      ]

# colors = [
#     'red', 
#     'yellow',
#     'blue',
#     'green',
#       ]

color_dict = {label: color for color, label in zip(colors, labels)}
# color_dict.update({label: color for color, label in zip(colors, df["Dataset"].unique())})
color_dict['(?)'] = 'black'

fig = px.sunburst(df_melt, 
                  path=['Total', 'Dataset', 'Main Trait', 'variable'], 
                  values='Percentage', 
                  color='variable',
                #   color='Black',
                  hover_data=["variable", "Percentage"],
                  color_discrete_map=color_dict)

fig.update_layout(
    margin = dict(t=0, l=0, r=0, b=0),
    uniformtext=dict(minsize=8, mode='hide')
)

fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Percentage.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Percentage.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

#### Normal

In [25]:
# labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

# colors = [
#     'rgb(175, 49, 35)', 
#     'rgb(151, 179, 100)',
#     'rgb(33, 75, 99)',
#     'rgb(79, 129, 102)',
#       ]

# color_dict = {label: color for color, label in zip(colors, labels)}
# color_dict['(?)'] = 'black'

# fig = px.sunburst(df_melt, 
#                   path=['Main Trait', 'variable'], 
#                   values='value', 
#                   color='variable',
#                   color_discrete_map=color_dict)

# fig.update_layout(
#     margin = dict(t=0, l=0, r=0, b=0),
#     uniformtext=dict(minsize=8, mode='hide')
# )

# fig.show()

# folder_figures = "../../../reports/figures/"
# file_name = "PxPlot_HTML_SurveysPerTrait_Grouped.html"
# fig.write_html(F"{folder_figures}{file_name}")
# file_name = "PxPlot_PDF_SurveysPerTrait_Grouped.pdf"
# fig.write_image(F"{folder_figures}{file_name}")

#### Dataset/Group

In [24]:
labels = ['None of the above', 'Can infer correct Entity', 'Can infer correct Quality', 'Can infer correct Value']

colors = [
    'rgb(175, 49, 35)', 
    'rgb(151, 179, 100)',
    'rgb(33, 75, 99)',
    'rgb(79, 129, 102)',
      ]

color_dict = {label: color for color, label in zip(colors, labels)}
color_dict['(?)'] = 'black'
color_dict['Total'] = 'blue'

fig = px.sunburst(df_melt, 
                  path=['Dataset', 'Main Trait', 'variable'], 
                  values='value', 
                  color='variable',
                  color_discrete_map=color_dict,
                  # branchvalues='remainder',
                  # maxdepth=-1,
                  # hover_data=['Dataset', 'Main Trait', 'variable'],
                  # hover_name=['Dataset', 'Main Trait', 'variable'],
                  )

fig.update_layout(
    margin = dict(t=0, l=0, r=0, b=0),
    # uniformtext=dict(minsize=4, mode='hide', )
)

fig.show()

folder_figures = "../../../reports/figures/"
file_name = "PxPlot_HTML_SurveysPerDataset_PerTrait_Grouped.html"
fig.write_html(F"{folder_figures}{file_name}")
file_name = "PxPlot_PDF_SurveysPerTrait_PerTrait_Grouped.pdf"
fig.write_image(F"{folder_figures}{file_name}")