In [None]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as offline

In [None]:
# Notebook-wide plotly setup; run once before any figure cell.
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it, so rendering presumably needs network access — confirm.
offline.init_notebook_mode(connected=True)
# Default static-export format for the kaleido scope. The figure cells below
# pass explicit '.pdf' / '.png' file names to write_image.
pio.kaleido.scope.default_format = 'pdf'

### Fig. 2A

In [None]:
# Models compared in Fig. 2A, in plotting order (left to right).
model_list = [
    'Phyto-Reasoner',
    'Phyto-Chatbot',
    'GPT-4.1',
    'o3',
    'Gemini-2.5-Pro',
    'Claude-3.7-Sonnet',
    'Grok-3-Beta',
    'Deepseek-V3',
    'Deepseek-R1',
]

# Upper bars: one row per model (model_list order), six fractional scores per
# row. The plotting cell averages each row and converts it to a percentage.
gi_y_list = [
    [0.796178344, 0.8, 0.661616162, 0.62, 0.67, 0.689922481],
    [0.643312102, 0.693333333, 0.575757576, 0.53, 0.75, 0.569767442],
    [0.477707006, 0.32, 0.222222222, 0.32, 0.31, 0.298449612],
    [0.573248408, 0.346666667, 0.237373737, 0.39, 0.6, 0.364341085],
    [0.592356688, 0.32, 0.267676768, 0.37, 0.53, 0.403100775],
    [0.649681529, 0.333333333, 0.207070707, 0.44, 0.58, 0.317829457],
    [0.433121019, 0.226666667, 0.196969697, 0.28, 0.45, 0.182170543],
    [0.477707006, 0.253333333, 0.202020202, 0.3, 0.39, 0.209302326],
    [0.47133758, 0.253333333, 0.186868687, 0.27, 0.44, 0.213178295],
]
# The first two models are filled blue, the remaining seven grey.
gi_color_list = ['rgb(0,102,204)'] * 2 + ['rgb(128,128,128)'] * 7

# Lower bars: one value per model (model_list order). The plotting cell
# negates and rescales these so they extend below the zero line.
ft_y_list = [
    0.0872944191425795,
    0.0578708942924461,
    0.0140092993689062,
    0.00840265756099903,
    0.0134233899085525,
    0.019857641750849,
    0.0276813871949195,
    0.0203150107637677,
    0.0171363772464254,
]
# Lighter tints of the same blue/grey split used for the upper bars.
ft_color_list = ['rgb(102,163,255)'] * 2 + ['rgb(179,179,179)'] * 7

In [None]:
# Fig. 2A: mirrored bar chart. gi_y_list bars point up, ft_y_list bars point down.
line_width = 2

# Frame and tick styling shared by both axes.
axis_style = {
    'showline': True,
    'linewidth': line_width,
    'linecolor': 'rgb(0,0,0)',
    'mirror': True,
    'ticks': 'outside',
    'tickwidth': line_width,
}

# Upper bars: per-model mean expressed as a percentage, then shifted down by 20.
upper_heights = [np.mean(scores) * 100 - 20 for scores in gi_y_list]
# Lower bars: negated and scaled by 600 so they hang below zero.
lower_heights = -np.array(ft_y_list) * 600
# NOTE(review): because of the -20 offset and x600 scaling, the y-axis tick
# values are not the raw numbers; presumably the ticks are relabelled when the
# figure is assembled — confirm.

fig = go.Figure()
for heights, fill_colors in (
    (upper_heights, gi_color_list),
    (lower_heights, ft_color_list),
):
    fig.add_trace(go.Bar(
        x=model_list,
        y=heights,
        marker_color=fill_colors,
        marker_line_color='rgb(0,0,0)',
        marker_line_width=line_width,
        textposition='outside',
    ))

fig.update_layout(
    barmode='relative',
    showlegend=False,
    xaxis=dict(axis_style),
    yaxis=dict(axis_style, title_text='Accuracy rate (%)', range=[-60, 60]),
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1.9598*400,
    height=5.2761*400,
)

fig.show()

# Export the same figure as both a vector and a raster image.
file_prefix = 'fig.2a.phytobench-knowledge.bar'.lower().replace(' ', '_')
for extension in ('pdf', 'png'):
    fig.write_image(f'{file_prefix}.{extension}')

### Fig. 2B

In [None]:
# Models compared in Fig. 2B, in plotting order (left to right).
model_list = [
    'Phyto-Reasoner',
    'Phyto-Chatbot',
    'GPT-4.1',
    'o3',
    'Gemini-2.5-Pro',
    'Claude-3.7-Sonnet',
    'Grok-3-Beta',
    'Deepseek-V3',
    'Deepseek-R1',
]

# One fractional score per model (model_list order); the plotting cell
# multiplies by 100 to show percentages.
bi_list = [
    0.875799086757991,
    0.857534246575342,
    0.348122866894198,
    0.271331058020478,
    0.157077625570776,
    0.282191780821918,
    0.247440273037543,
    0.237442922374429,
    0.171689497716895,
]

# The first two models are filled blue, the remaining seven light grey.
bi_color_list = ['rgb(31,113,179)'] * 2 + ['rgb(208,210,211)'] * 7

In [None]:
# Fig. 2B: one bar per model showing bi_list as a percentage.
line_width = 2

# Frame and tick styling shared by both axes.
axis_style = {
    'showline': True,
    'linewidth': line_width,
    'linecolor': 'rgb(0,0,0)',
    'mirror': True,
    'ticks': 'outside',
    'tickwidth': line_width,
}

accuracy_bars = go.Bar(
    x=model_list,
    y=[fraction * 100 for fraction in bi_list],  # fraction -> percent
    marker_color=bi_color_list,
    marker_line_color='rgb(0,0,0)',
    marker_line_width=line_width,
    textposition='outside',
)

fig = go.Figure()
fig.add_trace(accuracy_bars)

fig.update_layout(
    showlegend=False,
    xaxis=dict(axis_style),
    yaxis=dict(axis_style, title_text='Accuracy rate (%)', range=[0, 100]),
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=5.2761*400,
    height=1.9598*400,
)

fig.show()

# Export the same figure as both a vector and a raster image.
file_prefix = 'fig.2b.phytobench-data.bar'.lower().replace(' ', '_')
for extension in ('pdf', 'png'):
    fig.write_image(f'{file_prefix}.{extension}')

### Fig. 2C

In [None]:
# Models compared in Fig. 2C, in plotting order (left to right).
model_list = [
    'Phyto-Reasoner',
    'Phyto-Chatbot',
    'GPT-4.1',
    'o3',
    'Gemini-2.5-Pro',
    'Claude-3.7-Sonnet',
    'Grok-3-Beta',
    'Deepseek-V3',
    'Deepseek-R1',
]

In [None]:
# Fig. 2C: one violin per model showing the distribution of goal-completion
# scores (%), with an embedded box plot and mean line.
line_width = 2

# One list of scores per model, in the same order as model_list.
goal_completion_scores = [
    [50,25,50,25,50,25,50,50,50,25,25,100,75,100,25,50,40,50,50,25,100,50,100,100,75,40,50,50,50,40,50,50,75,75,50,25,50,25,50,25,50,40,20,25,25,25,50,50,25,50],
    [20,40,25,40,25,50,25,25,25,50,75,75,75,25,50,25,50,20,50,50,50,75,100,100,75,20,100,20,20,20,50,25,50,50,50,20,20,25,20,20,25,25,25,25,50,25,25,25,50,75],
    [20,25,25,25,25,50,25,50,20,50,25,25,25,50,50,20,25,25,25,20,80,75,75,75,25,75,75,50,40,60,50,50,75,50,50,25,25,25,25,20,25,20,20,20,20,25,25,25,25,25],
    [100,25,25,25,20,20,25,50,50,25,25,25,25,50,25,20,25,20,20,20,40,100,100,25,40,25,20,25,25,20,25,100,20,25,25,25,25,25,20,20,20,25,25,25,20,25,25,25,25,25],
    [25,25,25,25,40,50,25,50,25,25,25,25,25,100,25,25,25,25,60,25,25,25,75,25,75,50,25,25,50,75,100,25,50,40,25,25,25,50,25,25,25,25,25,20,25,20,25,33.33333333,75,25],
    [25,20,25,25,25,25,25,25,40,50,25,25,25,25,25,50,25,20,50,50,80,80,60,75,40,40,25,75,75,100,20,20,25,25,20,25,75,25,25,25,25,20,25,25,20,20,20,25,20,25],
    [50,25,25,50,40,25,25,25,75,50,25,25,25,25,25,25,50,20,20,60,100,100,100,100,75,50,75,50,75,50,50,25,50,25,25,25,25,25,25,25,20,25,25,25,100,25,25,25,20,25],
    [20,25,25,25,25,25,25,25,25,25,25,25,50,25,25,25,40,50,25,25,75,50,75,75,100,75,50,50,75,66.66666667,25,50,25,25,25,25,25,25,25,50,25,20,20,20,20,25,25,25,20,25],
    [25,20,20,25,20,25,25,25,50,75,50,25,25,50,25,25,50,25,25,50,75,75,25,60,100,60,60,40,25,60,50,40,25,50,25,50,25,50,25,25,40,20,40,100,20,25,25,50,25,25],
]
# Every model needs exactly one score list, otherwise zip() below would
# silently drop the unmatched entries.
assert len(goal_completion_scores) == len(model_list), 'one score list per model expected'

# The first two models are filled blue, all others light grey.
highlight_fill = 'rgb(31,113,179)'
baseline_fill = 'rgb(208,210,211)'
n_highlighted = 2

fig = go.Figure()

# Labels come from model_list so this panel uses the same model names as the
# other Fig. 2 panels; the traces previously hard-coded 'Agro-Reasoner' /
# 'Agro-Chatbot' for the first two entries.
for position, (model_name, scores) in enumerate(zip(model_list, goal_completion_scores)):
    fig.add_trace(go.Violin(
        # One x label per observation, sized from the data rather than a fixed count.
        x=[model_name] * len(scores),
        y=scores,
        name=model_name,
        line_color='rgb(0,0,0)',
        fillcolor=highlight_fill if position < n_highlighted else baseline_fill,
        box_visible=True,
        meanline_visible=True))

fig.update_layout(
    showlegend=False,
    xaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width},
    yaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width, 'title_text': 'Goal completion (%)', 'range': [0, 100]},
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1920,
    height=1080)

fig.show()
# NOTE(review): "agnet" looks like a typo for "agent"; the name is kept so any
# existing references to the exported files still resolve — rename deliberately.
file_prefix = 'fig.2c.model.analyst-agnet.violin'.lower().replace(' ', '_')
fig.write_image(f'{file_prefix}.pdf')
fig.write_image(f'{file_prefix}.png')

### Fig. 2E

### Fig. 2F

In [None]:
# Models compared in Fig. 2F, in plotting order (left to right).
model_list = [
    'Phytomni',
    'Deepseek-V3',
    'Deepseek-R1',
    'GPT-4.1',
    'o3',
    'Gemini-2.5-Pro',
    'Claude-3.7-Sonnet',
    'Grok-3-Beta',
]

# Key-point scores (%), one per model in model_list order.
y_list = [
    95,
    80,
    85,
    82,
    88,
    89,
    86,
    87,
]

# The first model is filled blue, the remaining seven light grey.
color_list = ['rgb(104,175,215)'] + ['rgb(208,210,211)'] * 7

In [None]:
# Fig. 2F (well-studied genes): key-point score per model as a bar chart.
line_width = 2

fig = go.Figure()

fig.add_trace(go.Bar(
    # Label every bar with its model name; without x the bars are only
    # numbered 0..7 and model_list goes unused.
    x=model_list,
    # Heights are drawn relative to a baseline of 70, so an axis tick at t
    # corresponds to a score of 70 + t.
    # NOTE(review): presumably the ticks are relabelled when the figure is assembled — confirm.
    y=[y-70 for y in y_list],
    marker_color=color_list,
    marker_line_color='rgb(0,0,0)',
    marker_line_width=line_width,
    textposition='outside',
))

fig.update_layout(
    xaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width},
    yaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width, 'title_text': 'Key points (%)', 'range': [0, 30]},
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1080/9*8,
    height=1080)

fig.show()
# Export the same figure as both a vector and a raster image.
file_prefix = 'fig.2f.phytobench-gene.well_studied.key_points.bar'.lower().replace(' ', '_')
fig.write_image(f'{file_prefix}.pdf')
fig.write_image(f'{file_prefix}.png')

In [None]:
# Confabulation rates (%), one per model in model_list order.
# This rebinds y_list, replacing the key-point scores used by the previous plot.
y_list = [
    5,
    13.83,
    21.02,
    16.09,
    23.82,
    6.09,
    15,
    27,
]

In [None]:
# Fig. 2F (well-studied genes): confabulation rate per model as a bar chart.
line_width = 2

fig = go.Figure()

fig.add_trace(go.Bar(
    # Label every bar with its model name; without x the bars are only
    # numbered 0..7 and model_list goes unused.
    x=model_list,
    y=y_list,
    marker_color=color_list,
    marker_line_color='rgb(0,0,0)',
    marker_line_width=line_width,
    textposition='outside',
))

fig.update_layout(
    xaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width},
    yaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width, 'title_text': 'Confabulations (%)', 'range': [0, 30]},
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1080/9*8,
    height=1080)

fig.show()
# Export the same figure as both a vector and a raster image.
file_prefix = 'fig.2f.phytobench-gene.well_studied.confabulations.bar'.lower().replace(' ', '_')
fig.write_image(f'{file_prefix}.pdf')
fig.write_image(f'{file_prefix}.png')

### Fig. 2G

In [None]:
# Models compared in Fig. 2G, in plotting order (left to right).
model_list = [
    'Phytomni',
    'Deepseek-V3',
    'Deepseek-R1',
    'GPT-4.1',
    'o3',
    'Gemini-2.5-Pro',
    'Claude-3.7-Sonnet',
    'Grok-3-Beta',
]

# Key-point scores (%), one per model in model_list order.
# NOTE(review): these values are identical to the ones in the Fig. 2F
# (well-studied) data cell — confirm they are the real uncharacterized-gene
# results and not a copied placeholder.
y_list = [
    95,
    80,
    85,
    82,
    88,
    89,
    86,
    87,
]

# The first model is filled blue, the remaining seven light grey.
color_list = ['rgb(104,175,215)'] + ['rgb(208,210,211)'] * 7

In [None]:
# Fig. 2G (uncharacterized genes): key-point score per model as a bar chart.
line_width = 2

fig = go.Figure()

fig.add_trace(go.Bar(
    # Label every bar with its model name; without x the bars are only
    # numbered 0..7 and model_list goes unused.
    x=model_list,
    # Heights are drawn relative to a baseline of 70, so an axis tick at t
    # corresponds to a score of 70 + t.
    # NOTE(review): presumably the ticks are relabelled when the figure is assembled — confirm.
    y=[y-70 for y in y_list],
    marker_color=color_list,
    marker_line_color='rgb(0,0,0)',
    marker_line_width=line_width,
    textposition='outside',
))

fig.update_layout(
    xaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width},
    yaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width, 'title_text': 'Key points (%)', 'range': [0, 30]},
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1080/9*8,
    height=1080)

fig.show()
# Export the same figure as both a vector and a raster image.
file_prefix = 'fig.2g.phytobench-gene.uncharacterized.key_points.bar'.lower().replace(' ', '_')
fig.write_image(f'{file_prefix}.pdf')
fig.write_image(f'{file_prefix}.png')

In [None]:
# Confabulation rates (%), one per model in model_list order.
# This rebinds y_list, replacing the key-point scores used by the previous plot.
# NOTE(review): these values are identical to the Fig. 2F (well-studied)
# confabulation cell — confirm they are not a copied placeholder.
y_list = [
    5,
    13.83,
    21.02,
    16.09,
    23.82,
    6.09,
    15,
    27,
]

In [None]:
# Fig. 2G (uncharacterized genes): confabulation rate per model as a bar chart.
line_width = 2

fig = go.Figure()

fig.add_trace(go.Bar(
    # Label every bar with its model name; without x the bars are only
    # numbered 0..7 and model_list goes unused.
    x=model_list,
    y=y_list,
    marker_color=color_list,
    marker_line_color='rgb(0,0,0)',
    marker_line_width=line_width,
    textposition='outside',
))

fig.update_layout(
    xaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width},
    yaxis={'showline': True, 'linewidth': line_width, 'linecolor': 'rgb(0,0,0)', 'mirror': True, 'ticks': 'outside', 'tickwidth': line_width, 'title_text': 'Confabulations (%)', 'range': [0, 30]},
    plot_bgcolor='white',
    font_family='Arial',
    font_color='rgb(0,0,0)',
    font_size=20,
    width=1080/9*8,
    height=1080)

fig.show()
# This panel belongs to Fig. 2G, so the export uses the 'fig.2g' prefix like
# the Fig. 2G key-points plot (it was previously written out as 'fig.2f.…').
file_prefix = 'fig.2g.phytobench-gene.uncharacterized.confabulations.bar'.lower().replace(' ', '_')
fig.write_image(f'{file_prefix}.pdf')
fig.write_image(f'{file_prefix}.png')