Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raget redesign #1895

Merged
merged 6 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions giskard/rag/knowledge_base_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ def get_failure_plot(knowledge_base, question_evaluation: Sequence = None):
[knowledge_base._documents_index[doc_id].reduced_embeddings for doc_id in document_ids]
)

TITLE = "Knowledge Base UMAP representation"

topics = [question.metadata["topic"] for question in question_evaluation]
failure_palette = ["#ba0e0e", "#0a980a"]
questions = [question.question for question in question_evaluation]
Expand Down Expand Up @@ -77,14 +75,13 @@ def get_failure_plot(knowledge_base, question_evaluation: Sequence = None):
p = figure(
tools=["pan", "wheel_zoom", "box_zoom", "reset", "save"],
toolbar_location="right",
title=TITLE,
x_range=x_range,
y_range=y_range,
sizing_mode="stretch_width",
)
p.add_tools(hover)
p.toolbar.logo = "grey"
p.background_fill_color = "#efefef"
p.background_fill_color = "#14191B"
p.grid.grid_line_color = "white"

foreground_scatter = p.scatter(
Expand All @@ -105,7 +102,10 @@ def get_failure_plot(knowledge_base, question_evaluation: Sequence = None):
p.legend.location = "top_right"
p.legend.title = "Question Correctness"
p.legend.title_text_font_style = "bold"
p.legend.background_fill_color = "#111516"
p.legend.background_fill_alpha = 0.5
p.title.text_font_size = "14pt"
p.legend.title_text_color = "#B1B1B1"

background_source = ColumnDataSource(
data={
Expand Down Expand Up @@ -153,6 +153,7 @@ def get_failure_plot(knowledge_base, question_evaluation: Sequence = None):
text_align="center",
text_font_size="12pt",
text_font_style="bold",
text_color="#B1B1B1",
source=label_source,
)
p.add_layout(labels)
Expand All @@ -164,7 +165,6 @@ def get_knowledge_plot(knowledge_base):
if knowledge_base.topics is None:
raise ValueError("No topics found.")

TITLE = "Knowledge Base UMAP representation"
TOOLS = "hover,pan,wheel_zoom,box_zoom,reset,save"

topics_ids = [doc.topic_id for doc in knowledge_base._documents]
Expand Down Expand Up @@ -196,13 +196,12 @@ def get_knowledge_plot(knowledge_base):
p = figure(
tools=TOOLS,
toolbar_location="right",
title=TITLE,
x_range=x_range,
y_range=y_range,
sizing_mode="stretch_width",
)
p.toolbar.logo = "grey"
p.background_fill_color = "#efefef"
p.background_fill_color = "#14191B"
p.grid.grid_line_color = "white"

p.hover.tooltips = """
Expand All @@ -226,8 +225,12 @@ def get_knowledge_plot(knowledge_base):
legend_group="topic",
)
p.legend.location = "top_right"
p.legend.title = "Knowledge Base Topics"
p.legend.title = "Knowledge Base Tospics"
p.legend.title_text_font_style = "bold"
p.legend.background_fill_color = "#111516"
p.legend.background_fill_alpha = 0.5
p.legend.title_text_color = "#B1B1B1"

p.title.text_font_size = "14pt"

return p
36 changes: 30 additions & 6 deletions giskard/rag/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,27 +352,45 @@ def plot_correctness_by_metadata(self, metadata_name: str):
p = figure(
y_range=metadata_values,
height=350,
title=f"Correctness by {metadata_name}",
# title=f"Correctness by {metadata_name}",
toolbar_location=None,
tools="hover",
width_policy="max",
)

p.hbar(y="metadata_values", right="correctness", source=source, height=0.9, fill_color="colors")
p.hbar(y="metadata_values", right="correctness", source=source, height=0.85, fill_color="#14191B")
p.hbar(
y="metadata_values",
right="correctness",
source=source,
height=0.85,
fill_color="#78BBFA",
fill_alpha=0.7,
line_color="white",
line_width=2,
)
vline = Span(
location=overall_correctness * 100, dimension="height", line_color="red", line_width=2, line_dash="dashed"
location=overall_correctness * 100,
dimension="height",
line_color="#EA3829",
line_width=2,
line_dash="dashed",
)

p.add_layout(vline)
p.background_fill_color = "#14191B"

p.x_range.start = 0
r_line = p.line(
[0],
[0],
legend_label="Correctness on the entire Testset",
line_dash="dashed",
line_color="red",
line_color="#EA3829",
line_width=2,
)
r_line.visible = False # Set this fake line to invisible
p.legend.background_fill_color = "#111516"
p.legend.background_fill_alpha = 0.5

p.xaxis.axis_label = "Correctness (%)"
p.title.text_font_size = "14pt"
Expand Down Expand Up @@ -418,7 +436,7 @@ def plot_metrics_hist(self, metric_name: str, filter_metadata: dict = None):
bottom=0,
left=edges[:-1],
right=edges[1:],
fill_color="skyblue",
fill_color="#78bbfa",
line_color="white",
)
p.title.text_font_size = "12pt"
Expand All @@ -435,6 +453,12 @@ def _apply_theme(self, p):
doc.add_root(p)
return p

# Attach plot `p` to a newly created Document themed "dark_minimal" and
# return the same `p` that was passed in.
# Side effect: the theme is also assigned on the object returned by curdoc(),
# so the change is not confined to the new Document.
# NOTE(review): curdoc/Document are presumably Bokeh's -- imports are not
# visible in this excerpt; confirm.
def _apply_theme(self, p):
# Set the theme on curdoc() first; the Document below reads it back from there.
curdoc().theme = "dark_minimal"
doc = Document(theme=curdoc().theme)
# Register `p` as a root of `doc`; `doc` itself is neither stored nor returned.
doc.add_root(p)
return p

# Pass plot `p` through self._apply_theme, hand the result to components(),
# and return the two resulting values as {"script": ..., "div": ...}.
# NOTE(review): presumably these are the script/div pairs the HTML report
# template renders with `| safe` -- verify against the caller.
def _get_plot_components(self, p):
# "dark_minimal" is supplied twice: inside _apply_theme and again through the
# `theme` argument here. NOTE(review): confirm both are required.
script, div = components(self._apply_theme(p), theme="dark_minimal")
return {"script": script, "div": div}
Expand Down
164 changes: 77 additions & 87 deletions giskard/visualization/templates/rag_report/rag_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,112 +15,102 @@
<div id="gsk-rag" class="dark:text-white dark:bg-zinc-800 rounded border border-gray-500">
<div class="header border-b border-b-gray-500">
{# Giskard logo #}
<div>
<svg xmlns="http://www.w3.org/2000/svg" width="60" height="30" viewBox="0 0 30 15" fill="none" id="gsk-logo">
<path fill="#fff" fill-rule="evenodd"
d="M22.504 1.549a4.196 4.196 0 0 1 2.573-.887v.002a3.783 3.783 0 0 1 2.706 1.086 3.783 3.783 0 0 1 1.126 2.69 3.771 3.771 0 0 1-1.126 2.69 3.77 3.77 0 0 1-2.706 1.085l-4.794.011-2.533 3.467L8.203 15l2.881-3.335a9.829 9.829 0 0 1-4.663-1.68H3.185L0 7.163h3.934C4.263 3.165 8.187 0 12.96 0c2.24 0 4.489.696 6.175 1.909a7.423 7.423 0 0 1 1.882 1.919 4.194 4.194 0 0 1 1.487-2.28ZM7.05 3.249l3.91 3.915h1.505L7.89 2.584a7.773 7.773 0 0 0-.84.665Zm4.079-2.008 5.923 5.923h1.503l-6.086-6.087c-.45.023-.898.078-1.34.164ZM4.574 8.226h-1.77l.784.693h1.584a8.454 8.454 0 0 1-.598-.693Zm9.479 0H5.984c1.469 1.477 3.656 2.377 5.977 2.422l2.092-2.422Zm-2.458 4.472 5.492-1.902 1.878-2.569h-3.508l-3.862 4.47Zm10.361-5.552h3.265a2.714 2.714 0 0 0 1.747-4.648 2.711 2.711 0 0 0-1.888-.773 3.127 3.127 0 0 0-3.123 3.124v2.297Zm3.659-3.73a.677.677 0 1 1-.134 1.348.677.677 0 0 1 .134-1.348Z"
clip-rule="evenodd" />
</svg>
</div>
<h1>RAG Evaluation Toolkit</h1>
<svg xmlns="http://www.w3.org/2000/svg" width="60" height="30" viewBox="0 0 30 15" fill="none" id="gsk-logo">
<path fill="#fff" fill-rule="evenodd"
d="M22.504 1.549a4.196 4.196 0 0 1 2.573-.887v.002a3.783 3.783 0 0 1 2.706 1.086 3.783 3.783 0 0 1 1.126 2.69 3.771 3.771 0 0 1-1.126 2.69 3.77 3.77 0 0 1-2.706 1.085l-4.794.011-2.533 3.467L8.203 15l2.881-3.335a9.829 9.829 0 0 1-4.663-1.68H3.185L0 7.163h3.934C4.263 3.165 8.187 0 12.96 0c2.24 0 4.489.696 6.175 1.909a7.423 7.423 0 0 1 1.882 1.919 4.194 4.194 0 0 1 1.487-2.28ZM7.05 3.249l3.91 3.915h1.505L7.89 2.584a7.773 7.773 0 0 0-.84.665Zm4.079-2.008 5.923 5.923h1.503l-6.086-6.087c-.45.023-.898.078-1.34.164ZM4.574 8.226h-1.77l.784.693h1.584a8.454 8.454 0 0 1-.598-.693Zm9.479 0H5.984c1.469 1.477 3.656 2.377 5.977 2.422l2.092-2.422Zm-2.458 4.472 5.492-1.902 1.878-2.569h-3.508l-3.862 4.47Zm10.361-5.552h3.265a2.714 2.714 0 0 0 1.747-4.648 2.711 2.711 0 0 0-1.888-.773 3.127 3.127 0 0 0-3.123 3.124v2.297Zm3.659-3.73a.677.677 0 1 1-.134 1.348.677.677 0 0 1 .134-1.348Z"
clip-rule="evenodd" />
</svg>
<h1>RAG Evaluation Toolkit</h1>
</div>



<div id="gsk-overview">
{% if knowledge_script is not none %}
<div class="overview-side">
<h3 class="extended-title">Knowledge base overview</h3>
{{knowledge_script | safe}}
{{knowledge_div | safe}}
</div>
{% endif %}
<div class="overview-side" style="flex:1">
<h3 class="extended-title">Components Analysis</h3>
<table style="width:100%" id="component-table">
<div class="section-container">
<div class="components-container">
{% for component_name, component in components.items() %}
<tr title="This is a tooltip">
<td><strong>{{component_name}}</strong> </td>
<td><progress max="100" value={{component.score*100}} class="{% if component.score >= 0.7 %} progress-green {% elif component.score >= 0.3 %} progress-orange {% else %} progress-red {% endif %}">{{(component.score*100)|round(2)}}%</progress> <span class="tooltip-text" id="fade">{{component.description}}</span></td>
<td><strong>{{(component.score*100)|round(2)}}%</strong> </td>
</tr>
<div class="component-card">
<div class="component-title">{{component_name}}</div>
<div class="component-value tooltip {% if component.score >= 0.7 %} text-green {% elif component.score >= 0.3 %} text-orange {% else %} text-red {% endif %}">
{{(component.score*100)|round(2)}}%
<span class="tooltiptext" id="fade">{{component.description}}</span>
</div>
<div class="component-bar">
<progress max="100" value={{component.score*100}} class="{% if component.score >= 0.7 %} progress-green {% elif component.score >= 0.3 %} progress-orange {% else %} progress-red {% endif %}">{{(component.score*100)|round(2)}}%</progress>
</div>
</div>
{% endfor %}
</table>
<div class="callout">
<span class="callout-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<path
d="M14 9.5c0-.825.675-1.5 1.5-1.5h1c.825 0 1.5.675 1.5 1.5v1c0 .825-.675 1.5-1.5 1.5h-1c-.825 0-1.5-.675-1.5-1.5v-1zM20 24h-8v-2h2v-6h-2v-2h6v8h2z" />
<path
d="M16 0C7.163 0 0 7.163 0 16s7.163 16 16 16 16-7.163 16-16S24.837 0 16 0zm0 29C8.82 29 3 23.18 3 16S8.82 3 16 3s13 5.82 13 13-5.82 13-13 13z" />
</svg></span>

<p>Each component is scored based on the correctness of the agent on different types of questions.
Each score grades a component from 0 to 100%. <br>
<strong>Low scores help you identify weaknesses of your RAG and what
components need improvement</strong>.</p>

<div class="overall-card">
<div class="overall-title">Overall Correctness Score</div>
<div class="overall-value">90%</div>
</div>
</div>
</div>
<div id="gsk-advice">
<div id="recommendation">
<h2>⚠️ Recommendation</h2>
{{recommendation}}

<div class="section-container">
<div class="section-card">
<div class="section-title">RECOMMENDATION</div>
<span class="section-content">{{recommendation}}</span>
</div>

</div>
<div class="separator"></div>
<div id="gsk-correctness">
<h3>Correctness</h3>

<div class="flex-row">
<div style="flex:0.7">
<div class="correctness-indicator {% if correctness >= 0.7 %} green {% elif correctness >= 0.3 %} orange {% else %} red {% endif %}">Overall Correctness Score: {{(correctness*100)|round(2)}}% </div>
</div>

<div class="corr-plot">
{{topic_correctness_script | safe}}
{{topic_correctness_div | safe}}
</div>
<div class="section-container">
<div class="section-card">
<div class="section-title">CORRECTNESS BY TOPIC</div>
{{topic_correctness_script | safe}}
{{topic_correctness_div | safe}}
</div>
</div>
{% if additional_metrics %}
<div class="separator"></div>
<div id="gsk-metrics">
{# Tab links #}
<div class="tab">
<div class="tab-title">Selected metrics</div>
{% for tab, _ in metric_histograms.items() %}
<button class="tablinks{% if loop.index == 1 %} active{% endif %}"
onclick="opentab(event, '{{tab}}')">{% if tab == "Question" %} Question types {% else %} {{tab}} {%
endif %}</button>
{% endfor %}

<div class="section-container">
<div class="section-card">
<div class="section-title">KNOWLEDGE BASE OVERVIEW</div>
{{knowledge_script | safe}}
{{knowledge_div | safe}}
</div>
{# Tab content #}
{% for tab, content in metric_histograms.items() %}
<div id={{tab}} class="tabcontent"{% if loop.index == 1 %} style="display: block"{% endif %}>
{% for split_value, metrics in content.items()%}
<div class="tab-row">
{% if split_value != "Overall" %}<h4>{{split_value}}</h4>{% endif %}
<div class="hist-row">
{% for metric, hist in metrics.items() %}
<div>
{{hist.script | safe}}
{{hist.div | safe}}
</div>

<div class="section-container">
<div class="section-card">

<div class="section-title">SELECTED METRICS</div>

{% if additional_metrics %}
<div class="separator"></div>
<div id="gsk-metrics">
{# Tab links #}
<div class="tab">
<div class="tab-title">Selected metrics</div>
{% for tab, _ in metric_histograms.items() %}
<button class="tablinks{% if loop.index == 1 %} active{% endif %}"
onclick="opentab(event, '{{tab}}')">{% if tab == "Question" %} Question types {% else %} {{tab}} {%
endif %}</button>
{% endfor %}
</div>
{# Tab content #}
{% for tab, content in metric_histograms.items() %}
<div id={{tab}} class="tabcontent"{% if loop.index == 1 %} style="display: block"{% endif %}>
{% for split_value, metrics in content.items()%}
<div class="tab-row">
{% if split_value != "Overall" %}<h4>{{split_value}}</h4>{% endif %}
<div class="hist-row">
{% for metric, hist in metrics.items() %}
<div>
{{hist.script | safe}}
{{hist.div | safe}}
</div>
{% endfor %}
</div>
</div>
<div class="separator-border"></div>

{% endfor %}
</div>
{% endfor %}
</div>
<div class="separator-border"></div>

{% endfor %}
{% endif %}

</div>
{% endfor %}


</div>
{% endif %}


</div>
</div>
{% endblock %}
Expand Down
Loading
Loading