In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [2]:
# Load the FAISS hyperparameter sweep results.
df = pd.read_csv('../data/experiment_results/faiss_hyperparam/faiss_hyperparam.csv')

# Convert faiss_nlist to numeric *before* plotting so that trace ordering is
# numeric (e.g. 64 < 128 < 1024) instead of being patched onto the finished
# figure afterwards.
df['faiss_nlist'] = pd.to_numeric(df['faiss_nlist'])

# px.line connects points in row order, so sort by the x column within each
# cluster count; otherwise an unordered CSV would produce zigzagging lines.
df_sorted = df.sort_values(['faiss_nlist', '0_nprobe'])

fig = px.line(
    df_sorted,
    x='0_nprobe',
    y='overall_recall_at_k',
    color='faiss_nlist',
    # Explicit legend/trace order: ascending number of clusters.
    category_orders={'faiss_nlist': sorted(df_sorted['faiss_nlist'].unique().tolist())},
    title='Hyperparameter Search for FAISS Cluster Size',
    labels={
        '0_nprobe': 'Clusters to Search',
        'overall_recall_at_k': 'Recall@20',
        'faiss_nlist': 'Total Clusters'
    }
)

# Update layout for better readability
fig.update_layout(
    xaxis_title='Clusters to Search',
    yaxis_title='Recall@20',
    legend_title='Total Clusters',
    hovermode='x unified',
    legend=dict(
        traceorder='normal',
        itemsizing='constant'
    )
)

# Show the plot
fig.show()

# fig.write_image(
#     "../img/faiss_hyperparam.png",
#     scale=2,  # Increase the scale factor for higher resolution
#     width=1200,
#     height=800,
#     format='png'
# )

In [3]:
# Data preparation: hard-coded results, one row per (query type, method) pair.
search_methods = ['Text Search', 'Text + Embeddings', 'Text + Embeddings + LLM']
query_types = ['Basic Query', 'Natural Query']

data = {
    # Methods repeat once per query type ...
    'Method': search_methods + search_methods,
    # ... while each query type is repeated once per method.
    'Query Type': [query_type for query_type in query_types for _ in search_methods],
    'Precision@20': [73, 81, 78, 7, 70, 62],
    'Recall@20': [42, 33, 41, 7, 53, 58],
    # The same three timings are listed for both query types.
    'Search Time': [0.3, 0.6, 4.24] + [0.3, 0.6, 4.24],
}

df = pd.DataFrame(data=data)

# Color mapping for consistency across charts: red for basic, blue for natural.
color_map = dict(zip(query_types, ['#ef4444', '#3b82f6']))

def make_metric_bar_chart(results, metric, colors):
    """Build a grouped bar chart of one metric per search method.

    The precision and recall charts share every setting except the metric
    column, so both are produced by this single helper.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame with 'Method' and 'Query Type' columns plus the `metric` column.
    metric : str
        Name of the column to plot on the y axis (e.g. 'Precision@20').
        It is also used to build the chart title and the y-axis label.
    colors : dict
        Mapping from each 'Query Type' value to the bar color to use.

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure; the caller decides when to show or export it.
    """
    fig = px.bar(
        results,
        x='Method',
        y=metric,
        color='Query Type',
        color_discrete_map=colors,
        title=f'{metric} Performance Comparison',
        labels={metric: f'{metric} (%)'},
        barmode='group'
    )

    fig.update_layout(
        font=dict(size=16),
        xaxis_title='',
        yaxis_title_font_size=18,
        title_font_size=20,
        # Vertical legend anchored just outside the right edge of the plot
        # area, centered vertically.
        legend=dict(
            orientation="v",
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.02
        ),
        # Extra right margin, presumably to leave room for the legend.
        margin=dict(r=150),
        height=500,
        width=800
    )

    # Tilt the method names on the x axis.
    fig.update_xaxes(tickangle=-45)
    return fig


# Chart 1: Precision@20
fig_precision = make_metric_bar_chart(df, 'Precision@20', color_map)
fig_precision.show()

# Chart 2: Recall@20
fig_recall = make_metric_bar_chart(df, 'Recall@20', color_map)
fig_recall.show()

# Chart 3: Search Time (single series) -- one bar per method, no query-type split.
search_time_data = dict([
    ('Method', ['Text Search', 'Text + Embeddings', 'Text + Embeddings + LLM']),
    ('Average Search Time', [0.3, 0.6, 4.24]),
])
df_time = pd.DataFrame(search_time_data)

# Layout settings for the time chart; no legend because there is one series.
time_layout = dict(
    font=dict(size=16),
    xaxis_title='',
    yaxis_title_font_size=18,
    title_font_size=20,
    showlegend=False,
    height=500,
    width=800,
)

# Build, style and rotate tick labels in one chain (each update_* call
# returns the figure itself).
fig_time = (
    px.bar(
        df_time,
        x='Method',
        y='Average Search Time',
        title='Average Search Time Comparison',
        labels={'Average Search Time': 'Time (seconds)'},
        color_discrete_sequence=['#10b981'],  # Green
    )
    .update_layout(**time_layout)
    .update_xaxes(tickangle=-45)
)
fig_time.show()

# Export: write each chart to a PNG under ../img, all at the same 800x500 size.
export_targets = (
    (fig_precision, "../img/precision_chart.png"),
    (fig_recall, "../img/recall_chart.png"),
    (fig_time, "../img/search_time_chart.png"),
)
for chart, png_path in export_targets:
    chart.write_image(png_path, width=800, height=500)