In [1]:
# Copyright (c) 2024 Braid Technologies Ltd

## Imports 

In [2]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

## Loading Data

In [3]:
# Directory containing the test-output workbooks. Set the BOXER_TEST_OUTPUT_DIR
# environment variable to run the notebook against another location; the
# default below is the original hard-coded path.
directory = os.environ.get(
    "BOXER_TEST_OUTPUT_DIR",
    "D:/Braid Technologies/BraidTechnologiesRepo/WorkedExamples/BoxerTest/test output",
)

# List all files in the directory
files = os.listdir(directory)

# Keep only Excel workbooks (.xlsx), sorted so the load order and the printed
# log are deterministic. Names starting with "~$" are the lock files Excel
# creates next to an open workbook; they end in .xlsx but are not workbooks.
excel_files = sorted(
    file for file in files
    if file.endswith('.xlsx') and not file.startswith('~$')
)

# Function to convert filename into a valid variable name
def create_variable_name(filename):
    """Turn a workbook filename into a valid Python identifier.

    A trailing ``.xlsx`` extension is dropped, every character that is not an
    ASCII letter, digit or underscore becomes ``_``, and the result is adjusted
    so it never starts with a digit, is never empty and is never a Python
    keyword.

    Args:
        filename: File name (not a full path), e.g. ``"my results-v1.xlsx"``.

    Returns:
        str: An identifier such as ``"my_results_v1"``.
    """
    import keyword  # local imports keep this cell self-contained
    import re

    stem, extension = os.path.splitext(filename)
    if extension.lower() != '.xlsx':
        # Not an .xlsx name: keep the whole name so its extension is sanitized too.
        stem = filename

    variable_name = re.sub(r'[^0-9A-Za-z_]', '_', stem)

    # Identifiers cannot be empty or start with a digit.
    if not variable_name or variable_name[0].isdigit():
        variable_name = '_' + variable_name
    # A keyword (e.g. "class") could be stored in globals() but never referenced by name.
    if keyword.iskeyword(variable_name):
        variable_name += '_'
    return variable_name

# Read each Excel file and bind it to a module-level variable named after the
# file. The plotting cells below reference these variables by name.
loaded_dataframes = {}  # variable name -> DataFrame loaded in this run
for excel_file in excel_files:
    file_path = os.path.join(directory, excel_file)
    df = pd.read_excel(file_path)

    # Generate a valid variable name
    variable_name = create_variable_name(excel_file)

    # Refuse to overwrite a global that is not a DataFrame (for example a file
    # called "pd.xlsx" would otherwise replace the pandas module). Overwriting
    # an existing DataFrame is allowed so the cell can be re-run.
    if variable_name in globals() and not isinstance(globals()[variable_name], pd.DataFrame):
        raise ValueError(
            f"Refusing to load {excel_file}: variable name '{variable_name}' "
            "is already used by a non-DataFrame object in this notebook"
        )
    # Two different filenames can sanitize to the same variable name.
    if variable_name in loaded_dataframes:
        print(f"WARNING: {excel_file} overwrites an earlier file loaded as {variable_name}")

    # Use globals() to dynamically assign the DataFrame to the variable name
    globals()[variable_name] = df
    loaded_dataframes[variable_name] = df
    print(f"Loaded {excel_file} into variable: {variable_name}")

    # Print the columns of each dataframe for inspection
    print(f"Columns in {variable_name}:")
    print(df.columns)
    print("\n")

Loaded Test Results 2024052802_Baseline.xlsx into variable: Test_Results_2024052802_Baseline
Columns in Test_Results_2024052802_Baseline:
Index(['Column1.question', 'Column1.hit', 'Column1.summary',
       'Column1.hitRelevance', 'Column1.followUp', 'Column1.followUpOnTopic'],
      dtype='object')


Loaded Test Results Baseline - sameEmbeddingsAsV1.xlsx into variable: Test_Results_Baseline___sameEmbeddingsAsV1
Columns in Test_Results_Baseline___sameEmbeddingsAsV1:
Index(['Column1.question', 'Column1.enriched_question', 'Column1.hit',
       'Column1.summary', 'Column1.hitRelevance', 'Column1.followUp',
       'Column1.followUpOnTopic'],
      dtype='object')


Loaded test_output_v1_2024-08-28_14-25-47.xlsx into variable: test_output_v1_2024_08_28_14_25_47
Columns in test_output_v1_2024_08_28_14_25_47:
Index(['Column1.question', 'Column1.enriched_question', 'Column1.hit',
       'Column1.summary', 'Column1.hitRelevance'],
      dtype='object')


Loaded test_output_v2_businessanalyst_20

## Visualizations Set 1: Box Plots 

In [4]:
# Relevance cut-offs drawn as dashed reference lines on the box plots below.
# Kept as variables so they can be tuned in one place.
threshold_baseline_to_v4 = 0.8  # line drawn across baseline .. v4
threshold_v5 = 0.6              # line drawn across v5

#### Box Plot: Static Persona all versions 

In [5]:
# threshold_baseline_to_v4 and threshold_v5 are defined once, in the cell
# directly under the "Visualizations Set 1" heading.

# Dataframes for Static Persona, keyed by version label
dataframes_static = {
    "baseline": Test_Results_2024052802_Baseline,
    "v1": test_output_v1_2024_08_28_14_25_47,
    "v2": test_output_v2_nonetype_2024_09_03_12_03_59,
    # NOTE(review): this name has no full timestamp, unlike its siblings —
    # confirm it matches a variable created by the loading cell.
    "v3": test_output_v3_nonetype_2024_09,
    "v4": test_output_v4_nonetype_2024_09_28_21_30_03,
    "v5": test_output_v5_nonetype_2024_10_07_18_04_26
}

# Combine data for easier plotting. `.assign` returns a new frame, so the
# loaded DataFrames (the baseline is shared by every persona cell) are not
# modified by adding the version label.
all_data_static = [
    df[['Column1.hit', 'Column1.hitRelevance']].assign(version=version)
    for version, df in dataframes_static.items()
]

# Concatenate all data into a single DataFrame
combined_df_static = pd.concat(all_data_static, ignore_index=True)

# Binary flag derived from the relevance score: 1 if hitRelevance >= 0.75, else 0.
# NOTE(review): 0.75 differs from the plotted thresholds (0.8 / 0.6) — confirm it is intended.
combined_df_static['hit_binary'] = (
    combined_df_static['Column1.hitRelevance'] >= 0.75
).astype(int)

# Create a box plot for Static Persona including the baseline
fig_static = px.box(combined_df_static,
                    x='version',
                    y='Column1.hitRelevance',
                    points='all',  # Show all data points
                    title='Hit Relevance Distribution by Version (Static Persona)',
                    labels={'Column1.hitRelevance': 'Hit Relevance Score', 'version': 'Version'},
                    color='version',
                    custom_data=['hit_binary'],  # exposes hit_binary to the hover template
                    color_discrete_sequence=px.colors.qualitative.Set2)  # Use distinct color palette

# Customize box plot aesthetics and hover text
fig_static.update_traces(
    boxmean=True,
    jitter=0.2,
    pointpos=-1.5,
    marker=dict(size=6, opacity=0.8),
    width=0.4,
    # customdata[0] is the hit_binary value passed through custom_data above
    hovertemplate="<b>Version</b>: %{x}<br><b>Hit Relevance</b>: %{y}<br><b>Hit Binary</b>: %{customdata[0]}"
)

# Add horizontal lines for thresholds
fig_static.add_shape(
    type="line", line_color="darkred", line_width=2, opacity=0.7,
    x0=-0.5, x1=4.5, y0=threshold_baseline_to_v4, y1=threshold_baseline_to_v4,
    line_dash="dash"
)
fig_static.add_shape(
    type="line", line_color="darkblue", line_width=2, opacity=0.7,
    x0=4, x1=5.5, y0=threshold_v5, y1=threshold_v5,
    line_dash="dash"
)

# Move the threshold text annotations to the right
fig_static.add_annotation(
    x=5, y=threshold_baseline_to_v4,
    text="Threshold: 0.8", showarrow=False, xshift=50, yshift=10,
    font=dict(size=12, color="darkred")
)
fig_static.add_annotation(
    x=5, y=threshold_v5,
    text="Threshold: 0.6", showarrow=False, xshift=50, yshift=-10,
    font=dict(size=12, color="darkblue")
)

# Adjust layout for better spacing and visual clarity
fig_static.update_layout(
    xaxis_title='Version',
    yaxis_title='Hit Relevance Score',
    legend_title_text='Version',
    font=dict(size=14, family='Arial'),
    plot_bgcolor='#f7f7f7',
    title_font=dict(size=22, family='Times New Roman', color='darkblue'),
    width=1000,
    height=650,
    margin=dict(t=70, l=60, r=40, b=60),
    boxmode='group',
    showlegend=True,
    yaxis=dict(showgrid=True, gridcolor='lightgray'),
    bargap=0.15,
    hovermode="closest"
)

# Display the figure
fig_static.show()


#### Box Plot: Tester Persona all versions 

In [6]:
# Dataframes for Tester Persona, keyed by version label.
# NOTE(review): this cell reuses the *_static variable names from the Static
# Persona cell and therefore overwrites them; the names are kept here only for
# compatibility with any code that reads them after this cell.
dataframes_static = {
    "baseline": Test_Results_2024052802_Baseline,
    "v2": test_output_v2_tester_2024_09_03_12_13_59,
    "v3": test_output_v3_tester_2024_09_17_03_21_25,
    "v4": test_output_v4_tester_2024_09_29_00_32_01,
    "v5": test_output_v5_tester_2024_10_07_17_35_56
}

# Combine data for easier plotting. `.assign` returns a new frame, so the
# loaded DataFrames (the baseline is shared by every persona cell) are not
# modified by adding the version label.
all_data_static = [
    df[['Column1.hit', 'Column1.hitRelevance']].assign(version=version)
    for version, df in dataframes_static.items()
]

# Concatenate all data into a single DataFrame
combined_df_static = pd.concat(all_data_static, ignore_index=True)

# Binary flag derived from the relevance score: 1 if hitRelevance >= 0.75, else 0.
# NOTE(review): 0.75 differs from the plotted thresholds (0.8 / 0.6) — confirm it is intended.
combined_df_static['hit_binary'] = (
    combined_df_static['Column1.hitRelevance'] >= 0.75
).astype(int)

# Create a box plot for Tester Persona including the baseline
fig_static = px.box(combined_df_static,
                    x='version',
                    y='Column1.hitRelevance',
                    points='all',  # Show all data points
                    title='Hit Relevance Distribution by Version (Tester Persona)',
                    labels={'Column1.hitRelevance': 'Hit Relevance Score', 'version': 'Version'},
                    color='version',
                    custom_data=['hit_binary'],  # exposes hit_binary to the hover template
                    color_discrete_sequence=px.colors.qualitative.Set2)  # Use distinct color palette

# Customize box plot aesthetics and hover text
fig_static.update_traces(
    boxmean=True,  # Show mean in each box
    jitter=0.2,  # Horizontal spread of the individual points
    pointpos=-1.5,  # Place the points to the left of each box
    marker=dict(size=6, opacity=0.8),
    width=0.4,  # Box width
    # customdata[0] is the hit_binary value passed through custom_data above
    hovertemplate="<b>Version</b>: %{x}<br><b>Hit Relevance</b>: %{y}<br><b>Hit Binary</b>: %{customdata[0]}"
)

# Add horizontal lines for thresholds
fig_static.add_shape(
    type="line", line_color="darkred", line_width=2, opacity=0.7,
    x0=-0.5, x1=4, y0=threshold_baseline_to_v4, y1=threshold_baseline_to_v4,
    line_dash="dash"
)
fig_static.add_shape(
    type="line", line_color="darkblue", line_width=2, opacity=0.7,
    x0=3, x1=5.5, y0=threshold_v5, y1=threshold_v5,
    line_dash="dash"
)

# Move the threshold text annotations to the right
fig_static.add_annotation(
    x=5, y=threshold_baseline_to_v4,
    text="Threshold: 0.8", showarrow=False, xshift=50, yshift=10,
    font=dict(size=12, color="darkred")
)
fig_static.add_annotation(
    x=5, y=threshold_v5,
    text="Threshold: 0.6", showarrow=False, xshift=50, yshift=-10,
    font=dict(size=12, color="darkblue")
)

# Adjust layout for better spacing and visual clarity
fig_static.update_layout(
    xaxis_title='Version',
    yaxis_title='Hit Relevance Score',
    legend_title_text='Version',
    font=dict(size=14, family='Arial'),
    plot_bgcolor='#f7f7f7',  # Soft background for readability
    title_font=dict(size=22, family='Times New Roman', color='darkblue'),
    width=1000,
    height=650,
    margin=dict(t=70, l=60, r=40, b=60),
    boxmode='group',
    showlegend=True,
    yaxis=dict(showgrid=True, gridcolor='lightgray'),  # Add gridlines for clarity
    bargap=0.15,
    hovermode="closest"  # Hovering shows closest point information
)

# Display the figure
fig_static.show()


#### Box Plot: Developer Persona all versions 

In [7]:

# Dataframes for Developer Persona, keyed by version label
dataframes_developer = {
    "baseline": Test_Results_2024052802_Baseline,
    "v2": test_output_v2_developer_2024_09_03_12_10_22,
    # NOTE(review): this name has no full timestamp, unlike its siblings —
    # confirm it matches a variable created by the loading cell.
    "v3": test_output_v3_developer_2024_0,
    "v4": test_output_v4_developer_2024_09_28_21_45_10,
    # Developer v5 run (previously this entry pointed at the business-analyst v5 frame).
    "v5": test_output_v5_developer_2024_10_07_17_25_23
}

# Combine data for easier plotting. `.assign` returns a new frame, so the
# loaded DataFrames (the baseline is shared by every persona cell) are not
# modified by adding the version label.
all_data_developer = [
    df[['Column1.hit', 'Column1.hitRelevance']].assign(version=version)
    for version, df in dataframes_developer.items()
]

# Concatenate all data into a single DataFrame
combined_df_developer = pd.concat(all_data_developer, ignore_index=True)

# Binary flag derived from the relevance score: 1 if hitRelevance >= 0.75, else 0.
# NOTE(review): 0.75 differs from the plotted thresholds (0.8 / 0.6) — confirm it is intended.
combined_df_developer['hit_binary'] = (
    combined_df_developer['Column1.hitRelevance'] >= 0.75
).astype(int)

# Create a box plot for Developer Persona
fig_developer = px.box(combined_df_developer,
                       x='version',
                       y='Column1.hitRelevance',
                       points='all',
                       title='Hit Relevance Distribution by Version (Developer Persona)',
                       labels={'Column1.hitRelevance': 'Hit Relevance Score', 'version': 'Version'},
                       color='version',
                       custom_data=['hit_binary'],  # exposes hit_binary to the hover template
                       color_discrete_sequence=px.colors.qualitative.Set2)

# Customize box plot aesthetics and hover text
fig_developer.update_traces(
    boxmean=True,
    jitter=0.2,
    pointpos=-1.5,
    marker=dict(size=6, opacity=0.8),
    width=0.4,
    # customdata[0] is the hit_binary value passed through custom_data above
    hovertemplate="<b>Version</b>: %{x}<br><b>Hit Relevance</b>: %{y}<br><b>Hit Binary</b>: %{customdata[0]}"
)

# Add horizontal lines for thresholds
fig_developer.add_shape(
    type="line", line_color="darkred", line_width=2, opacity=0.7,
    x0=-0.5, x1=4, y0=threshold_baseline_to_v4, y1=threshold_baseline_to_v4,
    line_dash="dash"
)
fig_developer.add_shape(
    type="line", line_color="darkblue", line_width=2, opacity=0.7,
    x0=3, x1=5.5, y0=threshold_v5, y1=threshold_v5,
    line_dash="dash"
)

# Move the threshold text annotations to the right
fig_developer.add_annotation(
    x=5, y=threshold_baseline_to_v4,
    text="Threshold: 0.8", showarrow=False, xshift=50, yshift=10,
    font=dict(size=12, color="darkred")
)
fig_developer.add_annotation(
    x=5, y=threshold_v5,
    text="Threshold: 0.6", showarrow=False, xshift=50, yshift=-10,
    font=dict(size=12, color="darkblue")
)

# Adjust layout
fig_developer.update_layout(
    xaxis_title='Version',
    yaxis_title='Hit Relevance Score',
    legend_title_text='Version',
    font=dict(size=14, family='Arial'),
    plot_bgcolor='#f7f7f7',
    title_font=dict(size=22, family='Times New Roman', color='darkblue'),
    width=1000,
    height=650,
    margin=dict(t=70, l=60, r=40, b=60),
    boxmode='group',
    showlegend=True,
    yaxis=dict(showgrid=True, gridcolor='lightgray'),
    bargap=0.15,
    hovermode="closest"
)

# Display the figure
fig_developer.show()


#### Box Plot: Business Analyst Persona all versions 

In [8]:
# Dataframes for Business Analyst Persona, keyed by version label
dataframes_ba = {
    "baseline": Test_Results_2024052802_Baseline,
    "v2": test_output_v2_businessanalyst_2024_09_03_12_17_34,
    "v3": test_output_v3_businessanalyst_2024_09_17_03_35_43,
    "v4": test_output_v4_businessanalyst_2024_09_29_01_42_36,
    "v5": test_output_v5_businessanalyst_2024_10_07_17_55_42
}

# Combine data for easier plotting. `.assign` returns a new frame, so the
# loaded DataFrames (the baseline is shared by every persona cell) are not
# modified by adding the version label.
all_data_ba = [
    df[['Column1.hit', 'Column1.hitRelevance']].assign(version=version)
    for version, df in dataframes_ba.items()
]

# Concatenate all data into a single DataFrame
combined_df_ba = pd.concat(all_data_ba, ignore_index=True)

# Binary flag derived from the relevance score: 1 if hitRelevance >= 0.75, else 0.
# NOTE(review): 0.75 differs from the plotted thresholds (0.8 / 0.6) — confirm it is intended.
combined_df_ba['hit_binary'] = (
    combined_df_ba['Column1.hitRelevance'] >= 0.75
).astype(int)

# Create a box plot for Business Analyst Persona
fig_ba = px.box(combined_df_ba,
                x='version',
                y='Column1.hitRelevance',
                points='all',
                title='Hit Relevance Distribution by Version (Business Analyst Persona)',
                labels={'Column1.hitRelevance': 'Hit Relevance Score', 'version': 'Version'},
                color='version',
                custom_data=['hit_binary'],  # exposes hit_binary to the hover template
                color_discrete_sequence=px.colors.qualitative.Set2)

# Customize box plot aesthetics and hover text
fig_ba.update_traces(
    boxmean=True,
    jitter=0.2,
    pointpos=-1.5,
    marker=dict(size=6, opacity=0.8),
    width=0.4,
    # customdata[0] is the hit_binary value passed through custom_data above
    hovertemplate="<b>Version</b>: %{x}<br><b>Hit Relevance</b>: %{y}<br><b>Hit Binary</b>: %{customdata[0]}"
)

# Add horizontal lines for thresholds
fig_ba.add_shape(
    type="line", line_color="darkred", line_width=2, opacity=0.7,
    x0=-0.5, x1=4, y0=threshold_baseline_to_v4, y1=threshold_baseline_to_v4,
    line_dash="dash"
)
fig_ba.add_shape(
    type="line", line_color="darkblue", line_width=2, opacity=0.7,
    x0=3, x1=5.5, y0=threshold_v5, y1=threshold_v5,
    line_dash="dash"
)

# Move the threshold text annotations to the right
fig_ba.add_annotation(
    x=5, y=threshold_baseline_to_v4,
    text="Threshold: 0.8", showarrow=False, xshift=50, yshift=10,
    font=dict(size=12, color="darkred")
)
fig_ba.add_annotation(
    x=5, y=threshold_v5,
    text="Threshold: 0.6", showarrow=False, xshift=50, yshift=-10,
    font=dict(size=12, color="darkblue")
)

# Adjust layout
fig_ba.update_layout(
    xaxis_title='Version',
    yaxis_title='Hit Relevance Score',
    legend_title_text='Version',
    font=dict(size=14, family='Arial'),
    plot_bgcolor='#f7f7f7',
    title_font=dict(size=22, family='Times New Roman', color='darkblue'),
    width=1000,
    height=650,
    margin=dict(t=70, l=60, r=40, b=60),
    boxmode='group',
    showlegend=True,
    yaxis=dict(showgrid=True, gridcolor='lightgray'),
    bargap=0.15,
    hovermode="closest"
)

# Display the figure
fig_ba.show()


## Visualizations Set 2: Grouped Bar Graphs (Gemini Evaluation) 

In [10]:
# v4 and v5 result DataFrames for each persona, as persona key -> version -> DataFrame.
# Persona keys use underscores; create_enhanced_gemini_chart looks them up by
# replacing spaces in its display names with underscores.
dataframes_persona = {
    "static": {
        "v4": test_output_v4_nonetype_2024_09_28_21_30_03,
        "v5": test_output_v5_nonetype_2024_10_07_18_04_26
    },
    "developer": {
        "v4": test_output_v4_developer_2024_09_28_21_45_10,
        "v5": test_output_v5_developer_2024_10_07_17_25_23
    },
    "tester": {
        "v4": test_output_v4_tester_2024_09_29_00_32_01,
        "v5": test_output_v5_tester_2024_10_07_17_35_56
    },
    "business_analyst": {
        "v4": test_output_v4_businessanalyst_2024_09_29_01_42_36,
        "v5": test_output_v5_businessanalyst_2024_10_07_17_55_42
    }
}

# Pastel bar colors for the Gemini scores; position k-1 holds the color for score k.
likert_colors = [
    '#FFB3BA',  # score 1
    '#FFDFBA',  # score 2
    '#BAFFC9',  # score 3
    '#BAE1FF',  # score 4
]

# Legend text for each Gemini score (score number plus a short description).
score_labels = dict([
    (1, '1: Irrelevant or incoherent'),
    (2, '2: Partially relevant but incomplete'),
    (3, '3: Mostly relevant and coherent'),
    (4, '4: Fully relevant and coherent'),
])

# Function to create a grouped bar chart of Gemini scores with the mean score annotated per persona
def create_enhanced_gemini_chart(dataframes_persona, version):
    """Show a grouped bar chart of Gemini evaluation scores for one version.

    For every persona, counts how many rows of
    ``dataframes_persona[persona][version]`` carry each score 1-4 in the
    ``Column1.gemini_evaluation`` column, draws one bar per score, and writes
    the persona's mean score above its bars. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Uses the module-level ``score_labels`` (legend text per score) and
    ``likert_colors`` (bar color per score).

    Args:
        dataframes_persona: Mapping of persona key -> version key -> DataFrame.
            Must contain the keys ``static``, ``developer``,
            ``business_analyst`` and ``tester``.
        version: Version key to plot, e.g. ``'v4'``.

    Raises:
        KeyError: If a persona or version key is missing, or a DataFrame has
            no ``Column1.gemini_evaluation`` column.
    """
    fig = go.Figure()

    # Display names used as x-axis categories; spaces become underscores to
    # match the keys of dataframes_persona.
    personas = ['static', 'developer', 'business analyst', 'tester']
    scores = [1, 2, 3, 4]

    for i, persona in enumerate(personas):
        df = dataframes_persona[persona.replace(' ', '_')][version]
        evaluations = df['Column1.gemini_evaluation']

        # Mean score for this persona
        mean_score = evaluations.mean()

        # Count of responses per score; scores that never occur count as 0
        score_counts = evaluations.value_counts().reindex(scores, fill_value=0)
        tallest_bar = score_counts.max()

        # One bar per score for this persona
        for score, count in score_counts.items():
            fig.add_trace(go.Bar(
                x=[persona], y=[count],
                name=score_labels[score],
                marker_color=likert_colors[score-1],  # Pastel color for each score
                hoverinfo='y+name',
                text=f'Score: {score}, Count: {count}',
                textposition='auto',
                offsetgroup=score,  # Grouping the bars by score
                # Bars of the same score share a legend group; only the first
                # persona's bars create the legend entry, giving 4 entries total.
                legendgroup=str(score),
                showlegend=(i == 0),
            ))

        # Mean score annotation, placed above this persona's tallest bar
        fig.add_annotation(
            x=persona,
            y=tallest_bar + 5,
            text=f"Mean: {mean_score:.2f}",
            showarrow=False,
            font=dict(size=14, family='Arial', color='black')
        )

        # Vertical dashed separator between this persona and the next one
        # (none is needed after the last persona)
        if i < len(personas) - 1:
            fig.add_shape(type="line",
                          x0=i + 0.5, x1=i + 0.5, y0=0, y1=tallest_bar + 10,
                          line=dict(color="gray", dash="dash", width=1.5),
                          opacity=0.5)  # Translucent separator

    # Customize the layout
    fig.update_layout(
        barmode='group',  # Group the bars instead of stacking
        title={
            'text': f'Gemini Score Distribution - {version.upper()}',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': 24, 'family': 'Times New Roman', 'color': 'darkblue'}
        },
        xaxis_title="Personas",
        yaxis_title="Count of Responses",
        legend_title="Gemini Evaluation Scale",
        font=dict(size=14),
        # NOTE(review): this title size (20) conflicts with the size 24 given in
        # the title dict above — confirm which one is intended and drop the other.
        title_font=dict(size=20),
        plot_bgcolor='white',
        height=600,
        width=1200,  # Wide enough to leave room for the legend
        margin=dict(l=50, r=50, t=50, b=50)
    )

    # Show the figure
    fig.show()




In [11]:
# Show the Gemini score distribution chart for version v4
create_enhanced_gemini_chart(dataframes_persona, 'v4')

In [12]:
# Show the Gemini score distribution chart for version v5
create_enhanced_gemini_chart(dataframes_persona, 'v5')