# Importing Required Libraries and Modules

In [1]:
import time
import psutil
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import logging
import os
from pathlib import Path
from datetime import datetime
import tempfile
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.colors import HexColor
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
from reportlab.lib.units import inch


# Loading and Processing PDF Documents

In [2]:

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('benchmark.log'),
        logging.StreamHandler()
    ]
)

def load_and_process_documents(docs_dir="documents"):
    """
    Load and preprocess PDF documents from the specified directory.
    """
    try:
        docs_path = Path(docs_dir)
        logging.info(f"Using documents path: {docs_path.absolute()}")
        
        if not docs_path.exists():
            raise FileNotFoundError(f"Documents directory not found: {docs_path}")
        
        # Load documents
        docs_combined = []
        for pdf_file in docs_path.glob("*.pdf"):
            try:
                loader = PyPDFLoader(str(pdf_file))
                docs_combined.extend(loader.load())
                logging.info(f"Successfully loaded: {pdf_file.name}")
            except Exception as e:
                logging.error(f"Error loading {pdf_file.name}: {str(e)}")
        
        if not docs_combined:
            raise ValueError("No documents were successfully loaded")
        
        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        split_docs = text_splitter.split_documents(docs_combined)
        logging.info(f"Processed {len(split_docs)} document chunks")
        
        return split_docs
        
    except Exception as e:
        logging.error(f"Error in document processing setup: {str(e)}")
        raise


# Creating a Vector Store and Benchmarking Model Performance

In [3]:

def create_vector_store(documents, embedding_model="llama2"):
    """
    Create a FAISS vector store from the processed documents.
    """
    try:
        embeddings = OllamaEmbeddings(model=embedding_model)
        vector_store = FAISS.from_documents(documents, embeddings)
        logging.info(f"Vector store created with embedding model: {embedding_model}")
        return vector_store
    except Exception as e:
        logging.error(f"Error creating vector store: {str(e)}")
        raise

def benchmark_model(model_name, db_combined, prompt):
    """
    Benchmark a model's performance across multiple test queries.
    """
    llm = OllamaLLM(model=model_name)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever_combined = db_combined.as_retriever()
    
    # Test queries
    test_queries = [
        "give me plan my study schedule within exam date",
        "what are the key topics I need to focus on?",
        "how should I organize my study materials?",
        "what is the most effective way to prepare for the exam?",
        "how can I track my study progress?"
    ]
    
    metrics = {
        'responses': [],
        'total_time': 0,
        'total_memory': 0,
        'total_tokens': 0,
        'avg_time': 0,  # Initialize average metrics
        'avg_memory': 0,
        'avg_tokens_per_second': 0
    }
    
    process = psutil.Process(os.getpid())
    
    for query in test_queries:
        try:
            # Memory benchmarking
            memory_before = process.memory_info().rss
            
            # Response time benchmarking
            start_time = time.time()
            response = create_retrieval_chain(retriever_combined, document_chain).invoke({"input": query})
            end_time = time.time()
            
            # Extract response text
            if isinstance(response, dict):
                response_text = response.get('answer', response.get('output', str(response)))
            else:
                response_text = str(response)
                
            # Calculate metrics for this query
            query_time = end_time - start_time
            query_memory = (process.memory_info().rss - memory_before) / (1024 * 1024)  # MB
            query_tokens = len(str(response)) / query_time  # Approximate
            
            # Store detailed response data
            metrics['responses'].append({
                'query': query,
                'response': response_text,
                'time_taken': query_time,
                'memory_used': query_memory,
                'tokens_per_second': query_tokens
            })
            
            # Update totals
            metrics['total_time'] += query_time
            metrics['total_memory'] += query_memory
            metrics['total_tokens'] += query_tokens
            
            logging.info(f"Successfully benchmarked {model_name} for query: {query[:50]}...")
            
        except Exception as e:
            logging.error(f"Error benchmarking {model_name} for query '{query}': {str(e)}")
            continue
    
    # Calculate averages
    num_queries = len(metrics['responses'])
    if num_queries > 0:
        metrics['avg_time'] = metrics['total_time'] / num_queries
        metrics['avg_memory'] = metrics['total_memory'] / num_queries
        metrics['avg_tokens_per_second'] = metrics['total_tokens'] / num_queries
    else:
        # Handle the case where no queries were successful
        logging.warning(f"No successful queries for model {model_name}")
        metrics['avg_time'] = 0
        metrics['avg_memory'] = 0
        metrics['avg_tokens_per_second'] = 0
    
    return metrics


# Generating Performance Visualizations

In [4]:

def generate_visualizations(benchmark_results):
    """
    Generate performance visualization plots.
    """
    # Set style
    sns.set_style("whitegrid")
    plt.rcParams['figure.figsize'] = (12, 8)

    # Time and Memory comparison
    fig1, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
    
    # Performance plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_time'] for result in benchmark_results],
        palette="rocket",
        ax=ax1
    )
    ax1.set_title('Average Response Time Comparison', fontsize=14, pad=20)
    ax1.set_xlabel('Model', fontsize=12)
    ax1.set_ylabel('Time (seconds)', fontsize=12)
    
    # Memory usage plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_memory'] for result in benchmark_results],
        palette="magma",
        ax=ax2
    )
    ax2.set_title('Average Memory Usage Comparison', fontsize=14, pad=20)
    ax2.set_xlabel('Model', fontsize=12)
    ax2.set_ylabel('Memory (MB)', fontsize=12)
    
    plt.tight_layout()
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        performance_path = img_stream.name

    # Processing speed plot
    plt.figure(figsize=(12, 6))
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_tokens_per_second'] for result in benchmark_results],
        palette="rocket"
    )
    plt.title('Average Processing Speed (Tokens/Second)', fontsize=14, pad=20)
    plt.xlabel('Model', fontsize=12)
    plt.ylabel('Tokens per Second', fontsize=12)
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        speed_path = img_stream.name

    return performance_path, speed_path


# Creating an Enhanced PDF Report

In [5]:

def create_enhanced_pdf_report(benchmark_results, performance_path, speed_path, filename="benchmark_results.pdf"):
    """
    Create a detailed PDF report of the benchmark results.
    """
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    styles = getSampleStyleSheet()
    
    # Define custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        spaceAfter=30,
        alignment=1,
        textColor=HexColor('#2c3e50'),
        fontName='Helvetica-Bold'
    )
    
    section_heading = ParagraphStyle(
        'SectionHeading',
        parent=styles['Heading2'],
        fontSize=18,
        spaceBefore=20,
        spaceAfter=12,
        textColor=HexColor('#34495e'),
        fontName='Helvetica-Bold',
        borderPadding=10,
        borderWidth=1,
        borderColor=HexColor('#bdc3c7'),
        borderRadius=8
    )
    
    subsection_heading = ParagraphStyle(
        'SubsectionHeading',
        parent=styles['Heading3'],
        fontSize=14,
        spaceBefore=15,
        spaceAfter=8,
        textColor=HexColor('#2980b9'),
        fontName='Helvetica-Bold'
    )
    
    body_text = ParagraphStyle(
        'BodyText',
        parent=styles['Normal'],
        fontSize=11,
        spaceBefore=6,
        spaceAfter=6,
        leading=14
    )
    
    metric_text = ParagraphStyle(
        'MetricText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=4,
        spaceAfter=4,
        leftIndent=20,
        textColor=HexColor('#444444')
    )
    
    response_text = ParagraphStyle(
        'ResponseText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=6,
        spaceAfter=6,
        leftIndent=20,
        rightIndent=20,
        leading=12,
        backColor=HexColor('#f7f9fc'),
        borderPadding=10,
        borderWidth=1,
        borderColor=HexColor('#e1e8ed'),
        borderRadius=5
    )

    content = []
    
    # Title Page
    content.append(Paragraph("LLM Model Benchmarking Report", title_style))
    content.append(Spacer(1, 0.5 * inch))
    
    # Date and Summary
    content.append(Paragraph(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", body_text))
    content.append(Spacer(1, 0.3 * inch))

    # Executive Summary
    content.append(Paragraph("Executive Summary", section_heading))
    fastest_model = min(benchmark_results, key=lambda x: x['metrics']['avg_time'])
    most_efficient_model = min(benchmark_results, key=lambda x: x['metrics']['avg_memory'])
    highest_throughput = max(benchmark_results, key=lambda x: x['metrics']['avg_tokens_per_second'])
    
    summary_items = [
        f"• Fastest Model: {fastest_model['model']} ({fastest_model['metrics']['avg_time']:.2f} seconds average)",
        f"• Most Memory-Efficient: {most_efficient_model['model']} ({most_efficient_model['metrics']['avg_memory']:.2f} MB average)",
        f"• Highest Throughput: {highest_throughput['model']} ({highest_throughput['metrics']['avg_tokens_per_second']:.2f} tokens/second)",
        f"• Total Models Tested: {len(benchmark_results)}",
        f"• Queries per Model: {len(benchmark_results[0]['metrics']['responses'])}"
    ]
    
    for item in summary_items:
        content.append(Paragraph(item, body_text))
    
    content.append(Spacer(1, 0.3 * inch))

    # Performance Visualizations
    content.append(Paragraph("Performance Analysis", section_heading))
    content.append(Image(performance_path, width=7*inch, height=7*inch))
    content.append(Spacer(1, 0.2 * inch))
    content.append(Image(speed_path, width=7*inch, height=4*inch))
    content.append(Spacer(1, 0.3 * inch))
    
    # Detailed Model Analysis
    content.append(Paragraph("Detailed Model Analysis", section_heading))
    
    for result in benchmark_results:
        # Model Header
        content.append(Paragraph(f"Model: {result['model']}", subsection_heading))
        
        # Overall Performance Metrics
        content.append(Paragraph("Average Performance Metrics:", body_text))
        metrics = [
            f"• Average Response Time: {result['metrics']['avg_time']:.2f} seconds",
            f"• Average Memory Usage: {result['metrics']['avg_memory']:.2f} MB",
            f"• Average Processing Speed: {result['metrics']['avg_tokens_per_second']:.2f} tokens/second"
        ]
        
        for metric in metrics:
            content.append(Paragraph(metric, metric_text))
        
        # Response Analysis
        content.append(Paragraph("Sample Responses:", subsection_heading))
        
        for response_data in result['metrics']['responses']:
            # Query Info
            content.append(Paragraph(f"Query: {response_data['query']}", body_text))
            
            # Response Metrics
            response_metrics = [
                f"• Response Time: {response_data['time_taken']:.2f} seconds",
                f"• Memory Used: {response_data['memory_used']:.2f} MB",
                f"• Processing Speed: {response_data['tokens_per_second']:.2f} tokens/second"
            ]
            
            for metric in response_metrics:
                content.append(Paragraph(metric, metric_text))
            
            # Format and display the response
            formatted_response = response_data['response'].replace('\n', '<br/>')
            content.append(Paragraph("Response:", body_text))
            content.append(Paragraph(formatted_response, response_text))
            content.append(Spacer(1, 0.2 * inch))
        
        content.append(Spacer(1, 0.3 * inch))

    # Build PDF
    doc.build(content)
    
    # Cleanup temporary image files
    os.remove(performance_path)
    os.remove(speed_path)


# report generation

In [6]:
def main():
    try:
        # Initialize
        print("\nStarting document processing...")
        combined_documents = load_and_process_documents()
        print("✓ Documents processed successfully")
        
        print("\nCreating vector store...")
        db_combined = create_vector_store(combined_documents)
        print("✓ Vector store created successfully")
        
        # Define the prompt template
        prompt = ChatPromptTemplate.from_template("""
        Answer the following question based only on the provided context.
        Think step by step before providing a detailed answer.
        <context>
        {context}
        </context>
        Question: {input}""")

        # Models to benchmark
        models = [
            "llama3.2", 
            "gemma2:2b", 
            "deepseek-r1:1.5b",
            "smollm2",
            "granite3.1-dense:2b",
            "qwen2:1.5b",
        ]
        
        benchmark_results = []

        # Run benchmarks
        print("\nStarting benchmarking process...")
        for model in models:
            print(f"\nBenchmarking {model}...")
            try:
                metrics = benchmark_model(model, db_combined, prompt)
                print(f"✓ {model} benchmarked successfully")
                benchmark_results.append({
                    'model': model,
                    'metrics': metrics
                })
            except Exception as e:
                print(f"✗ Error benchmarking {model}: {str(e)}")
                logging.error(f"Error benchmarking {model}: {str(e)}")

        # Generate report
        print("\nGenerating visualizations and report...")
        performance_path, speed_path = generate_visualizations(benchmark_results)
        create_enhanced_pdf_report(benchmark_results, performance_path, speed_path)
        print("\n✓ PDF report generated successfully as 'benchmark_results.pdf'")

    except Exception as e:
        print(f"\n✗ Error during benchmark process: {str(e)}")
        logging.error(f"Error during benchmark process: {str(e)}")
        raise

if __name__ == "__main__":
    main()

2025-02-03 17:17:42,280 - INFO - Using documents path: c:\Users\infog\Documents\LinkedIn\Benchmark\documents



Starting document processing...


2025-02-03 17:17:42,573 - INFO - Successfully loaded: bank details.pdf
2025-02-03 17:17:42,624 - INFO - Successfully loaded: user data.pdf
2025-02-03 17:17:42,626 - INFO - Processed 11 document chunks


✓ Documents processed successfully

Creating vector store...


2025-02-03 17:18:00,965 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:18:00,984 - INFO - Loading faiss with AVX512 support.
2025-02-03 17:18:00,984 - INFO - Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
2025-02-03 17:18:00,985 - INFO - Loading faiss with AVX2 support.
2025-02-03 17:18:01,250 - INFO - Successfully loaded faiss with AVX2 support.
2025-02-03 17:18:01,260 - INFO - Vector store created with embedding model: llama2


✓ Vector store created successfully

Starting benchmarking process...

Benchmarking llama3.2...


2025-02-03 17:18:02,538 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:18:06,375 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:18:18,379 - INFO - Successfully benchmarked llama3.2 for query: give me plan my study schedule within exam date...
2025-02-03 17:18:23,947 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:18:27,468 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:18:33,182 - INFO - Successfully benchmarked llama3.2 for query: what are the key topics I need to focus on?...
2025-02-03 17:18:37,242 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:18:40,193 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:19:03,374 - INFO - Successfully benchmarked llama3.2 for query: how should I organize my study materials?

✓ llama3.2 benchmarked successfully

Benchmarking gemma2:2b...


2025-02-03 17:20:09,168 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:20:13,616 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:20:30,630 - INFO - Successfully benchmarked gemma2:2b for query: give me plan my study schedule within exam date...
2025-02-03 17:20:34,058 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:20:36,546 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:20:41,555 - INFO - Successfully benchmarked gemma2:2b for query: what are the key topics I need to focus on?...
2025-02-03 17:20:44,783 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:20:47,243 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:20:56,007 - INFO - Successfully benchmarked gemma2:2b for query: how should I organize my study materia

✓ gemma2:2b benchmarked successfully

Benchmarking deepseek-r1:1.5b...


2025-02-03 17:21:32,249 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:21:35,066 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:21:53,162 - INFO - Successfully benchmarked deepseek-r1:1.5b for query: give me plan my study schedule within exam date...
2025-02-03 17:21:57,491 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:21:59,626 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:22:09,974 - INFO - Successfully benchmarked deepseek-r1:1.5b for query: what are the key topics I need to focus on?...
2025-02-03 17:22:15,336 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:22:18,225 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:22:28,826 - INFO - Successfully benchmarked deepseek-r1:1.5b for query: how should I orga

✓ deepseek-r1:1.5b benchmarked successfully

Benchmarking smollm2...


2025-02-03 17:23:06,756 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:23:09,940 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:23:19,118 - INFO - Successfully benchmarked smollm2 for query: give me plan my study schedule within exam date...
2025-02-03 17:23:24,236 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:23:26,839 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:23:30,594 - INFO - Successfully benchmarked smollm2 for query: what are the key topics I need to focus on?...
2025-02-03 17:23:35,638 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:23:38,280 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:23:44,535 - INFO - Successfully benchmarked smollm2 for query: how should I organize my study materials?...

✓ smollm2 benchmarked successfully

Benchmarking granite3.1-dense:2b...


2025-02-03 17:24:16,120 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:24:18,952 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:24:26,557 - INFO - Successfully benchmarked granite3.1-dense:2b for query: give me plan my study schedule within exam date...
2025-02-03 17:24:30,200 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:24:32,475 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:24:37,576 - INFO - Successfully benchmarked granite3.1-dense:2b for query: what are the key topics I need to focus on?...
2025-02-03 17:24:41,005 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:24:43,224 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:24:51,126 - INFO - Successfully benchmarked granite3.1-dense:2b for query: how shou

✓ granite3.1-dense:2b benchmarked successfully

Benchmarking qwen2:1.5b...


2025-02-03 17:25:23,668 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:25:26,230 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:25:31,902 - INFO - Successfully benchmarked qwen2:1.5b for query: give me plan my study schedule within exam date...
2025-02-03 17:25:40,196 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:25:42,303 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:25:44,744 - INFO - Successfully benchmarked qwen2:1.5b for query: what are the key topics I need to focus on?...
2025-02-03 17:25:52,940 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-02-03 17:25:55,020 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2025-02-03 17:25:58,977 - INFO - Successfully benchmarked qwen2:1.5b for query: how should I organize my study mate

✓ qwen2:1.5b benchmarked successfully

Generating visualizations and report...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(



✓ PDF report generated successfully as 'benchmark_results.pdf'
