In [2]:
import time
import psutil
import matplotlib.pyplot as plt
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
from reportlab.lib.units import inch
import os
import tempfile

def load_and_process_documents():
    # Load and preprocess documents
    loader_common = PyPDFLoader("documents\\bank details.pdf")
    loader_user = PyPDFLoader("documents\\user data.pdf")

    docs_common = loader_common.load()
    docs_user = loader_user.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    documents_common = text_splitter.split_documents(docs_common)
    documents_user = text_splitter.split_documents(docs_user)
    return documents_common + documents_user

def create_vector_store(documents, embedding_model="llama2"):
    return FAISS.from_documents(documents, OllamaEmbeddings(model=embedding_model))

def benchmark_model(model_name, db_combined, prompt):
    llm = OllamaLLM(model=model_name)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever_combined = db_combined.as_retriever()
    user_query = "give me plan my study schedule within exam date"

    # Measure initial memory
    process = psutil.Process(os.getpid())
    memory_before = process.memory_info().rss

    # Start benchmarking
    start_time = time.time()
    response = create_retrieval_chain(retriever_combined, document_chain).invoke({"input": user_query})
    end_time = time.time()
    time_taken = end_time - start_time

    # Measure final memory
    memory_after = process.memory_info().rss
    memory_used = (memory_after - memory_before) / (1024 * 1024)  # Convert to MB

    # Extract the actual response text
    if isinstance(response, dict) and 'answer' in response:
        result_text = response['answer']
    elif isinstance(response, dict) and 'output' in response:
        result_text = response['output']
    else:
        result_text = str(response)

    return result_text, time_taken, abs(memory_used)

def generate_visualizations(model_names, results, model_memory_usage):
    # Time visualization
    plt.figure(figsize=(10, 6))
    plt.bar(model_names, results, color='skyblue')
    plt.xlabel('Model')
    plt.ylabel('Time (seconds)')
    plt.title('Benchmarking Time for Different Models')
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream_time:
        plt.savefig(img_stream_time.name, format='png', bbox_inches='tight', dpi=300)
        plt.close()
        time_path = img_stream_time.name

    # Memory visualization
    plt.figure(figsize=(10, 6))
    plt.bar(model_names, model_memory_usage, color='lightcoral')
    plt.xlabel('Model')
    plt.ylabel('Memory (MB)')
    plt.title('Memory Usage for Different Models')
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream_memory:
        plt.savefig(img_stream_memory.name, format='png', bbox_inches='tight', dpi=300)
        plt.close()
        memory_path = img_stream_memory.name

    return time_path, memory_path

def create_pdf_report(model_names, results, model_memory_usage, model_responses, time_path, memory_path, filename="benchmark_results.pdf"):
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        spaceAfter=30,
        leading=32
    )
    
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=16,
        spaceAfter=12,
        spaceBefore=24,
        leading=20
    )
    
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['Normal'],
        fontSize=12,
        spaceAfter=12,
        leading=14
    )

    # Create content
    content = []
    content.append(Paragraph("Benchmarking Results for Models", title_style))
    content.append(Spacer(1, 0.5 * inch))

    # Add performance summary
    content.append(Paragraph("Performance Summary", heading_style))
    content.append(Spacer(1, 0.2 * inch))

    # Add visualizations
    time_img = Image(time_path, width=6*inch, height=4*inch)
    memory_img = Image(memory_path, width=6*inch, height=4*inch)
    content.append(time_img)
    content.append(Spacer(1, 0.3 * inch))
    content.append(memory_img)
    content.append(Spacer(1, 0.5 * inch))

    # Add detailed results for each model
    content.append(Paragraph("Detailed Results", heading_style))
    
    for i, model in enumerate(model_names):
        content.append(Paragraph(f"Model: {model}", heading_style))
        content.append(Paragraph(f"Time taken: {results[i]:.4f} seconds", body_style))
        content.append(Paragraph(f"Memory used: {model_memory_usage[i]:.2f} MB", body_style))
        
        if model_responses[i]:
            content.append(Paragraph("Response:", heading_style))
            # Format response text with proper line breaks and spacing
            response_text = model_responses[i].replace('\n', '<br/>')
            content.append(Paragraph(response_text, body_style))
        
        content.append(Spacer(1, 0.3 * inch))

    # Build the PDF
    doc.build(content)

    # Clean up temporary files
    try:
        os.remove(time_path)
        os.remove(memory_path)
    except Exception as e:
        print(f"Warning: Could not remove temporary files: {e}")

def main():
    # Initialize
    combined_documents = load_and_process_documents()
    db_combined = create_vector_store(combined_documents)
    
    prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context.
    Think step by step before providing a detailed answer.
    <context>
    {context}
    </context>
    Question: {input}""")

    # Models to benchmark
    models = ["llama3.2", "gemma2:2b", "deepseek-r1:1.5b", "tinyllama"]
    
    # Store results
    results = []
    model_names = []
    model_responses = []
    model_memory_usage = []

    # Run benchmarks
    for model in models:
        print(f"\nBenchmarking {model}...")
        try:
            result_text, time_taken, memory_used = benchmark_model(model, db_combined, prompt)
            print(f"{model} - Time taken: {time_taken:.4f} seconds")
            print(f"{model} - Memory used: {memory_used:.2f} MB")
            print(f"Response: {result_text[:200]}...")  # Show first 200 chars of response

            results.append(time_taken)
            model_names.append(model)
            model_responses.append(result_text)
            model_memory_usage.append(memory_used)
        except Exception as e:
            print(f"Error benchmarking {model}: {str(e)}")

    # Generate visualizations and create PDF
    time_path, memory_path = generate_visualizations(model_names, results, model_memory_usage)
    create_pdf_report(model_names, results, model_memory_usage, model_responses, time_path, memory_path)
    print("\nPDF report has been generated as 'benchmark_results.pdf'")

if __name__ == "__main__":
    main()


Benchmarking llama3.2...
llama3.2 - Time taken: 15.8747 seconds
llama3.2 - Memory used: 3.37 MB
Response: Based on the provided context, I'll help you create a study schedule to prepare for the upcoming Bank Exam.

**Completed Topics:** Static GK, History, Geography, Banking Awareness

**Incomplete Topics...

Benchmarking gemma2:2b...
gemma2:2b - Time taken: 23.0872 seconds
gemma2:2b - Memory used: 0.75 MB
Response: Here's a suggested study schedule based on the provided context and exam dates. Remember, this is just a template; you should adjust it based on your individual learning pace and strengths/weaknesses....

Benchmarking deepseek-r1:1.5b...
deepseek-r1:1.5b - Time taken: 16.8800 seconds
deepseek-r1:1.5b - Memory used: 0.24 MB
Response: <think>
Okay, so I need to figure out how to create a study schedule based on the provided context. Let's break this down step by step.

First, looking at the context about Completed and Incomplete To...

Benchmarking tinyllama...
tinyllama - T

In [11]:
import time
import psutil
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.colors import HexColor
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, ListItem, ListFlowable
from reportlab.lib.units import inch
import os
import tempfile

def load_and_process_documents():
    # Load and preprocess documents
    loader_common = PyPDFLoader("documents\\bank details.pdf")
    loader_user = PyPDFLoader("documents\\user data.pdf")

    docs_common = loader_common.load()
    docs_user = loader_user.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    documents_common = text_splitter.split_documents(docs_common)
    documents_user = text_splitter.split_documents(docs_user)
    return documents_common + documents_user

def create_vector_store(documents, embedding_model="llama2"):
    return FAISS.from_documents(documents, OllamaEmbeddings(model=embedding_model))

def benchmark_model(model_name, db_combined, prompt):
    llm = OllamaLLM(model=model_name)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever_combined = db_combined.as_retriever()
    user_query = "give me plan my study schedule within exam date"

    metrics = {}
    
    # Memory benchmarking
    process = psutil.Process(os.getpid())
    memory_before = process.memory_info().rss
    
    # Response time benchmarking
    start_time = time.time()
    response = create_retrieval_chain(retriever_combined, document_chain).invoke({"input": user_query})
    end_time = time.time()
    
    # Calculate metrics
    metrics['time_taken'] = end_time - start_time
    metrics['memory_used'] = (process.memory_info().rss - memory_before) / (1024 * 1024)  # MB
    metrics['tokens_per_second'] = len(str(response)) / metrics['time_taken']  # Approximate

    # Extract response text
    if isinstance(response, dict):
        metrics['result_text'] = response.get('answer', response.get('output', str(response)))
    else:
        metrics['result_text'] = str(response)

    return metrics

def generate_visualizations(benchmark_results):
    # Set style
    sns.set_style("whitegrid")
    plt.rcParams['figure.figsize'] = (12, 8)

    # Time comparison
    fig1, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
    
    # Performance plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['time_taken'] for result in benchmark_results],
        palette="viridis",
        ax=ax1
    )
    ax1.set_title('Response Time Comparison', fontsize=14, pad=20)
    ax1.set_xlabel('Model', fontsize=12)
    ax1.set_ylabel('Time (seconds)', fontsize=12)
    
    # Memory usage plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['memory_used'] for result in benchmark_results],
        palette="magma",
        ax=ax2
    )
    ax2.set_title('Memory Usage Comparison', fontsize=14, pad=20)
    ax2.set_xlabel('Model', fontsize=12)
    ax2.set_ylabel('Memory (MB)', fontsize=12)
    
    plt.tight_layout()
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        performance_path = img_stream.name

    # Tokens per second comparison
    plt.figure(figsize=(12, 6))
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['tokens_per_second'] for result in benchmark_results],
        palette="rocket"
    )
    plt.title('Processing Speed (Tokens/Second)', fontsize=14, pad=20)
    plt.xlabel('Model', fontsize=12)
    plt.ylabel('Tokens per Second', fontsize=12)
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        speed_path = img_stream.name

    return performance_path, speed_path

# [Previous imports remain the same until the create_pdf_report function]

def create_enhanced_pdf_report(benchmark_results, performance_path, speed_path, filename="benchmark_results.pdf"):
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Enhanced styles
    styles = getSampleStyleSheet()
    
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        spaceAfter=30,
        alignment=1,
        textColor=HexColor('#2c3e50'),
        fontName='Helvetica-Bold'
    )
    
    section_heading = ParagraphStyle(
        'SectionHeading',
        parent=styles['Heading2'],
        fontSize=18,
        spaceBefore=20,
        spaceAfter=12,
        textColor=HexColor('#34495e'),
        fontName='Helvetica-Bold',
        borderPadding=10,
        borderWidth=1,
        borderColor=HexColor('#bdc3c7'),
        borderRadius=8
    )
    
    subsection_heading = ParagraphStyle(
        'SubsectionHeading',
        parent=styles['Heading3'],
        fontSize=14,
        spaceBefore=15,
        spaceAfter=8,
        textColor=HexColor('#2980b9'),
        fontName='Helvetica-Bold'
    )
    
    body_text = ParagraphStyle(
        'BodyText',
        parent=styles['Normal'],
        fontSize=11,
        spaceBefore=6,
        spaceAfter=6,
        leading=14,
        alignment=0
    )
    
    metric_text = ParagraphStyle(
        'MetricText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=4,
        spaceAfter=4,
        leftIndent=20,
        textColor=HexColor('#444444')
    )
    
    response_text = ParagraphStyle(
        'ResponseText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=6,
        spaceAfter=6,
        leftIndent=20,
        rightIndent=20,
        leading=12,
        backColor=HexColor('#f7f9fc'),
        borderPadding=10
    )

    content = []
    
    # Title Page
    content.append(Paragraph("Model Benchmarking Report", title_style))
    content.append(Spacer(1, 0.5 * inch))
    
    # Date and Summary
    content.append(Paragraph(f"Report Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}", body_text))
    content.append(Spacer(1, 0.3 * inch))

    # Executive Summary
    content.append(Paragraph("Executive Summary", section_heading))
    fastest_model = min(benchmark_results, key=lambda x: x['metrics']['time_taken'])
    most_efficient_model = min(benchmark_results, key=lambda x: x['metrics']['memory_used'])
    highest_throughput = max(benchmark_results, key=lambda x: x['metrics']['tokens_per_second'])
    
    summary_items = [
        f"• Fastest Response: {fastest_model['model']} ({fastest_model['metrics']['time_taken']:.2f} seconds)",
        f"• Most Memory-Efficient: {most_efficient_model['model']} ({most_efficient_model['metrics']['memory_used']:.2f} MB)",
        f"• Highest Throughput: {highest_throughput['model']} ({highest_throughput['metrics']['tokens_per_second']:.2f} tokens/second)",
        f"• Total Models Tested: {len(benchmark_results)}",
    ]
    
    for item in summary_items:
        content.append(Paragraph(item, body_text))
    
    content.append(Spacer(1, 0.3 * inch))

    # Performance Visualizations
    content.append(Paragraph("Performance Analysis", section_heading))
    content.append(Paragraph("Response Time and Memory Usage", subsection_heading))
    content.append(Image(performance_path, width=7*inch, height=7*inch))
    content.append(Spacer(1, 0.2 * inch))
    
    content.append(Paragraph("Processing Speed Analysis", subsection_heading))
    content.append(Image(speed_path, width=7*inch, height=4*inch))
    content.append(Spacer(1, 0.3 * inch))
    
    # Detailed Model Analysis
    content.append(Paragraph("Detailed Model Analysis", section_heading))
    
    for result in benchmark_results:
        # Model Header
        content.append(Paragraph(f"Model: {result['model']}", subsection_heading))
        
        # Performance Metrics
        metrics = [
            f"• Response Time: {result['metrics']['time_taken']:.2f} seconds",
            f"• Memory Usage: {result['metrics']['memory_used']:.2f} MB",
            f"• Processing Speed: {result['metrics']['tokens_per_second']:.2f} tokens/second"
        ]
        
        content.append(Paragraph("Performance Metrics:", body_text))
        for metric in metrics:
            content.append(Paragraph(metric, metric_text))
            
        # Model Response
        content.append(Paragraph("Sample Response:", body_text))
        response_text_formatted = result['metrics']['result_text'].replace('\n', '<br/>')
        content.append(Paragraph(response_text_formatted, response_text))
        
        # Add comparison to average
        avg_time = sum(r['metrics']['time_taken'] for r in benchmark_results) / len(benchmark_results)
        avg_memory = sum(r['metrics']['memory_used'] for r in benchmark_results) / len(benchmark_results)
        
        performance_comparison = [
            f"• Time vs Average: {((result['metrics']['time_taken'] - avg_time) / avg_time * 100):.1f}% " +
            ("faster" if result['metrics']['time_taken'] < avg_time else "slower"),
            f"• Memory vs Average: {((result['metrics']['memory_used'] - avg_memory) / avg_memory * 100):.1f}% " +
            ("less" if result['metrics']['memory_used'] < avg_memory else "more")
        ]
        
        content.append(Paragraph("Comparison to Average:", body_text))
        for comp in performance_comparison:
            content.append(Paragraph(comp, metric_text))
            
        content.append(Spacer(1, 0.3 * inch))

    # Build PDF
    doc.build(content)

    # Cleanup
    os.remove(performance_path)
    os.remove(speed_path)

# [Rest of the code remains the same]

def main():
    # Initialize
    combined_documents = load_and_process_documents()
    db_combined = create_vector_store(combined_documents)
    
    prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context.
    Think step by step before providing a detailed answer.
    <context>
    {context}
    </context>
    Question: {input}""")

    # Extended model list
    models = [
        "llama3.2",
        "gemma2:2b",
        "deepseek-r1:1.5b",
        "tinyllama"
    ]
    
    benchmark_results = []

    # Run benchmarks
    print("\nStarting benchmarking process...")
    for model in models:
        print(f"\nBenchmarking {model}...")
        try:
            metrics = benchmark_model(model, db_combined, prompt)
            print(f"✓ {model} benchmarked successfully")
            benchmark_results.append({
                'model': model,
                'metrics': metrics
            })
        except Exception as e:
            print(f"✗ Error benchmarking {model}: {str(e)}")

    # Generate report
    performance_path, speed_path = generate_visualizations(benchmark_results)
    create_pdf_report(benchmark_results, performance_path, speed_path)
    print("\n✓ PDF report generated successfully as 'benchmark_results.pdf'")

if __name__ == "__main__":
    main()

2025-01-31 17:45:56,857 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"



Starting benchmarking process...

Benchmarking llama3.2...


2025-01-31 17:45:57,935 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 17:46:01,568 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


✓ llama3.2 benchmarked successfully

Benchmarking gemma2:2b...


2025-01-31 17:46:17,621 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 17:46:20,721 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


✓ gemma2:2b benchmarked successfully

Benchmarking deepseek-r1:1.5b...


2025-01-31 17:46:35,574 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 17:46:38,130 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


✓ deepseek-r1:1.5b benchmarked successfully

Benchmarking tinyllama...


2025-01-31 17:46:54,841 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 17:46:56,527 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


✓ tinyllama benchmarked successfully



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(



✓ PDF report generated successfully as 'benchmark_results.pdf'


In [None]:
import time
import psutil
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import logging
import os
from pathlib import Path
from datetime import datetime
import tempfile
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.colors import HexColor
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
from reportlab.lib.units import inch

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('benchmark.log'),
        logging.StreamHandler()
    ]
)

def load_and_process_documents(docs_dir="documents"):
    """
    Load and preprocess PDF documents from the specified directory.
    """
    try:
        docs_path = Path(docs_dir)
        logging.info(f"Using documents path: {docs_path.absolute()}")
        
        if not docs_path.exists():
            raise FileNotFoundError(f"Documents directory not found: {docs_path}")
        
        # Load documents
        docs_combined = []
        for pdf_file in docs_path.glob("*.pdf"):
            try:
                loader = PyPDFLoader(str(pdf_file))
                docs_combined.extend(loader.load())
                logging.info(f"Successfully loaded: {pdf_file.name}")
            except Exception as e:
                logging.error(f"Error loading {pdf_file.name}: {str(e)}")
        
        if not docs_combined:
            raise ValueError("No documents were successfully loaded")
        
        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        split_docs = text_splitter.split_documents(docs_combined)
        logging.info(f"Processed {len(split_docs)} document chunks")
        
        return split_docs
        
    except Exception as e:
        logging.error(f"Error in document processing setup: {str(e)}")
        raise

def create_vector_store(documents, embedding_model="llama2"):
    """
    Create a FAISS vector store from the processed documents.
    """
    try:
        embeddings = OllamaEmbeddings(model=embedding_model)
        vector_store = FAISS.from_documents(documents, embeddings)
        logging.info(f"Vector store created with embedding model: {embedding_model}")
        return vector_store
    except Exception as e:
        logging.error(f"Error creating vector store: {str(e)}")
        raise

def benchmark_model(model_name, db_combined, prompt):
    """
    Benchmark a model's performance across multiple test queries.
    """
    llm = OllamaLLM(model=model_name)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever_combined = db_combined.as_retriever()
    
    # Test queries
    test_queries = [
        "give me plan my study schedule within exam date",
        "what are the key topics I need to focus on?",
        "how should I organize my study materials?",
        "what is the most effective way to prepare for the exam?",
        "how can I track my study progress?"
    ]
    
    metrics = {
        'responses': [],
        'total_time': 0,
        'total_memory': 0,
        'total_tokens': 0
    }
    
    process = psutil.Process(os.getpid())
    
    for query in test_queries:
        try:
            # Memory benchmarking
            memory_before = process.memory_info().rss
            
            # Response time benchmarking
            start_time = time.time()
            response = create_retrieval_chain(retriever_combined, document_chain).invoke({"input": query})
            end_time = time.time()
            
            # Extract response text
            if isinstance(response, dict):
                response_text = response.get('answer', response.get('output', str(response)))
            else:
                response_text = str(response)
                
            # Calculate metrics for this query
            query_time = end_time - start_time
            query_memory = (process.memory_info().rss - memory_before) / (1024 * 1024)  # MB
            query_tokens = len(str(response)) / query_time  # Approximate
            
            # Store detailed response data
            metrics['responses'].append({
                'query': query,
                'response': response_text,
                'time_taken': query_time,
                'memory_used': query_memory,
                'tokens_per_second': query_tokens
            })
            
            # Update totals
            metrics['total_time'] += query_time
            metrics['total_memory'] += query_memory
            metrics['total_tokens'] += query_tokens
            
            logging.info(f"Successfully benchmarked {model_name} for query: {query[:50]}...")
            
        except Exception as e:
            logging.error(f"Error benchmarking {model_name} for query '{query}': {str(e)}")
            continue
    
    # Calculate averages
    num_queries = len(metrics['responses'])
    if num_queries > 0:
        metrics['avg_time'] = metrics['total_time'] / num_queries
        metrics['avg_memory'] = metrics['total_memory'] / num_queries
        metrics['avg_tokens_per_second'] = metrics['total_tokens'] / num_queries
    
    return metrics

def generate_visualizations(benchmark_results):
    """
    Generate performance visualization plots.
    """
    # Set style
    sns.set_style("whitegrid")
    plt.rcParams['figure.figsize'] = (12, 8)

    # Time and Memory comparison
    fig1, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
    
    # Performance plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_time'] for result in benchmark_results],
        palette="viridis",
        ax=ax1
    )
    ax1.set_title('Average Response Time Comparison', fontsize=14, pad=20)
    ax1.set_xlabel('Model', fontsize=12)
    ax1.set_ylabel('Time (seconds)', fontsize=12)
    
    # Memory usage plot
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_memory'] for result in benchmark_results],
        palette="magma",
        ax=ax2
    )
    ax2.set_title('Average Memory Usage Comparison', fontsize=14, pad=20)
    ax2.set_xlabel('Model', fontsize=12)
    ax2.set_ylabel('Memory (MB)', fontsize=12)
    
    plt.tight_layout()
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        performance_path = img_stream.name

    # Processing speed plot
    plt.figure(figsize=(12, 6))
    sns.barplot(
        x=[result['model'] for result in benchmark_results],
        y=[result['metrics']['avg_tokens_per_second'] for result in benchmark_results],
        palette="rocket"
    )
    plt.title('Average Processing Speed (Tokens/Second)', fontsize=14, pad=20)
    plt.xlabel('Model', fontsize=12)
    plt.ylabel('Tokens per Second', fontsize=12)
    
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_stream:
        plt.savefig(img_stream.name, format='png', dpi=300, bbox_inches='tight')
        plt.close()
        speed_path = img_stream.name

    return performance_path, speed_path

def create_enhanced_pdf_report(benchmark_results, performance_path, speed_path, filename="benchmark_results.pdf"):
    """
    Create a detailed PDF report of the benchmark results.
    """
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    styles = getSampleStyleSheet()
    
    # Define custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        spaceAfter=30,
        alignment=1,
        textColor=HexColor('#2c3e50'),
        fontName='Helvetica-Bold'
    )
    
    section_heading = ParagraphStyle(
        'SectionHeading',
        parent=styles['Heading2'],
        fontSize=18,
        spaceBefore=20,
        spaceAfter=12,
        textColor=HexColor('#34495e'),
        fontName='Helvetica-Bold',
        borderPadding=10,
        borderWidth=1,
        borderColor=HexColor('#bdc3c7'),
        borderRadius=8
    )
    
    subsection_heading = ParagraphStyle(
        'SubsectionHeading',
        parent=styles['Heading3'],
        fontSize=14,
        spaceBefore=15,
        spaceAfter=8,
        textColor=HexColor('#2980b9'),
        fontName='Helvetica-Bold'
    )
    
    body_text = ParagraphStyle(
        'BodyText',
        parent=styles['Normal'],
        fontSize=11,
        spaceBefore=6,
        spaceAfter=6,
        leading=14
    )
    
    metric_text = ParagraphStyle(
        'MetricText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=4,
        spaceAfter=4,
        leftIndent=20,
        textColor=HexColor('#444444')
    )
    
    response_text = ParagraphStyle(
        'ResponseText',
        parent=styles['Normal'],
        fontSize=10,
        spaceBefore=6,
        spaceAfter=6,
        leftIndent=20,
        rightIndent=20,
        leading=12,
        backColor=HexColor('#f7f9fc'),
        borderPadding=10,
        borderWidth=1,
        borderColor=HexColor('#e1e8ed'),
        borderRadius=5
    )

    content = []
    
    # Title Page
    content.append(Paragraph("LLM Model Benchmarking Report", title_style))
    content.append(Spacer(1, 0.5 * inch))
    
    # Date and Summary
    content.append(Paragraph(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", body_text))
    content.append(Spacer(1, 0.3 * inch))

    # Executive Summary
    content.append(Paragraph("Executive Summary", section_heading))
    fastest_model = min(benchmark_results, key=lambda x: x['metrics']['avg_time'])
    most_efficient_model = min(benchmark_results, key=lambda x: x['metrics']['avg_memory'])
    highest_throughput = max(benchmark_results, key=lambda x: x['metrics']['avg_tokens_per_second'])
    
    summary_items = [
        f"• Fastest Model: {fastest_model['model']} ({fastest_model['metrics']['avg_time']:.2f} seconds average)",
        f"• Most Memory-Efficient: {most_efficient_model['model']} ({most_efficient_model['metrics']['avg_memory']:.2f} MB average)",
        f"• Highest Throughput: {highest_throughput['model']} ({highest_throughput['metrics']['avg_tokens_per_second']:.2f} tokens/second)",
        f"• Total Models Tested: {len(benchmark_results)}",
        f"• Queries per Model: {len(benchmark_results[0]['metrics']['responses'])}"
    ]
    
    for item in summary_items:
        content.append(Paragraph(item, body_text))
    
    content.append(Spacer(1, 0.3 * inch))

    # Performance Visualizations
    content.append(Paragraph("Performance Analysis", section_heading))
    content.append(Image(performance_path, width=7*inch, height=7*inch))
    content.append(Spacer(1, 0.2 * inch))
    content.append(Image(speed_path, width=7*inch, height=4*inch))
    content.append(Spacer(1, 0.3 * inch))
    
    # Detailed Model Analysis
    content.append(Paragraph("Detailed Model Analysis", section_heading))
    
    for result in benchmark_results:
        # Model Header
        content.append(Paragraph(f"Model: {result['model']}", subsection_heading))
        
        # Overall Performance Metrics
        content.append(Paragraph("Average Performance Metrics:", body_text))
        metrics = [
            f"• Average Response Time: {result['metrics']['avg_time']:.2f} seconds",
            f"• Average Memory Usage: {result['metrics']['avg_memory']:.2f} MB",
            f"• Average Processing Speed: {result['metrics']['avg_tokens_per_second']:.2f} tokens/second"
        ]
        
        for metric in metrics:
            content.append(Paragraph(metric, metric_text))
        
        # Response Analysis
        content.append(Paragraph("Sample Responses:", subsection_heading))
        
        for response_data in result['metrics']['responses']:
            # Query Info
            content.append(Paragraph(f"Query: {response_data['query']}", body_text))
            
            # Response Metrics
            response_metrics = [
                f"• Response Time: {response_data['time_taken']:.2f} seconds",
                f"• Memory Used: {response_data['memory_used']:.2f} MB",
                f"• Processing Speed: {response_data['tokens_per_second']:.2f} tokens/second"
            ]
            
            for metric in response_metrics:
                content.append(Paragraph(metric, metric_text))
            
            # Format and display the response
            formatted_response = response_data['response'].replace('\n', '<br/>')
            content.append(Paragraph("Response:", body_text))
            content.append(Paragraph(formatted_response, response_text))
            content.append(Spacer(1, 0.2 * inch))
        
        content.append(Spacer(1, 0.3 * inch))

    # Build PDF
    doc.build(content)
    
    # Cleanup temporary image files
    os.remove(performance_path)
    os.remove(speed_path)

def main():
    try:
        # Initialize
        print("\nStarting document processing...")
        combined_documents = load_and_process_documents()
        print("✓ Documents processed successfully")
        
        print("\nCreating vector store...")
        db_combined = create_vector_store(combined_documents)
        print("✓ Vector store created successfully")
        
        # Define the prompt template
        prompt = ChatPromptTemplate.from_template("""
        Answer the following question based only on the provided context.
        Think step by step before providing a detailed answer.
        <context>
        {context}
        </context>
        Question: {input}""")

        # Models to benchmark
        models = [
            "llama2", 
            "mistral", 
            "neural-chat",
            "openhermes"
        ]
        
        benchmark_results = []

        # Run benchmarks
        print("\nStarting benchmarking process...")
        for model in models:
            print(f"\nBenchmarking {model}...")
            try:
                metrics = benchmark_model(model, db_combined, prompt)
                print(f"✓ {model} benchmarked successfully")
                benchmark_results.append({
                    'model': model,
                    'metrics': metrics
                })
            except Exception as e:
                print(f"✗ Error benchmarking {model}: {str(e)}")
                logging.error(f"Error benchmarking {model}: {str(e)}")

        # Generate report
        print("\nGenerating visualizations and report...")
        performance_path, speed_path = generate_visualizations(benchmark_results)
        create_enhanced_pdf_report(benchmark_results, performance_path, speed_path)
        print("\n✓ PDF report generated successfully as 'benchmark_results.pdf'")

    except Exception as e:
        print(f"\n✗ Error during benchmark process: {str(e)}")
        logging.error(f"Error during benchmark process: {str(e)}")
        raise

if __name__ == "__main__":
    main()

2025-01-31 18:03:59,690 - INFO - Using documents path: c:\Users\infog\Documents\LinkedIn\Benchmark\documents



Starting document processing...


2025-01-31 18:03:59,923 - INFO - Successfully loaded: bank details.pdf
2025-01-31 18:03:59,973 - INFO - Successfully loaded: user data.pdf
2025-01-31 18:03:59,974 - INFO - Processed 11 document chunks


✓ Documents processed successfully

Creating vector store...


2025-01-31 18:04:18,561 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 18:04:18,574 - INFO - Vector store created with embedding model: llama2


✓ Vector store created successfully

Starting benchmarking process...

Benchmarking llama2...


2025-01-31 18:04:19,964 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-01-31 18:04:22,473 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
