# Comprehensive Performance Analysis: LRU vs FIFO vs No-Caching

This notebook visualizes and compares the performance of the three caching strategies used by the Row Match Recognize system: LRU, FIFO, and no caching.

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import warnings
# Notebook-wide setup. Note that this mutes *every* warning category for the
# rest of the session, not only plotting noise.
warnings.simplefilter('ignore')

# Plot appearance: style sheet first, then the seaborn palette (same order as
# before, so the palette is the last thing applied).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("📊 Libraries loaded successfully!")

📊 Libraries loaded successfully!


In [3]:
# Load and process benchmark data
def load_benchmark_data(path='enhanced_benchmark_results.csv'):
    """Load benchmark results from a CSV file and decode its list columns.

    The `execution_times` and `memory_usages` columns hold Python list
    literals stored as text. They are decoded into real lists and stored in
    the new columns `execution_times_parsed` and `memory_usages_parsed`.

    Args:
        path: Location of the benchmark CSV. Defaults to the file name this
            notebook has always read.

    Returns:
        The loaded DataFrame with the two parsed columns added, or None when
        the file cannot be read or processed (the error is printed).
    """

    def safe_parse_list(x):
        """Decode a list literal; non-strings pass through, bad text gives []."""
        if not isinstance(x, str):
            return x
        try:
            return ast.literal_eval(x)
        # Only the errors literal_eval raises for malformed input are treated
        # as "unparseable"; KeyboardInterrupt/SystemExit are no longer swallowed.
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return []

    try:
        df = pd.read_csv(path)
        print(f"✅ Loaded {len(df)} benchmark records")

        # Parse execution_times / memory_usages from string to list
        df['execution_times_parsed'] = df['execution_times'].apply(safe_parse_list)
        df['memory_usages_parsed'] = df['memory_usages'].apply(safe_parse_list)

        return df
    except Exception as e:
        # Deliberately best-effort: callers test the result against None.
        print(f"❌ Error loading data: {e}")
        return None

# Read the benchmark results; the loader hands back None when it fails.
df = load_benchmark_data()

# Print a short overview only when there is a frame to describe.
if df is not None:
    record_count = len(df)
    mode_names = ', '.join(df['cache_mode'].unique())
    scenario_count = len(df['scenario_description'].unique())

    print(f"\n📋 Data Summary:")
    print(f"   • Total records: {record_count}")
    print(f"   • Cache modes: {mode_names}")
    print(f"   • Scenarios: {scenario_count}")
    print(f"\n🔍 Sample data:")
    display(df.head(3))

✅ Loaded 9 benchmark records

📋 Data Summary:
   • Total records: 9
   • Cache modes: none, fifo, lru
   • Scenarios: 3

🔍 Sample data:


Unnamed: 0,cache_mode,avg_execution_time,first_run_time,subsequent_avg_time,execution_times,initial_memory,max_memory,memory_increase,memory_usages,cache_hits,cache_misses,cache_hit_rate,result_size,scenario_id,scenario_description,complexity,pattern_type,data_size,execution_times_parsed,memory_usages_parsed
0,none,1.416025,1.392092,1.422009,"[1.392092227935791, 1.4315712451934814, 1.4348...",229.402344,232.605469,3.203125,"[229.40234375, 232.60546875, 232.60546875, 232...",0,5,0.0,0,1,"Basic patterns, small dataset",simple,basic,1000,"[1.392092227935791, 1.4315712451934814, 1.4348...","[229.40234375, 232.60546875, 232.60546875, 232..."
1,fifo,1.468439,1.617169,1.431256,"[1.6171691417694092, 1.3491580486297607, 1.402...",232.605469,232.605469,0.0,"[232.60546875, 232.60546875, 232.60546875, 232...",10,1,90.909091,0,1,"Basic patterns, small dataset",simple,basic,1000,"[1.6171691417694092, 1.3491580486297607, 1.402...","[232.60546875, 232.60546875, 232.60546875, 232..."
2,lru,1.444003,1.517629,1.425597,"[1.5176293849945068, 1.379716396331787, 1.5605...",232.605469,232.605469,0.0,"[232.60546875, 232.60546875, 232.60546875, 232...",10,1,90.909091,0,1,"Basic patterns, small dataset",simple,basic,1000,"[1.5176293849945068, 1.379716396331787, 1.5605...","[232.60546875, 232.60546875, 232.60546875, 232..."


In [4]:
# Performance Overview Dashboard
# Four panels: mean execution time, mean cache hit rate, mean memory increase
# (each per cache mode) and mean execution time per complexity level.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('🚀 Performance Comparison Dashboard: LRU vs FIFO vs No-Caching', 
             fontsize=16, fontweight='bold', y=0.98)

# One fixed color per cache mode. groupby()/unstack() return the modes in
# sorted order (fifo, lru, none), so colors are looked up by mode name instead
# of being assigned by position; otherwise the same mode is drawn in a
# different color from one panel to the next.
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']
mode_colors = dict(zip(['none', 'fifo', 'lru'], colors))
fallback_color = '#95A5A6'  # neutral grey for any mode not listed above


def _bar_colors(modes):
    """Return the color for each cache mode name in `modes`, in order."""
    return [mode_colors.get(mode, fallback_color) for mode in modes]


def _label_bars(ax, bars, offset, template):
    """Write each bar's height just above it, formatted with `template`."""
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + offset,
                template.format(height), ha='center', va='bottom',
                fontweight='bold', fontsize=11)


# 1. Average Execution Time by Cache Mode
ax1 = axes[0, 0]
execution_comparison = df.groupby('cache_mode')['avg_execution_time'].mean().reset_index()
bars = ax1.bar(execution_comparison['cache_mode'], execution_comparison['avg_execution_time'], 
               color=_bar_colors(execution_comparison['cache_mode']),
               alpha=0.8, edgecolor='white', linewidth=2)
ax1.set_title('⏱️ Average Execution Time by Cache Mode', fontweight='bold', pad=15)
ax1.set_ylabel('Execution Time (seconds)')
ax1.set_xlabel('Cache Mode')
ax1.grid(True, alpha=0.3, axis='y')
_label_bars(ax1, bars, 0.05, '{:.3f}s')

# 2. Cache Hit Rate Comparison (the no-cache baseline has no hit rate)
ax2 = axes[0, 1]
cache_data = df[df['cache_mode'] != 'none']
if len(cache_data) > 0:
    hit_rate_comparison = cache_data.groupby('cache_mode')['cache_hit_rate'].mean()
    bars2 = ax2.bar(hit_rate_comparison.index, hit_rate_comparison.values, 
                   color=_bar_colors(hit_rate_comparison.index),
                   alpha=0.8, edgecolor='white', linewidth=2)
    ax2.set_title('🎯 Cache Hit Rate Comparison', fontweight='bold', pad=15)
    ax2.set_ylabel('Hit Rate (%)')
    ax2.set_xlabel('Cache Mode')
    ax2.grid(True, alpha=0.3, axis='y')
    _label_bars(ax2, bars2, 1, '{:.1f}%')

# 3. Memory Usage Comparison
ax3 = axes[1, 0]
memory_comparison = df.groupby('cache_mode')['memory_increase'].mean().reset_index()
bars3 = ax3.bar(memory_comparison['cache_mode'], memory_comparison['memory_increase'], 
               color=_bar_colors(memory_comparison['cache_mode']),
               alpha=0.8, edgecolor='white', linewidth=2)
ax3.set_title('💾 Average Memory Increase by Cache Mode', fontweight='bold', pad=15)
ax3.set_ylabel('Memory Increase (MB)')
ax3.set_xlabel('Cache Mode')
ax3.grid(True, alpha=0.3, axis='y')
_label_bars(ax3, bars3, 0.02, '{:.2f}MB')

# 4. Performance by Scenario Complexity (one bar group per complexity level)
ax4 = axes[1, 1]
scenario_perf = df.groupby(['complexity', 'cache_mode'])['avg_execution_time'].mean().unstack()
scenario_perf.plot(kind='bar', ax=ax4, color=_bar_colors(scenario_perf.columns),
                   alpha=0.8, width=0.7)
ax4.set_title('📊 Performance by Scenario Complexity', fontweight='bold', pad=15)
ax4.set_ylabel('Execution Time (seconds)')
ax4.set_xlabel('Complexity Level')
ax4.legend(title='Cache Mode', framealpha=0.9)
ax4.tick_params(axis='x', rotation=45)
ax4.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('performance_comparison_dashboard.png', dpi=300, bbox_inches='tight', 
            facecolor='white', edgecolor='none')
plt.show()

print("✅ Performance dashboard created successfully!")

✅ Performance dashboard created successfully!


In [5]:
# Heatmap of mean execution time for every (scenario, cache mode) pair
plt.figure(figsize=(12, 8))

# Rows are scenarios, columns are cache modes, cells hold the mean time
heatmap_data = df.pivot_table(
    index='scenario_description',
    columns='cache_mode',
    values='avg_execution_time',
    aggfunc='mean',
)

# Combinations without a measurement are masked out rather than drawn
mask = heatmap_data.isnull()

colorbar_options = {'label': 'Execution Time (seconds)', 'shrink': 0.8}
annotation_style = {'fontsize': 12, 'fontweight': 'bold'}
sns.heatmap(
    heatmap_data,
    mask=mask,
    annot=True,
    fmt='.3f',
    annot_kws=annotation_style,
    cmap='RdYlBu_r',
    cbar_kws=colorbar_options,
    linewidths=1,
    linecolor='white',
    square=False,
)

# Titles, axis labels and horizontal tick text
plt.title(
    '🔥 Performance Heatmap: Execution Time by Scenario and Cache Mode',
    fontsize=16,
    fontweight='bold',
    pad=20,
)
plt.xlabel('Cache Mode', fontweight='bold', fontsize=12)
plt.ylabel('Test Scenario', fontweight='bold', fontsize=12)
plt.xticks(rotation=0, fontsize=11)
plt.yticks(rotation=0, fontsize=10)

# Write the figure to disk, then render it inline
plt.tight_layout()
plt.savefig(
    'performance_heatmap.png',
    dpi=300,
    bbox_inches='tight',
    facecolor='white',
    edgecolor='none',
)
plt.show()

print("✅ Performance heatmap created successfully!")

✅ Performance heatmap created successfully!


In [6]:
# Scalability: how execution time and peak memory change with data size
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
fig.suptitle('📏 Scalability Analysis: Performance vs Data Size', 
             fontsize=16, fontweight='bold', y=1.02)

# Line color and marker shape for each cache mode
colors_dict = {'none': '#FF6B6B', 'fifo': '#4ECDC4', 'lru': '#45B7D1'}
markers = {'none': 'o', 'fifo': 's', 'lru': '^'}

# Both panels are drawn the same way; only the plotted column and the texts
# differ, so each panel is described by one tuple:
# (axes, column to plot, panel title, y-axis label)
ax1, ax2 = axes
panel_specs = [
    (ax1, 'avg_execution_time', '⏱️ Execution Time vs Data Size', 'Execution Time (seconds)'),
    (ax2, 'max_memory', '💾 Memory Usage vs Data Size', 'Max Memory Usage (MB)'),
]

for panel_ax, metric, panel_title, y_label in panel_specs:
    # One line per cache mode, points ordered by data size
    for cache_mode in df['cache_mode'].unique():
        cache_data = df[df['cache_mode'] == cache_mode].sort_values('data_size')
        panel_ax.plot(
            cache_data['data_size'],
            cache_data[metric],
            marker=markers[cache_mode],
            linewidth=3,
            markersize=10,
            label=cache_mode.upper(),
            color=colors_dict[cache_mode],
            alpha=0.8,
        )

    panel_ax.set_title(panel_title, fontweight='bold', pad=15)
    panel_ax.set_xlabel('Data Size (records)', fontweight='bold')
    panel_ax.set_ylabel(y_label, fontweight='bold')
    panel_ax.legend(framealpha=0.9, fontsize=11)
    panel_ax.grid(True, alpha=0.3)
    panel_ax.set_facecolor('#FAFAFA')

# Save to disk, then render inline
plt.tight_layout()
plt.savefig(
    'scalability_analysis.png',
    dpi=300,
    bbox_inches='tight',
    facecolor='white',
    edgecolor='none',
)
plt.show()

print("✅ Scalability analysis created successfully!")

✅ Scalability analysis created successfully!


In [7]:
# Text report: relative speed-ups, memory growth and hit rates per cache mode
banner = "=" * 80
divider = "-" * 40

print("\n" + banner)
print("🚀 PERFORMANCE IMPROVEMENT ANALYSIS")
print(banner)

# Per-mode means; the hit rate is only averaged over the caching modes
perf_by_mode = df.groupby('cache_mode')['avg_execution_time'].mean()
memory_by_mode = df.groupby('cache_mode')['memory_increase'].mean()
cache_efficiency = df[df['cache_mode'] != 'none'].groupby('cache_mode')['cache_hit_rate'].mean()

print(f"\n📊 EXECUTION TIME COMPARISON:")
print(divider)

has_fifo = 'fifo' in perf_by_mode.index
has_lru = 'lru' in perf_by_mode.index

# Everything in this section is relative to the no-cache baseline, so the
# whole section is skipped when that baseline was not measured.
if 'none' in perf_by_mode.index:
    baseline = perf_by_mode['none']
    print(f"🔴 Baseline (No Caching): {baseline:.3f}s")

    if has_fifo:
        fifo_improvement = ((baseline - perf_by_mode['fifo']) / baseline) * 100
        icon = "🔴"
        if fifo_improvement > 0:
            icon = "🟢"
        print(f"{icon} FIFO Caching: {perf_by_mode['fifo']:.3f}s ({fifo_improvement:+.1f}%)")

    if has_lru:
        lru_improvement = ((baseline - perf_by_mode['lru']) / baseline) * 100
        icon = "🔴"
        if lru_improvement > 0:
            icon = "🟢"
        print(f"{icon} LRU Caching: {perf_by_mode['lru']:.3f}s ({lru_improvement:+.1f}%)")

    # Head-to-head: positive means LRU is faster than FIFO
    if has_fifo and has_lru:
        lru_vs_fifo = ((perf_by_mode['fifo'] - perf_by_mode['lru']) / perf_by_mode['fifo']) * 100
        icon = "⚠️"
        if lru_vs_fifo > 0:
            icon = "🏆"
        print(f"\n{icon} LRU vs FIFO Improvement: {lru_vs_fifo:+.1f}%")

print(f"\n💾 MEMORY USAGE ANALYSIS:")
print(divider)
# The marker depends on the mode name only, not on the measured value
memory_markers = {'none': "🔴", 'fifo': "🟡"}
for mode, memory in memory_by_mode.items():
    emoji = memory_markers.get(mode, "🟢")
    print(f"{emoji} {mode.upper():<12}: {memory:.2f} MB average increase")

if len(cache_efficiency) > 0:
    print(f"\n🎯 CACHE HIT RATES:")
    print(divider)
    for mode, hit_rate in cache_efficiency.items():
        # Above 85 is green, above 70 is yellow, anything else is red
        if hit_rate > 85:
            emoji = "🟢"
        elif hit_rate > 70:
            emoji = "🟡"
        else:
            emoji = "🔴"
        print(f"{emoji} {mode.upper():<12}: {hit_rate:.1f}%")

print("\n" + banner)


🚀 PERFORMANCE IMPROVEMENT ANALYSIS

📊 EXECUTION TIME COMPARISON:
----------------------------------------
🔴 Baseline (No Caching): 3.778s
🔴 FIFO Caching: 4.009s (-6.1%)
🟢 LRU Caching: 3.433s (+9.1%)

🏆 LRU vs FIFO Improvement: +14.4%

💾 MEMORY USAGE ANALYSIS:
----------------------------------------
🟡 FIFO        : 0.00 MB average increase
🟢 LRU         : 0.21 MB average increase
🔴 NONE        : 1.90 MB average increase

🎯 CACHE HIT RATES:
----------------------------------------
🟢 FIFO        : 90.9%
🟢 LRU         : 90.9%



In [8]:
# Performance Summary Table
# Builds one row of formatted statistics per cache mode, renders the rows as a
# styled matplotlib table (saved as PNG) and exports the same rows to CSV.
summary_stats = []

for cache_mode in df['cache_mode'].unique():
    cache_data = df[df['cache_mode'] == cache_mode]
    
    # Values are pre-formatted strings because the table shows them verbatim
    stats = {
        'Cache Mode': cache_mode.upper(),
        'Avg Execution Time (s)': f"{cache_data['avg_execution_time'].mean():.3f}",
        'Min Execution Time (s)': f"{cache_data['avg_execution_time'].min():.3f}",
        'Max Execution Time (s)': f"{cache_data['avg_execution_time'].max():.3f}",
        'Std Deviation (s)': f"{cache_data['avg_execution_time'].std():.3f}",
        'Avg Memory Increase (MB)': f"{cache_data['memory_increase'].mean():.2f}",
        # A hit rate is meaningless for the no-cache baseline
        'Cache Hit Rate (%)': f"{cache_data['cache_hit_rate'].mean():.1f}" if cache_mode != 'none' else 'N/A'
    }
    summary_stats.append(stats)

summary_df = pd.DataFrame(summary_stats)

# Create styled table visualization
fig, ax = plt.subplots(figsize=(14, 6))
ax.axis('tight')
ax.axis('off')

table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns,
                cellLoc='center', loc='center', bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(11)
table.scale(1.2, 2.5)

# Row tint per cache mode, keyed by the label shown in the first column.
# Rows follow the order in which modes appear in the data, so the tint is
# looked up by label instead of being inferred from the row position.
row_face_colors = {'NONE': '#FFE5E5', 'FIFO': '#E5F7F5', 'LRU': '#E5F3FF'}
default_row_color = '#F4F6F7'  # used for any mode without a dedicated tint

for (i, j), cell in table.get_celld().items():
    if i == 0:  # Header row
        cell.set_text_props(weight='bold', color='white')
        cell.set_facecolor('#2C3E50')
        continue

    # Table row i (1-based because of the header) is summary_df row i - 1
    mode_label = summary_df.iloc[i - 1]['Cache Mode']
    cell.set_facecolor(row_face_colors.get(mode_label, default_row_color))

    if j == 0:  # First column (Cache Mode)
        cell.set_text_props(weight='bold')

plt.title('📋 Comprehensive Performance Summary Table', 
          fontsize=16, fontweight='bold', pad=30)
plt.savefig('performance_summary_table.png', dpi=300, bbox_inches='tight', 
            facecolor='white', edgecolor='none')
plt.show()

# Save summary to CSV
summary_df.to_csv('performance_summary.csv', index=False)
print("✅ Performance summary table created and saved to performance_summary.csv")

# Display the dataframe
print("\n📊 Performance Summary:")
display(summary_df)

✅ Performance summary table created and saved to performance_summary.csv

📊 Performance Summary:


Unnamed: 0,Cache Mode,Avg Execution Time (s),Min Execution Time (s),Max Execution Time (s),Std Deviation (s),Avg Memory Increase (MB),Cache Hit Rate (%)
0,NONE,3.778,1.416,6.806,2.756,1.9,
1,FIFO,4.009,1.468,7.54,3.154,0.0,90.9
2,LRU,3.433,1.444,5.649,2.112,0.21,90.9


In [9]:
# Final Results Summary
# Uses perf_by_mode and cache_efficiency from the "Performance Improvement
# Analysis" cell, so that cell must have been run first.
print("\n" + "🎉" * 20 + " ANALYSIS COMPLETE " + "🎉" * 20)
print("\n📁 Generated Files:")
print("   ✅ performance_comparison_dashboard.png - Main performance overview")
print("   ✅ performance_heatmap.png - Detailed scenario comparison") 
print("   ✅ scalability_analysis.png - Data size impact analysis")
print("   ✅ performance_summary_table.png - Comprehensive metrics table")
print("   ✅ performance_summary.csv - Exportable summary data")

print("\n🔍 Key Findings:")
# The wording follows the sign of each measured difference, so a slowdown is
# never reported as an "improvement".
if 'none' in perf_by_mode.index and 'lru' in perf_by_mode.index:
    lru_improvement = ((perf_by_mode['none'] - perf_by_mode['lru']) / perf_by_mode['none']) * 100
    if lru_improvement >= 0:
        print(f"   🚀 LRU caching provides {lru_improvement:.1f}% performance improvement over no caching")
    else:
        print(f"   ⚠️ LRU caching is {abs(lru_improvement):.1f}% slower than no caching")

if 'fifo' in perf_by_mode.index and 'lru' in perf_by_mode.index:
    lru_vs_fifo = ((perf_by_mode['fifo'] - perf_by_mode['lru']) / perf_by_mode['fifo']) * 100
    if lru_vs_fifo >= 0:
        print(f"   🏆 LRU outperforms FIFO by {lru_vs_fifo:.1f}%")
    else:
        print(f"   ⚠️ LRU is {abs(lru_vs_fifo):.1f}% slower than FIFO")

avg_hit_rate = cache_efficiency.mean() if len(cache_efficiency) > 0 else 0
print(f"   🎯 Average cache hit rate: {avg_hit_rate:.1f}%")

print("\n💡 Recommendations:")
# Recommend whichever mode has the lowest mean execution time instead of
# always naming LRU.
measured_modes = perf_by_mode.dropna()
fastest_mode = measured_modes.idxmin() if len(measured_modes) > 0 else None
if fastest_mode is None:
    print("   1. No execution-time data available; rerun the benchmarks before choosing a cache")
elif fastest_mode == 'none':
    print("   1. Caching did not reduce average execution time; investigate before deploying a cache")
else:
    print(f"   1. Deploy {fastest_mode.upper()} caching for optimal performance")
print("   2. Monitor cache hit rates in production")
print("   3. Consider cache size tuning for memory efficiency")
print("   4. Implement gradual rollout with performance monitoring")

print("\n" + "="*80)


🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉 ANALYSIS COMPLETE 🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

📁 Generated Files:
   ✅ performance_comparison_dashboard.png - Main performance overview
   ✅ performance_heatmap.png - Detailed scenario comparison
   ✅ scalability_analysis.png - Data size impact analysis
   ✅ performance_summary_table.png - Comprehensive metrics table
   ✅ performance_summary.csv - Exportable summary data

🔍 Key Findings:
   🚀 LRU caching provides 9.1% performance improvement over no caching
   🏆 LRU outperforms FIFO by 14.4%
   🎯 Average cache hit rate: 90.9%

💡 Recommendations:
   1. Deploy LRU caching for optimal performance
   2. Monitor cache hit rates in production
   3. Consider cache size tuning for memory efficiency
   4. Implement gradual rollout with performance monitoring

