In [1]:
import numpy as np
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import time
import json
import os
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import requests
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# D-Wave Ocean SDK imports
import dimod
from dwave.samplers import SimulatedAnnealingSampler, TabuSampler
from neal import SimulatedAnnealingSampler as NealSampler
try:
    from dwave.system import DWaveSampler, EmbeddingComposite
    DWAVE_AVAILABLE = True
except ImportError:
    print("D-Wave system not available - using simulators only")
    DWAVE_AVAILABLE = False

In [2]:
class GsetLoader:
    """Load and process Gset Max-Cut benchmark instances"""
    
    def __init__(self, data_dir="gset_data"):
        self.data_dir = Path(data_dir)
        self.data_dir.mkdir(exist_ok=True)
        self.base_url = "http://web.stanford.edu/~yyye/yyye/Gset/"
        
        # Gset instance information
        self.instance_info = {
            # Small instances good for D-Wave
            11: {"nodes": 800, "edges": 1600, "density": 0.005, "best_known": 564},
            12: {"nodes": 800, "edges": 1600, "density": 0.005, "best_known": 556},
            13: {"nodes": 800, "edges": 1600, "density": 0.005, "best_known": 582},
            14: {"nodes": 800, "edges": 4694, "density": 0.015, "best_known": 3064},
            
            # Medium instances
            22: {"nodes": 2000, "edges": 19990, "density": 0.010, "best_known": 13359},
            43: {"nodes": 1000, "edges": 9990, "density": 0.020, "best_known": 6660},
            44: {"nodes": 1000, "edges": 9990, "density": 0.020, "best_known": 6650},
            
            # Large instances (GPU territory)
            48: {"nodes": 3000, "edges": 6000, "density": 0.001, "best_known": 6000},
            49: {"nodes": 3000, "edges": 6000, "density": 0.001, "best_known": 6000},
            54: {"nodes": 5000, "edges": 12498, "density": 0.001, "best_known": 4030},
            55: {"nodes": 5000, "edges": 12498, "density": 0.001, "best_known": 10294},
            
            # Very large instances
            70: {"nodes": 10000, "edges": 9999, "density": 0.0002, "best_known": 9541},
            71: {"nodes": 10000, "edges": 9999, "density": 0.0002, "best_known": 9552},
            72: {"nodes": 10000, "edges": 9999, "density": 0.0002, "best_known": 6037},
            81: {"nodes": 20000, "edges": 40000, "density": 0.0002, "best_known": 15364}
        }
    
    def download_instance(self, instance_num: int) -> Optional[Path]:
        """Download Gset instance if not already cached"""
        filename = f"G{instance_num}"
        filepath = self.data_dir / filename
        
        if filepath.exists():
            return filepath
        
        url = f"{self.base_url}{filename}"
        print(f"Downloading G{instance_num}...")
        
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            
            with open(filepath, 'w') as f:
                f.write(response.text)
            
            return filepath
        except Exception as e:
            print(f"Error downloading G{instance_num}: {e}")
            return None
    
    def load_instance(self, instance_num: int) -> Optional[nx.Graph]:
        """Load Gset instance as NetworkX graph"""
        filepath = self.download_instance(instance_num)
        if not filepath:
            return None
        
        try:
            with open(filepath, 'r') as f:
                lines = f.readlines()
            
            # Parse header
            first_line = lines[0].strip().split()
            num_vertices = int(first_line[0])
            num_edges = int(first_line[1])
            
            # Create graph with 0-based indexing
            G = nx.Graph()
            G.add_nodes_from(range(num_vertices))
            
            # Add edges
            for line in lines[1:]:
                if line.strip():
                    parts = line.strip().split()
                    if len(parts) >= 3:
                        # Convert to 0-based indexing
                        u, v, weight = int(parts[0])-1, int(parts[1])-1, float(parts[2])
                        G.add_edge(u, v, weight=weight)
            
            return G
        except Exception as e:
            print(f"Error parsing G{instance_num}: {e}")
            return None

In [3]:
class MaxCutQUBO:
    """Max-Cut QUBO formulation and utilities"""
    
    @staticmethod
    def graph_to_qubo(G: nx.Graph) -> Dict[Tuple[int, int], float]:
        """Convert Max-Cut problem to QUBO formulation"""
        Q = {}
        
        # Max-Cut QUBO: minimize -0.5 * sum(w_ij * (x_i - x_j)^2)
        # Expanded: minimize -0.5 * sum(w_ij * (x_i + x_j - 2*x_i*x_j))
        # Which gives: minimize sum(w_ij * (x_i*x_j - 0.5*(x_i + x_j)))
        
        for u, v, data in G.edges(data=True):
            weight = data.get('weight', 1.0)
            
            # Diagonal terms: +0.5 * w_ij for each vertex
            Q[(u, u)] = Q.get((u, u), 0) + 0.5 * weight
            Q[(v, v)] = Q.get((v, v), 0) + 0.5 * weight
            
            # Off-diagonal term: -w_ij
            Q[(u, v)] = Q.get((u, v), 0) - weight
        
        return Q
    
    @staticmethod
    def evaluate_cut(G: nx.Graph, solution: Dict[int, int]) -> float:
        """Evaluate Max-Cut objective for a solution"""
        cut_value = 0
        for u, v, data in G.edges(data=True):
            weight = data.get('weight', 1.0)
            if solution[u] != solution[v]:  # Edge crosses the cut
                cut_value += weight
        return cut_value
    
    @staticmethod
    def qubo_energy_to_cut_value(qubo_energy: float, total_weight: float) -> float:
        """Convert QUBO energy back to Max-Cut value"""
        # QUBO energy = 0.5 * total_weight - cut_value
        return 0.5 * total_weight - qubo_energy

In [4]:
class MaxCutBenchmark:
    """Comprehensive Max-Cut benchmarking system"""
    
    def __init__(self, results_dir="maxcut_results"):
        self.results_dir = Path(results_dir)
        self.results_dir.mkdir(exist_ok=True)
        
        self.loader = GsetLoader()
        self.results = {}
        
        # Initialize samplers
        self.samplers = self._initialize_samplers()
        
    def _initialize_samplers(self) -> Dict:
        """Initialize all available samplers"""
        samplers = {
            'neal': NealSampler(),
            'simulated_annealing': SimulatedAnnealingSampler(),
            'tabu': TabuSampler()
        }
        
        # Add D-Wave if available
        if DWAVE_AVAILABLE:
            try:
                samplers['dwave'] = EmbeddingComposite(DWaveSampler())
                print("D-Wave sampler initialized successfully")
            except Exception as e:
                print(f"D-Wave sampler initialization failed: {e}")
        
        print(f"Initialized samplers: {list(samplers.keys())}")
        return samplers
    
    def benchmark_instance(self, instance_num: int, num_runs: int = 5) -> Dict:
        """Benchmark single Gset instance across all samplers"""
        print(f"\n=== Benchmarking G{instance_num} ===")
        
        # Load instance
        G = self.loader.load_instance(instance_num)
        if not G:
            print(f"Failed to load G{instance_num}")
            return {}
        
        Q = MaxCutQUBO.graph_to_qubo(G)
        instance_info = self.loader.instance_info.get(instance_num, {})
        best_known = instance_info.get('best_known', None)
        
        # Calculate total edge weight for energy conversion
        total_weight = sum(data.get('weight', 1.0) for _, _, data in G.edges(data=True))
        
        print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")
        print(f"QUBO terms: {len(Q)}, Best known: {best_known}")
        
        results = {
            'instance': instance_num,
            'nodes': G.number_of_nodes(),
            'edges': G.number_of_edges(),
            'density': instance_info.get('density', 0),
            'best_known': best_known,
            'total_weight': total_weight,
            'samplers': {}
        }
        
        # Test each sampler
        for sampler_name, sampler in self.samplers.items():
            print(f"\nTesting {sampler_name}...")
            
            sampler_results = {
                'energies': [],
                'cut_values': [],
                'times': [],
                'successful_runs': 0
            }
            
            for run in range(num_runs):
                try:
                    start_time = time.time()
                    
                    # Configure sampler parameters
                    if sampler_name == 'dwave':
                        response = sampler.sample_qubo(
                            Q, 
                            num_reads=1000,
                            annealing_time=20,
                            chain_strength=max(abs(v) for v in Q.values()) * 2
                        )
                    elif sampler_name == 'neal':
                        response = sampler.sample_qubo(
                            Q, 
                            num_reads=1000,
                            num_sweeps=10000
                        )
                    elif sampler_name == 'tabu':
                        response = sampler.sample_qubo(Q, num_reads=10, timeout=60)
                    else:  # simulated_annealing
                        response = sampler.sample_qubo(Q, num_reads=100)
                    
                    run_time = time.time() - start_time
                    
                    # Get best solution
                    best_sample = response.first
                    energy = best_sample.energy
                    solution = dict(best_sample.sample)
                    
                    # Calculate cut value
                    cut_value = MaxCutQUBO.evaluate_cut(G, solution)
                    
                    sampler_results['energies'].append(energy)
                    sampler_results['cut_values'].append(cut_value)
                    sampler_results['times'].append(run_time)
                    sampler_results['successful_runs'] += 1
                    
                    print(f"  Run {run+1}: Cut={cut_value:.1f}, Time={run_time:.2f}s")
                    
                except Exception as e:
                    print(f"  Run {run+1} failed: {e}")
                    continue
            
            # Calculate statistics
            if sampler_results['successful_runs'] > 0:
                sampler_results['best_cut'] = max(sampler_results['cut_values'])
                sampler_results['avg_cut'] = np.mean(sampler_results['cut_values'])
                sampler_results['std_cut'] = np.std(sampler_results['cut_values'])
                sampler_results['avg_time'] = np.mean(sampler_results['times'])
                sampler_results['std_time'] = np.std(sampler_results['times'])
                
                # Calculate approximation ratio if best known available
                if best_known:
                    sampler_results['approximation_ratio'] = sampler_results['best_cut'] / best_known
                    sampler_results['avg_approximation_ratio'] = sampler_results['avg_cut'] / best_known
                
                print(f"  Best cut: {sampler_results['best_cut']:.1f}")
                if best_known:
                    print(f"  Approximation ratio: {sampler_results['approximation_ratio']:.4f}")
            
            results['samplers'][sampler_name] = sampler_results
        
        return results
    
    def run_systematic_benchmark(self, instance_list: List[int] = None, num_runs: int = 5):
        """Run systematic benchmark across instance progression"""
        if instance_list is None:
            # Default progression from small to large
            instance_list = [11, 12, 13, 14, 43]#, 44, 48, 49, 54, 70, 71, 81]
        
        print(f"Starting systematic benchmark of {len(instance_list)} instances")
        print(f"Instance progression: {instance_list}")
        
        all_results = []
        
        for instance_num in instance_list:
            try:
                result = self.benchmark_instance(instance_num, num_runs)
                if result:
                    all_results.append(result)
                    self.results[instance_num] = result
                    
                    # Save intermediate results
                    self._save_results()
                    
            except KeyboardInterrupt:
                print("\nBenchmark interrupted by user")
                break
            except Exception as e:
                print(f"Error benchmarking G{instance_num}: {e}")
                continue
        
        # Generate final report
        self._generate_report(all_results)
        return all_results
    
    def _save_results(self):
        """Save results to JSON file"""
        results_file = self.results_dir / "benchmark_results.json"
        with open(results_file, 'w') as f:
            json.dump(self.results, f, indent=2, default=str)
    
    def analyze_key_metrics(self, results: List[Dict]):
        """Analyze and report key performance metrics"""
        print("\n" + "="*60)
        print("KEY METRIC ANALYSIS")
        print("="*60)
        
        if not results:
            return
        
        # Extract data for analysis
        sampler_names = list(results[0]['samplers'].keys())
        
        print("\n1. SOLUTION QUALITY ANALYSIS")
        print("-" * 40)
        quality_summary = {}
        
        for sampler_name in sampler_names:
            ratios = []
            for result in results:
                if (result['best_known'] and 
                    result['samplers'][sampler_name]['successful_runs'] > 0):
                    ratio = result['samplers'][sampler_name]['best_cut'] / result['best_known']
                    ratios.append(ratio)
            
            if ratios:
                quality_summary[sampler_name] = {
                    'avg_ratio': np.mean(ratios),
                    'min_ratio': min(ratios),
                    'max_ratio': max(ratios),
                    'std_ratio': np.std(ratios)
                }
                
                print(f"{sampler_name:20}: Avg={quality_summary[sampler_name]['avg_ratio']:.4f}, "
                      f"Range=[{quality_summary[sampler_name]['min_ratio']:.4f}-"
                      f"{quality_summary[sampler_name]['max_ratio']:.4f}], "
                      f"Std={quality_summary[sampler_name]['std_ratio']:.4f}")
        
        print("\n2. RUNTIME PERFORMANCE ANALYSIS")
        print("-" * 40)
        runtime_summary = {}
        
        for sampler_name in sampler_names:
            times = []
            nodes = []
            for result in results:
                if result['samplers'][sampler_name]['successful_runs'] > 0:
                    times.append(result['samplers'][sampler_name]['avg_time'])
                    nodes.append(result['nodes'])
            
            if times:
                runtime_summary[sampler_name] = {
                    'avg_time': np.mean(times),
                    'min_time': min(times),
                    'max_time': max(times),
                    'time_per_node': np.mean([t/n for t, n in zip(times, nodes)])
                }
                
                print(f"{sampler_name:20}: Avg={runtime_summary[sampler_name]['avg_time']:.2f}s, "
                      f"Range=[{runtime_summary[sampler_name]['min_time']:.2f}-"
                      f"{runtime_summary[sampler_name]['max_time']:.2f}]s, "
                      f"Time/Node={runtime_summary[sampler_name]['time_per_node']*1000:.2f}ms")
        
        print("\n3. SCALABILITY ANALYSIS")
        print("-" * 40)
        
        # Analyze largest problem each sampler could handle
        max_nodes_handled = {}
        for sampler_name in sampler_names:
            max_nodes = 0
            for result in results:
                if result['samplers'][sampler_name]['successful_runs'] > 0:
                    max_nodes = max(max_nodes, result['nodes'])
            max_nodes_handled[sampler_name] = max_nodes
            print(f"{sampler_name:20}: Max problem size = {max_nodes} nodes")
        
        print("\n4. EFFICIENCY RANKING")
        print("-" * 40)
        
        # Combine quality and speed for efficiency ranking
        efficiency_scores = {}
        for sampler_name in sampler_names:
            if (sampler_name in quality_summary and 
                sampler_name in runtime_summary):
                
                # Higher quality ratio = better, lower time = better
                quality_score = quality_summary[sampler_name]['avg_ratio']
                speed_score = 1.0 / runtime_summary[sampler_name]['avg_time']  # Invert so higher is better
                
                # Normalize and combine (you can adjust weights)
                quality_weight = 0.6  # Prioritize solution quality
                speed_weight = 0.4
                
                efficiency_scores[sampler_name] = (
                    quality_weight * quality_score + 
                    speed_weight * speed_score / max(s for s in [speed_score] + 
                                                   [1.0/runtime_summary[s]['avg_time'] 
                                                    for s in runtime_summary])
                )
        
        # Sort by efficiency
        ranked_samplers = sorted(efficiency_scores.items(), 
                               key=lambda x: x[1], reverse=True)
        
        print("Overall Efficiency Ranking (Quality=60%, Speed=40%):")
        for i, (sampler_name, score) in enumerate(ranked_samplers):
            print(f"{i+1}. {sampler_name:20}: Efficiency Score = {score:.4f}")
        
        print("\n5. RESEARCH RECOMMENDATIONS")
        print("-" * 40)
        
        if 'dwave' in quality_summary and 'neal' in quality_summary:
            dwave_quality = quality_summary['dwave']['avg_ratio']
            neal_quality = quality_summary['neal']['avg_ratio']
            quality_diff = abs(dwave_quality - neal_quality)
            
            if quality_diff < 0.01:
                print("• Solution quality is comparable between D-Wave and GPU simulators")
            elif dwave_quality > neal_quality:
                print(f"• D-Wave shows {((dwave_quality/neal_quality-1)*100):.1f}% better solution quality")
            else:
                print(f"• GPU simulator shows {((neal_quality/dwave_quality-1)*100):.1f}% better solution quality")
        
        if 'dwave' in runtime_summary and 'neal' in runtime_summary:
            dwave_time = runtime_summary['dwave']['avg_time']
            neal_time = runtime_summary['neal']['avg_time']
            
            if dwave_time > neal_time:
                print(f"• GPU simulator is {(dwave_time/neal_time):.1f}x faster than D-Wave")
            else:
                print(f"• D-Wave is {(neal_time/dwave_time):.1f}x faster than GPU simulator")
        
        # Identify crossover point
        dwave_max = max_nodes_handled.get('dwave', 0)
        gpu_max = max(max_nodes_handled.get(s, 0) for s in max_nodes_handled if s != 'dwave')
        
        if dwave_max > 0 and gpu_max > dwave_max:
            print(f"• Quantum advantage disappears around {dwave_max} nodes")
            print(f"• GPU simulators scale to {gpu_max}+ nodes")
        
        return quality_summary, runtime_summary, efficiency_scores
    def _generate_report(self, results: List[Dict]):
        """Generate comprehensive benchmark report"""
        if not results:
            return
        
        # First, analyze key metrics
        self.analyze_key_metrics(results)
        
        # Create summary table
        summary_data = []
        for result in results:
            instance = result['instance']
            nodes = result['nodes']
            best_known = result['best_known']
            
            row = {
                'Instance': f"G{instance}",
                'Nodes': nodes,
                'Edges': result['edges'],
                'Density': f"{result['density']:.4f}",
                'Best_Known': best_known if best_known else 'Unknown'
            }
            
            # Add sampler results
            for sampler_name, sampler_result in result['samplers'].items():
                if sampler_result['successful_runs'] > 0:
                    row[f'{sampler_name}_best'] = f"{sampler_result['best_cut']:.1f}"
                    row[f'{sampler_name}_time'] = f"{sampler_result['avg_time']:.2f}s"
                    if best_known:
                        ratio = sampler_result['best_cut'] / best_known
                        row[f'{sampler_name}_ratio'] = f"{ratio:.4f}"
                else:
                    row[f'{sampler_name}_best'] = 'Failed'
                    row[f'{sampler_name}_time'] = 'N/A'
                    row[f'{sampler_name}_ratio'] = 'N/A'
            
            summary_data.append(row)
        
        # Save summary table
        df = pd.DataFrame(summary_data)
        csv_file = self.results_dir / "benchmark_summary.csv"
        df.to_csv(csv_file, index=False)
        print(f"\nSummary saved to: {csv_file}")
        print("\nBenchmark Summary:")
        print(df.to_string(index=False))
        
        # Create visualizations
        self._create_visualizations(results)
    
    def _create_visualizations(self, results: List[Dict]):
        """Create benchmark visualization plots"""
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
        
        instances = [r['instance'] for r in results]
        nodes = [r['nodes'] for r in results]
        
        # Plot 1: Solution quality comparison (PRIMARY METRIC)
        sampler_names = list(results[0]['samplers'].keys())
        colors = plt.cm.Set1(np.linspace(0, 1, len(sampler_names)))
        
        for i, sampler_name in enumerate(sampler_names):
            ratios = []
            valid_instances = []
            for result in results:
                if result['best_known'] and result['samplers'][sampler_name]['successful_runs'] > 0:
                    ratio = result['samplers'][sampler_name]['best_cut'] / result['best_known']
                    ratios.append(ratio)
                    valid_instances.append(result['instance'])
            
            if ratios:
                ax1.plot(valid_instances, ratios, 'o-', label=sampler_name, 
                        color=colors[i], markersize=6)
        
        ax1.set_xlabel('Instance Number (Problem Size →)')
        ax1.set_ylabel('Approximation Ratio (Quality)')
        ax1.set_title('QUALITY: How Close to Optimal? (Higher = Better)')
        ax1.legend()
        ax1.grid(True, alpha=0.3)
        ax1.axhline(y=1.0, color='red', linestyle='--', alpha=0.7, label='Perfect (Optimal)')
        ax1.set_ylim(0.85, 1.05)
        
        # Plot 2: Runtime scaling (PRIMARY METRIC)
        for i, sampler_name in enumerate(sampler_names):
            times = []
            valid_nodes = []
            for result in results:
                if result['samplers'][sampler_name]['successful_runs'] > 0:
                    times.append(result['samplers'][sampler_name]['avg_time'])
                    valid_nodes.append(result['nodes'])
            
            if times:
                ax2.loglog(valid_nodes, times, 'o-', label=sampler_name, 
                          color=colors[i], markersize=6, linewidth=2)
        
        ax2.set_xlabel('Problem Size (Number of Nodes)')
        ax2.set_ylabel('Runtime (seconds) - Log Scale')
        ax2.set_title('SPEED: Runtime Scaling (Lower = Faster)')
        ax2.legend()
        ax2.grid(True, alpha=0.3)
        
        # Add reference lines for different scaling behaviors
        if valid_nodes:
            min_nodes, max_nodes = min(valid_nodes), max(valid_nodes)
            x_ref = np.array([min_nodes, max_nodes])
            ax2.plot(x_ref, x_ref/min_nodes * 0.1, '--', alpha=0.5, label='Linear O(n)', color='gray')
            ax2.plot(x_ref, (x_ref/min_nodes)**2 * 0.01, '--', alpha=0.5, label='Quadratic O(n²)', color='gray')
        
        # Plot 3: Success rate by instance size
        size_bins = [0, 1000, 3000, 10000, float('inf')]
        size_labels = ['<1K', '1K-3K', '3K-10K', '>10K']
        
        success_rates = {sampler: [0]*len(size_labels) for sampler in sampler_names}
        counts = [0] * len(size_labels)
        
        for result in results:
            nodes = result['nodes']
            size_idx = next(i for i, bin_max in enumerate(size_bins[1:]) if nodes <= bin_max)
            counts[size_idx] += 1
            
            for sampler_name in sampler_names:
                if result['samplers'][sampler_name]['successful_runs'] > 0:
                    success_rates[sampler_name][size_idx] += 1
        
        # Normalize to percentages
        for sampler_name in sampler_names:
            for i in range(len(size_labels)):
                if counts[i] > 0:
                    success_rates[sampler_name][i] = 100 * success_rates[sampler_name][i] / counts[i]
        
        x = np.arange(len(size_labels))
        width = 0.8 / len(sampler_names)
        
        for i, sampler_name in enumerate(sampler_names):
            ax3.bar(x + i * width, success_rates[sampler_name], width, 
                   label=sampler_name, color=colors[i], alpha=0.7)
        
        ax3.set_xlabel('Problem Size')
        ax3.set_ylabel('Success Rate (%)')
        ax3.set_title('Success Rate by Problem Size')
        ax3.set_xticks(x + width * (len(sampler_names) - 1) / 2)
        ax3.set_xticklabels(size_labels)
        ax3.legend()
        ax3.grid(True, alpha=0.3)
        
        # Plot 4: Best solutions found vs known optima
        for result in results:
            if result['best_known']:
                best_found = max(
                    sampler_result['best_cut'] 
                    for sampler_result in result['samplers'].values()
                    if sampler_result['successful_runs'] > 0
                )
                ax4.scatter(result['best_known'], best_found, 
                           s=50, alpha=0.7, c=result['nodes'], cmap='viridis')
        
        # Perfect correlation line
        if results:
            all_known = [r['best_known'] for r in results if r['best_known']]
            if all_known:
                min_val, max_val = min(all_known), max(all_known)
                ax4.plot([min_val, max_val], [min_val, max_val], 
                        'r--', alpha=0.5, label='Perfect')
        
        ax4.set_xlabel('Known Optimal Value')
        ax4.set_ylabel('Best Found Value')
        ax4.set_title('Solution Quality vs Known Optima')
        cbar = plt.colorbar(ax4.collections[0] if ax4.collections else None, ax=ax4)
        cbar.set_label('Number of Nodes')
        ax4.legend()
        ax4.grid(True, alpha=0.3)
        
        plt.tight_layout()
        
        # Save plots
        plot_file = self.results_dir / "benchmark_plots.png"
        plt.savefig(plot_file, dpi=300, bbox_inches='tight')
        print(f"Plots saved to: {plot_file}")
        
        plt.show()

In [5]:
def main():
    """Main benchmarking script"""
    print("=== Max-Cut GPU Benchmarking with D-Wave Ocean SDK ===\n")
    
    # Initialize benchmark system
    benchmark = MaxCutBenchmark()
    
    # Define test progression
    small_instances = [11, 12, 13, 14]  # Good for D-Wave testing
    medium_instances = [43, 44]         # Borderline D-Wave capability
    large_instances = [48, 49, 54]      # GPU territory
    very_large_instances = [70, 71]     # Large-scale GPU testing
    
    # Choose your test set based on available time/resources
    quick_test = small_instances[:2]
    full_test = small_instances + medium_instances + large_instances
    comprehensive_test = small_instances + medium_instances + large_instances + very_large_instances
    
    print("Available test configurations:")
    print(f"1. Quick test: {quick_test}")
    print(f"2. Full test: {full_test}")
    print(f"3. Comprehensive test: {comprehensive_test}")
    
    # Run benchmark (change this to your desired test set)
    test_instances = quick_test  # Start with quick test
    
    print(f"\nRunning benchmark on instances: {test_instances}")
    results = benchmark.run_systematic_benchmark(test_instances, num_runs=3)
    
    print("\n=== Benchmark Complete ===")
    print(f"Results saved in: {benchmark.results_dir}")
    
    return results

if __name__ == "__main__":
    results = main()

=== Max-Cut GPU Benchmarking with D-Wave Ocean SDK ===

D-Wave sampler initialization failed: API token not defined
Initialized samplers: ['neal', 'simulated_annealing', 'tabu']
Available test configurations:
1. Quick test: [11, 12]
2. Full test: [11, 12, 13, 14, 43, 44, 48, 49, 54]
3. Comprehensive test: [11, 12, 13, 14, 43, 44, 48, 49, 54, 70, 71]

Running benchmark on instances: [11, 12]
Starting systematic benchmark of 2 instances
Instance progression: [11, 12]

=== Benchmarking G11 ===
Downloading G11...
Nodes: 800, Edges: 1600
QUBO terms: 2400, Best known: 564

Testing neal...
  Run 1: Cut=-530.0, Time=147.36s
  Run 2: Cut=-530.0, Time=151.00s

Benchmark interrupted by user

=== Benchmark Complete ===
Results saved in: maxcut_results
