In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple
import logging
from pathlib import Path
import json
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor
import itertools
import time
from data_generator import TestConfiguration, create_test_instance
from dynamic_pricing_algorithms import DynamicProgramming, StochasticApproximation

In [None]:
# Root logging setup: emit INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Logger shared by every definition in this notebook.
logger = logging.getLogger(__name__)

In [None]:
class ScalabilityExperiment:
    """
    Implementation of Experiment 2: Computational Efficiency and Scalability.

    Builds a grid of test cases (property type x booking horizon ``T`` x
    service horizon ``N``), runs ``StochasticApproximation`` on each case and
    ``DynamicProgramming`` on cases whose ``capacity ** N`` does not exceed
    ``dp_size_limit``, and collects run time and revenue per replication.

    Attributes:
        base_results_dir: Root directory of the experiment.
        output_dir: ``base_results_dir / "results"``; created on construction.
        property_configs: Per-property capacity and horizon lists.
        test_cases: Cartesian expansion of ``property_configs``.
        market_condition, demand_scenario: Fixed instance-generation settings.
        learning_params: Parameter dict handed to ``StochasticApproximation``.
        num_replications: Replications per test case (seed = 1000 * index).
        dp_size_limit: Largest ``capacity ** N`` for which DP is attempted.
    """

    def __init__(self, base_results_dir: str = "../experiments/experiment2"):
        """Initialize experiment parameters and create the results directory.

        Args:
            base_results_dir: Root directory; a ``results`` sub-directory is
                created beneath it (parents included) if it does not exist.
        """
        # Set up results directory
        self.base_results_dir = Path(base_results_dir)
        self.output_dir = self.base_results_dir / "results"
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Define property configurations
        self.property_configs = [
            {
                'type': 'boutique',
                'capacity': 50,
                'booking_horizons': [28, 56, 84],
                'service_horizons': [7, 14, 28]
            },
            {
                'type': 'mid_size',
                'capacity': 150,
                'booking_horizons': [56, 84, 168],
                'service_horizons': [14, 28, 84]
            },
            {
                'type': 'large',
                'capacity': 300,
                'booking_horizons': [84, 168, 336],
                'service_horizons': [28, 84]
            },
            {
                'type': 'resort_chain',
                'capacity': 600,
                'booking_horizons': [168, 336],
                'service_horizons': [28, 84]
            }
        ]

        # Generate test cases from property configurations
        self.test_cases = self._generate_test_cases()

        # Fixed parameters
        self.market_condition = 'standard'
        self.demand_scenario = 'base'

        # SAA learning parameters
        self.learning_params = {
            'eta_0': 0.3,
            'gamma': 0.05,
            'eta_min': 0.001,
            'max_epochs': 1000,
            'batch_size': 64
        }

        # Experiment parameters
        self.num_replications = 5
        # NOTE(review): the smallest configured case is 50 ** 7 (about 7.8e11),
        # so with this limit the DP branch is never taken for the default
        # property_configs - confirm the limit is intended.
        self.dp_size_limit = 1000

        logger.info(f"Initialized ScalabilityExperiment with {len(self.test_cases)} test cases")

    def _generate_test_cases(self) -> List[Dict]:
        """Expand ``property_configs`` into one dict per (property, T, N).

        Returns:
            List of dicts with keys ``property_type``, ``capacity``, ``T`` and
            ``N``, ordered by config, then booking horizon, then service
            horizon.
        """
        return [
            {
                'property_type': config['type'],
                'capacity': config['capacity'],
                'T': T,
                'N': N
            }
            for config in self.property_configs
            for T in config['booking_horizons']
            for N in config['service_horizons']
        ]

    def generate_test_instance(self, test_case: Dict, seed: int) -> Dict:
        """Generate a test instance for one test case.

        Starts from the ``'minimal'`` test configuration (standard
        discretization, this experiment's market condition) and overrides
        ``T``, ``N`` and ``C`` with the test case's horizons and capacity.

        Args:
            test_case: Dict providing ``T``, ``N`` and ``capacity``.
            seed: Seed forwarded to ``create_test_instance``.

        Returns:
            Whatever ``create_test_instance`` returns for these settings.
        """
        config = TestConfiguration()
        test_params = config.get_config(
            test_type='minimal',
            market_condition=self.market_condition,
            discretization='standard'
        )

        # Override with experiment-specific parameters
        test_params.update({
            'T': test_case['T'],
            'N': test_case['N'],
            'C': test_case['capacity']
        })

        return create_test_instance(
            demand_scenario=self.demand_scenario,
            market_condition=self.market_condition,
            test_configuration=test_params,
            seed=seed
        )

    def run_single_instance(self, test_case: Dict, replication: int) -> Dict:
        """Run SAA (and DP when feasible) on a single test instance.

        Args:
            test_case: Dict with at least ``capacity``, ``T`` and ``N``; all of
                its entries are copied into the result.
            replication: Replication index; the instance seed is
                ``1000 * replication``.

        Returns:
            Dict containing the test-case entries plus ``replication``,
            ``state_space_size``, ``saa_time``, ``saa_revenue``, ``dp_time``
            and ``dp_revenue``. The two DP entries are ``None`` when
            ``state_space_size`` exceeds ``dp_size_limit``.

        Raises:
            Exception: Any error from instance generation or a solver is
                logged and re-raised unchanged.
        """
        try:
            logger.debug(f"Processing test case: {test_case}, replication: {replication}")

            # Generate test instance
            instance = self.generate_test_instance(test_case, seed=1000 * replication)

            # Initialize results dictionary with test case parameters.
            # capacity ** N is an arbitrary-precision int and may be huge.
            results = {
                **test_case,  # Include all test case parameters
                'replication': replication,
                'state_space_size': test_case['capacity'] ** test_case['N']
            }

            # Run SAA; its run time is the value reported by solve() itself,
            # so no external timer is needed here.
            saa = StochasticApproximation(instance, self.learning_params)
            _saa_prices, saa_revenue, saa_time = saa.solve()
            results['saa_time'] = saa_time
            results['saa_revenue'] = saa_revenue

            # Run DP only if state space is manageable
            if results['state_space_size'] <= self.dp_size_limit:
                dp = DynamicProgramming(instance)
                # perf_counter is monotonic, unlike time.time(), so the
                # measured duration cannot be skewed by wall-clock changes.
                dp_start = time.perf_counter()
                _dp_policy, dp_value = dp.solve()
                results['dp_time'] = time.perf_counter() - dp_start
                results['dp_revenue'] = dp_value
            else:
                results['dp_time'] = None
                results['dp_revenue'] = None

            logger.debug(f"Completed test case: {test_case}, replication: {replication}")
            return results

        except Exception as e:
            logger.error(f"Error processing test case {test_case}, replication {replication}: {str(e)}")
            raise

    def run_experiment(self, num_workers: int = 4) -> pd.DataFrame:
        """Run the complete scalability experiment.

        Every test case is repeated ``num_replications`` times and submitted to
        a process pool. A failing case is logged, together with the case that
        failed, and skipped; the remaining results are kept in submission
        order and written to ``output_dir / 'raw_results.csv'``.

        NOTE(review): the submitted callable is a bound method, so ``self``
        must be picklable and this class importable by worker processes;
        confirm this holds when the class lives only in a notebook cell.

        Args:
            num_workers: ``max_workers`` for the ``ProcessPoolExecutor``.

        Returns:
            DataFrame with one row per successful (test case, replication).
        """
        logger.info("Starting scalability experiment")

        # Generate replication cases
        all_test_cases = [
            {**test_case, 'replication': r}
            for test_case in self.test_cases
            for r in range(self.num_replications)
        ]

        logger.info(f"Generated {len(all_test_cases)} total test cases with replications")

        # Run experiments in parallel
        results = []
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            future_to_case = {
                executor.submit(
                    self.run_single_instance,
                    case,
                    case['replication']
                ): case for case in all_test_cases
            }

            # Dict order is submission order, so results stay aligned with
            # all_test_cases; the mapped case identifies any failure.
            for future, case in future_to_case.items():
                try:
                    result = future.result()
                except Exception as e:
                    logger.error(f"Error in parallel execution for test case {case}: {str(e)}")
                    continue
                if result is not None:
                    results.append(result)

        # Convert to DataFrame
        results_df = pd.DataFrame(results)

        # Save raw results
        results_df.to_csv(self.output_dir / 'raw_results.csv', index=False)
        logger.info(f"Completed experiment with {len(results_df)} successful test cases")

        return results_df

In [None]:
def test_scalability_experiment():
    """Smoke-test ScalabilityExperiment end to end.

    Verifies, in order: test-case generation, one direct
    ``run_single_instance`` call on the first test case, and a full
    ``run_experiment`` pass with a single replication and a single worker.
    Raises ``AssertionError`` at the first failed check and prints a
    confirmation line when every check passes.
    """
    exp = ScalabilityExperiment()

    # --- test-case generation -------------------------------------------
    assert len(exp.test_cases) > 0, "Test cases should be generated"
    first_case = exp.test_cases[0]
    absent_params = [
        key for key in ('property_type', 'capacity', 'T', 'N')
        if key not in first_case
    ]
    assert not absent_params, "Test case should contain all required parameters"

    # --- one instance, run directly in this process ----------------------
    single = exp.run_single_instance(first_case, replication=0)
    assert single is not None, "Single instance execution should succeed"
    absent_metrics = [key for key in ('saa_time', 'saa_revenue') if key not in single]
    assert not absent_metrics, "Result should contain algorithm metrics"

    # --- whole experiment, one replication to keep the run short ---------
    exp.num_replications = 1
    frame = exp.run_experiment(num_workers=1)
    assert len(frame) > 0, "Experiment should produce results"
    columns = frame.columns
    assert 'property_type' in columns, "Results should contain property type"
    assert 'capacity' in columns, "Results should contain capacity"

    print("All tests passed successfully!")

In [None]:
# Entry point: run the smoke test when this code executes as the main module
# (as it does when the notebook cell is run), not when it is imported.
if __name__ == "__main__":
    test_scalability_experiment()