In [None]:
import numpy as np
import json
import os
from pathlib import Path
import qiskit
from itertools import product

from qiskit import qasm2, QuantumCircuit
from qiskit.quantum_info import Statevector, DensityMatrix
from qiskit_aer import AerSimulator
from qiskit_aer.noise import (
    NoiseModel, 
    depolarizing_error, 
    amplitude_damping_error,
    ReadoutError
)
from qiskit.primitives import StatevectorEstimator as Estimator
from qiskit.quantum_info import SparsePauliOp
from qiskit.circuit.library import EfficientSU2 
import warnings
warnings.filterwarnings('ignore')

In [None]:
class CircuitGenerator:
    """Generate random EfficientSU2 circuits and their noiseless expectation values.

    Rotation angles are drawn from a random generator owned by the instance, so
    constructing a CircuitGenerator does not reseed NumPy's global random state.
    """

    # Layouts forwarded to EfficientSU2; any other request falls back to 'linear'.
    _SUPPORTED_ENTANGLEMENT = ('linear', 'full', 'pairwise')

    def __init__(self, num_qubits=4, seed=42):
        """Store the circuit width and seed the instance's private RNG.

        Args:
            num_qubits: Number of qubits of every generated circuit.
            seed: Seed for the rotation-angle generator.
        """
        self.num_qubits = num_qubits
        self.seed = seed
        # RandomState(seed) yields the same draws that np.random.seed(seed)
        # followed by np.random.uniform would, without touching global state.
        self._rng = np.random.RandomState(seed)

    def create_random_circuit(self, depth=2, entanglement="linear", circuit_id=0):
        """Build one EfficientSU2 circuit with uniformly random angles in [0, 2*pi).

        Args:
            depth: Passed to EfficientSU2 as ``reps``.
            entanglement: 'linear', 'full' or 'pairwise'. Any other value falls
                back to 'linear' (a notice is printed).
            circuit_id: Identifier stored in the circuit metadata.

        Returns:
            The ansatz with all parameters assigned. ``metadata['entanglement']``
            holds the layout that was actually used to build the circuit.
        """
        if entanglement in self._SUPPORTED_ENTANGLEMENT:
            ent_pattern = entanglement
        else:
            ent_pattern = 'linear'
            print(f"Warning: unsupported entanglement '{entanglement}', using 'linear' instead")

        ansatz = EfficientSU2(
            num_qubits=self.num_qubits,
            reps=depth,
            entanglement=ent_pattern,
            insert_barriers=False
        )

        num_params = ansatz.num_parameters
        random_params = self._rng.uniform(0, 2*np.pi, num_params)

        qc = ansatz.assign_parameters(random_params)

        qc.metadata = {
            'circuit_id': circuit_id,
            'num_qubits': self.num_qubits,
            'depth': depth,
            # Record the effective layout so the label always matches the circuit.
            'entanglement': ent_pattern,
            'ansatz_type': 'EfficientSU2',
            'num_parameters': num_params,
            'parameters': random_params.tolist()
        }

        return qc

    def get_observables(self):
        """Return ``[(operator, label)]`` with the single all-qubit Z operator, labelled 'Z_all'."""
        pauli_string = 'Z' * self.num_qubits
        return [(SparsePauliOp(pauli_string), 'Z_all')]

    def simulate_ideal(self, circuit):
        """Compute noiseless expectation values from the circuit's statevector.

        Returns:
            Dict mapping each observable label to the real part of its
            expectation value.
        """
        sv = Statevector.from_instruction(circuit)
        return {
            label: sv.expectation_value(obs).real
            for obs, label in self.get_observables()
        }

    def generate_dataset(self, num_circuits=20, base_depth=2, depth_multipliers=(1, 2, 3),
                        entanglement_types=("linear", "full", "pairwise"), output_dir='data/ideal'):
        """Generate circuits for every (depth multiplier, entanglement) pair and save them.

        One JSON file per circuit is written to ``output_dir`` (created if
        missing), holding the configuration, the ideal expectation values and
        the circuit as an OpenQASM 2 string.

        Args:
            num_circuits: Circuits generated per configuration.
            base_depth: Multiplied by each depth multiplier to obtain ``reps``.
            depth_multipliers: Sized iterable of integer multipliers.
            entanglement_types: Sized iterable of entanglement layout names.
            output_dir: Directory receiving the per-circuit JSON files.

        Returns:
            List of the entry dicts that were written, in generation order.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        total_configs = len(depth_multipliers) * len(entanglement_types)
        total_circuits = num_circuits * total_configs

        print(f"Generating {total_circuits} circuits using EfficientSU2 ansatz")

        dataset = []
        circuit_counter = 0

        for depth_mult, entanglement in product(depth_multipliers, entanglement_types):
            actual_depth = base_depth * depth_mult

            for _ in range(num_circuits):
                circuit = self.create_random_circuit(
                    depth=actual_depth,
                    entanglement=entanglement,
                    circuit_id=circuit_counter
                )
                # Label entry and filename with the layout the circuit really has.
                used_entanglement = circuit.metadata['entanglement']

                # Plain floats so the values are JSON serializable.
                ideal_expectations_serializable = {
                    key: float(value)
                    for key, value in self.simulate_ideal(circuit).items()
                }

                # NOTE(review): the assigned ansatz is exported as-is; confirm the
                # QASM round-trip yields the gate names ('rx', 'ry', 'rz', 'cx')
                # that NoiseInjector attaches errors to.
                qasm_str = qasm2.dumps(circuit)

                entry = {
                    'circuit_id': int(circuit_counter),
                    'num_qubits': int(self.num_qubits),
                    'depth': int(actual_depth),
                    'depth_multiplier': int(depth_mult),
                    'base_depth': int(base_depth),
                    'entanglement': used_entanglement,
                    'ansatz_type': 'EfficientSU2',
                    'num_parameters': int(circuit.metadata['num_parameters']),
                    'ideal_expectations': ideal_expectations_serializable,
                    'circuit_qasm': qasm_str
                }

                dataset.append(entry)

                filename = f'circuit_{circuit_counter:04d}_d{actual_depth}_{used_entanglement}_ideal.json'
                with open(os.path.join(output_dir, filename), 'w') as f:
                    json.dump(entry, f, indent=2)

                circuit_counter += 1

                if circuit_counter % 10 == 0:
                    print(f"Generated {circuit_counter}/{total_circuits} circuits")

        return dataset

In [None]:
class NoiseInjector:
    """Build Aer noise models and compute noisy <Z...Z> values for saved circuits."""

    # Noise families create_noise_model knows how to build.
    _NOISE_TYPES = ('depolarizing', 'amplitude_damping', 'readout')

    def __init__(self, num_qubits=4):
        """Remember the circuit width used for the observable and readout errors."""
        self.num_qubits = num_qubits

    @staticmethod
    def _readout_attenuation(flip_prob, num_qubits):
        """Factor by which independent symmetric readout flips scale <Z...Z>.

        Each measured bit flips with probability ``flip_prob``, which multiplies
        that qubit's Z eigenvalue by (1 - 2*flip_prob) on average; the all-Z
        product over ``num_qubits`` qubits is therefore scaled by that factor
        raised to ``num_qubits``.
        """
        return (1.0 - 2.0 * flip_prob) ** num_qubits

    def create_noise_model(self, noise_type='depolarizing', error_rate=0.01):
        """Create a NoiseModel for one noise family.

        Args:
            noise_type: 'depolarizing', 'amplitude_damping' or 'readout'.
            error_rate: Error probability. For 'depolarizing' the two-qubit
                error on 'cx' uses ``min(error_rate * 10, 0.3)``.

        Returns:
            The configured NoiseModel.

        Raises:
            ValueError: If ``noise_type`` is not one of the supported names.
                Previously such a value produced a noiseless model that was
                still labelled as noisy downstream.
        """
        if noise_type not in self._NOISE_TYPES:
            raise ValueError(
                f"Unknown noise_type '{noise_type}'; expected one of {self._NOISE_TYPES}"
            )

        noise_model = NoiseModel()

        if noise_type == 'depolarizing':
            error = depolarizing_error(error_rate, 1)
            noise_model.add_all_qubit_quantum_error(error, ['rx', 'ry', 'rz'])

            error_2q = depolarizing_error(min(error_rate * 10, 0.3), 2)
            noise_model.add_all_qubit_quantum_error(error_2q, ['cx'])

        elif noise_type == 'amplitude_damping':
            # Attached to the single-qubit rotations only.
            error = amplitude_damping_error(error_rate)
            noise_model.add_all_qubit_quantum_error(error, ['rx', 'ry', 'rz'])

        else:  # 'readout'
            # Symmetric assignment error: each outcome flips with error_rate.
            readout_error = ReadoutError([[1 - error_rate, error_rate],
                                           [error_rate, 1 - error_rate]])
            for qubit in range(self.num_qubits):
                noise_model.add_readout_error(readout_error, [qubit])

        return noise_model

    def simulate_noisy(self, circuit, noise_model, shots=8192, readout_flip_prob=0.0):
        """Return the noisy expectation value of Z on all qubits.

        The circuit is run once on the density-matrix simulator and the
        expectation value is taken from the saved density matrix, so the result
        carries no sampling noise.

        Args:
            circuit: Circuit to simulate; a copy receives the save instruction.
            noise_model: Noise model handed to AerSimulator.
            shots: Unused; kept so existing callers keep working. The density
                matrix is saved from a single run.
            readout_flip_prob: Per-qubit symmetric readout flip probability.
                Readout errors act on measurement outcomes, and this circuit is
                never measured, so a readout-only noise model leaves the saved
                density matrix unchanged. The effect on <Z...Z> is applied here
                analytically instead. The default 0.0 leaves the value as is.

        Returns:
            ``{'Z_all': value}`` with ``value`` a Python float.
        """
        simulator = AerSimulator(noise_model=noise_model, method='density_matrix')

        observable = SparsePauliOp('Z' * self.num_qubits)

        qc_measure = circuit.copy()
        qc_measure.save_density_matrix()

        result = simulator.run(qc_measure, shots=1).result()
        noisy_dm = result.data()['density_matrix']

        exp_val = noisy_dm.expectation_value(observable).real
        exp_val *= self._readout_attenuation(readout_flip_prob, self.num_qubits)

        return {'Z_all': float(exp_val)}

    def add_noise_to_dataset(self, ideal_dir='data/ideal',
                            noisy_dir='data/noisy',
                            noise_types=('depolarizing', 'amplitude_damping', 'readout'),
                            error_rates=(0.001, 0.01, 0.1)):
        """Simulate every saved ideal circuit under every noise configuration.

        Reads ``circuit_*_ideal.json`` files from ``ideal_dir`` and writes one
        JSON file per (circuit, noise type, error rate) into ``noisy_dir``
        (created if missing). Each output carries both the noisy and the ideal
        expectation values.

        Args:
            ideal_dir: Directory holding the ideal circuit files.
            noisy_dir: Directory receiving the noisy result files.
            noise_types: Iterable of noise family names.
            error_rates: Iterable of error probabilities.
        """
        Path(noisy_dir).mkdir(parents=True, exist_ok=True)

        ideal_files = sorted(Path(ideal_dir).glob('circuit_*_ideal.json'))
        noise_configs = list(product(noise_types, error_rates))

        print(f"Applying noise to {len(ideal_files)} circuits...")

        for config_idx, (noise_type, error_rate) in enumerate(noise_configs):
            print(f"Config {config_idx+1}/{len(noise_configs)}: {noise_type} (rate={error_rate})")

            noise_model = self.create_noise_model(noise_type, error_rate)
            # Only the readout family needs the analytic measurement correction.
            readout_flip_prob = error_rate if noise_type == 'readout' else 0.0

            for ideal_file in ideal_files:
                with open(ideal_file, 'r') as f:
                    ideal_data = json.load(f)

                circuit_id = ideal_data['circuit_id']
                qc = QuantumCircuit.from_qasm_str(ideal_data['circuit_qasm'])
                noisy_expectations = self.simulate_noisy(
                    qc, noise_model, readout_flip_prob=readout_flip_prob
                )

                noisy_entry = {
                    'circuit_id': int(circuit_id),
                    'num_qubits': int(ideal_data['num_qubits']),
                    'depth': int(ideal_data['depth']),
                    'depth_multiplier': int(ideal_data.get('depth_multiplier', 1)),
                    'entanglement': ideal_data.get('entanglement', 'linear'),
                    'ansatz_type': ideal_data.get('ansatz_type', 'EfficientSU2'),
                    'noise_type': noise_type,
                    'error_rate': float(error_rate),
                    'noisy_expectations': noisy_expectations,
                    'ideal_expectations': ideal_data['ideal_expectations']
                }

                filename = f'circuit_{circuit_id:04d}_noisy_{noise_type}_{error_rate}.json'
                with open(os.path.join(noisy_dir, filename), 'w') as f:
                    json.dump(noisy_entry, f, indent=2)

            print(f"Processed {len(ideal_files)} circuits with {noise_type}")


In [None]:
class DataIntegrator:
    """Assemble noisy/ideal result files into training datasets and summaries."""

    def __init__(self):
        """No state is kept; the methods operate on the paths they are given."""
        pass

    def create_training_pairs(self, noisy_dir='data/noisy', output_file='data/qem_dataset.json'):
        """Collect every noisy result file into one list of training pairs.

        Reads ``circuit_*_noisy_*.json`` from ``noisy_dir`` and writes the list
        to ``output_file``. A summary is written next to it as
        ``<output stem>_summary.json``.

        Args:
            noisy_dir: Directory holding the noisy result files.
            output_file: Destination of the dataset (str or Path).

        Returns:
            The list of training-pair dicts that was written.
        """
        noisy_files = sorted(Path(noisy_dir).glob('circuit_*_noisy_*.json'))

        print(f"Creating training dataset from {len(noisy_files)} samples...")

        training_data = []

        for noisy_file in noisy_files:
            with open(noisy_file, 'r') as f:
                data = json.load(f)

            noisy = data['noisy_expectations']
            ideal = data['ideal_expectations']
            # Pair values by observable name so x_noisy, x_ideal and
            # observable_names always line up element by element.
            observable_names = list(noisy.keys())
            x_noisy = [noisy[name] for name in observable_names]
            x_ideal = [ideal[name] for name in observable_names]

            training_pair = {
                'circuit_id': int(data['circuit_id']),
                'num_qubits': int(data['num_qubits']),
                'depth': int(data['depth']),
                'depth_multiplier': int(data.get('depth_multiplier', 1)),
                'entanglement': data.get('entanglement', 'linear'),
                'ansatz_type': data.get('ansatz_type', 'EfficientSU2'),
                'noise_type': data['noise_type'],
                'error_rate': float(data['error_rate']),
                'x_noisy': x_noisy,
                'x_ideal': x_ideal,
                'observable_names': observable_names
            }

            training_data.append(training_pair)

        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            json.dump(training_data, f, indent=2)

        summary = {
            'num_samples': len(training_data),
            'num_features': len(training_data[0]['x_noisy']) if training_data else 1,
            'ansatz_type': 'EfficientSU2',
            'qubit_counts': sorted(set(d['num_qubits'] for d in training_data)),
            'depth_multipliers': sorted(set(d.get('depth_multiplier', 1) for d in training_data)),
            'entanglement_types': sorted(set(d.get('entanglement', 'linear') for d in training_data)),
            'noise_types': sorted(set(d['noise_type'] for d in training_data)),
            'error_rates': sorted(set(d['error_rate'] for d in training_data)),
            'observable_used': 'Z⊗Z⊗...⊗Z (global Z measurement)'
        }

        # Derive the summary name from the stem: a plain str.replace('.json', ...)
        # returns the dataset path itself when the name lacks '.json', and the
        # summary would then overwrite the dataset.
        summary_path = output_path.with_name(f'{output_path.stem}_summary.json')
        with open(summary_path, 'w') as f:
            json.dump(summary, f, indent=2)

        print(f"Dataset saved to: {output_file}")
        print(f"Total samples: {len(training_data)}")

        return training_data

    def create_combined_dataset(self, qubit_counts, base_output_dir='data'):
        """Concatenate the per-qubit-count datasets into one file plus a summary.

        For each entry of ``qubit_counts`` the file
        ``<base_output_dir>/<n>qubit/qem_dataset_<n>qubit.json`` is loaded; a
        missing file is reported and skipped.

        Args:
            qubit_counts: Iterable of qubit counts to combine.
            base_output_dir: Root directory of the per-qubit datasets; the
                combined files are written directly into it.

        Returns:
            The combined list of training pairs.
        """
        print("Combining datasets from all qubit configurations")

        # Materialize once: the sequence is iterated several times below.
        qubit_counts = list(qubit_counts)
        combined_data = []
        qubit_stats = {}

        for num_qubits in qubit_counts:
            dataset_file = f'{base_output_dir}/{num_qubits}qubit/qem_dataset_{num_qubits}qubit.json'

            if not os.path.exists(dataset_file):
                print(f"Warning: Dataset for {num_qubits} qubits not found at {dataset_file}")
                continue

            with open(dataset_file, 'r') as f:
                qubit_data = json.load(f)

            combined_data.extend(qubit_data)

            qubit_stats[num_qubits] = {
                'num_samples': len(qubit_data),
                'num_features': 1,
            }

            print(f"Loaded {len(qubit_data)} samples from {num_qubits}-qubit dataset")

        combined_output_file = f'{base_output_dir}/qem_dataset_combined_full.json'
        Path(combined_output_file).parent.mkdir(parents=True, exist_ok=True)

        with open(combined_output_file, 'w') as f:
            json.dump(combined_data, f, indent=2)

        combined_summary = {
            'total_samples': len(combined_data),
            'ansatz_type': 'EfficientSU2',
            'qubit_configurations': qubit_counts,
            'samples_per_qubit_count': {
                str(qc): qubit_stats.get(qc, {}).get('num_samples', 0)
                for qc in qubit_counts
            },
            'features_per_qubit_count': {str(qc): 1 for qc in qubit_counts},
            'depth_multipliers': sorted(set(d.get('depth_multiplier', 1) for d in combined_data)),
            'entanglement_types': sorted(set(d.get('entanglement', 'linear') for d in combined_data)),
            'noise_types': sorted(set(d['noise_type'] for d in combined_data)),
            'error_rates': sorted(set(d['error_rate'] for d in combined_data)),
            'dataset_breakdown': qubit_stats,
            'observable_used': 'Z⊗Z⊗...⊗Z (global Z measurement)'
        }

        combined_summary_file = f'{base_output_dir}/qem_dataset_combined_full_summary.json'
        with open(combined_summary_file, 'w') as f:
            json.dump(combined_summary, f, indent=2)

        print(f"Combined dataset saved to: {combined_output_file}")

        return combined_data

    def visualize_sample(self, dataset, sample_idx=0):
        """Print the key fields of one training pair.

        Prints a message instead of raising when the dataset is empty or
        ``sample_idx`` does not address an existing sample.
        """
        if not dataset:
            print("No data to visualize")
            return

        if not -len(dataset) <= sample_idx < len(dataset):
            print(f"Sample index {sample_idx} out of range (dataset has {len(dataset)} samples)")
            return

        sample = dataset[sample_idx]
        print(f"Sample {sample_idx}:")
        print(f"  Circuit ID: {sample['circuit_id']}")
        print(f"  Qubits: {sample['num_qubits']}")
        print(f"  Depth: {sample['depth']}")
        print(f"  Noise: {sample['noise_type']} (rate={sample['error_rate']})")
        print(f"  Noisy expectation: {sample['x_noisy'][0]:.6f}")
        print(f"  Ideal expectation: {sample['x_ideal'][0]:.6f}")

In [None]:
def generate_for_qubit_count(num_qubits, num_circuits=10, base_depth=2, base_output_dir='data'):
    """Run the ideal -> noisy -> training-pair pipeline for one qubit count.

    Everything is written beneath ``<base_output_dir>/<num_qubits>qubit``. The
    sweep covers depth multipliers 1-3, the linear/full/pairwise entanglement
    layouts, three noise families and three error rates.

    Args:
        num_qubits: Width of the generated circuits.
        num_circuits: Circuits per (depth multiplier, entanglement) pair.
        base_depth: Depth that each multiplier scales.
        base_output_dir: Root directory for all outputs.

    Returns:
        The training pairs returned by DataIntegrator.create_training_pairs.
    """
    print(f"Generating dataset for {num_qubits} qubits (EfficientSU2)")

    # All artefacts of this qubit count share one sub-directory.
    qubit_root = f'{base_output_dir}/{num_qubits}qubit'
    ideal_dir = f'{qubit_root}/ideal'
    noisy_dir = f'{qubit_root}/noisy'
    dataset_file = f'{qubit_root}/qem_dataset_{num_qubits}qubit.json'

    # Stage 1: ideal circuits and their reference expectation values.
    print("Generating ideal circuits...")
    CircuitGenerator(num_qubits=num_qubits).generate_dataset(
        num_circuits=num_circuits,
        base_depth=base_depth,
        depth_multipliers=[1, 2, 3],
        entanglement_types=["linear", "full", "pairwise"],
        output_dir=ideal_dir
    )

    # Stage 2: re-simulate every saved circuit under each noise configuration.
    print("Adding noise...")
    NoiseInjector(num_qubits=num_qubits).add_noise_to_dataset(
        ideal_dir=ideal_dir,
        noisy_dir=noisy_dir,
        noise_types=['depolarizing', 'amplitude_damping', 'readout'],
        error_rates=[0.001, 0.01, 0.1]
    )

    # Stage 3: merge the noisy files into one dataset file.
    print("Creating training pairs...")
    integrator = DataIntegrator()
    pairs = integrator.create_training_pairs(
        noisy_dir=noisy_dir,
        output_file=dataset_file
    )

    # Show the first sample as a quick sanity check when there is one.
    if pairs:
        integrator.visualize_sample(pairs, sample_idx=0)

    return pairs

def main():
    """Generate the dataset for each configured qubit count, then combine them.

    A failure for one qubit count is reported and the remaining counts are
    still processed. The combined dataset is built from whatever per-qubit
    dataset files exist under the output directory.
    """
    QUBIT_COUNTS = [4, 8]
    NUM_CIRCUITS = 10
    BASE_DEPTH = 2
    BASE_OUTPUT_DIR = 'data'

    # Sizes of the sweeps hard-coded in generate_for_qubit_count.
    NUM_DEPTH_MULTIPLIERS = 3
    NUM_ENTANGLEMENT_TYPES = 3
    NUM_NOISE_TYPES = 3
    NUM_ERROR_RATES = 3

    all_datasets = {}

    for num_qubits in QUBIT_COUNTS:
        try:
            training_data = generate_for_qubit_count(
                num_qubits=num_qubits,
                num_circuits=NUM_CIRCUITS,
                base_depth=BASE_DEPTH,
                base_output_dir=BASE_OUTPUT_DIR
            )
            all_datasets[f'{num_qubits}qubit'] = training_data
        except Exception as e:
            # Best effort: report the failure and carry on with the next count.
            print(f"Error generating {num_qubits}-qubit dataset: {e}")
            continue

    print("Individual datasets complete!")

    expected = (NUM_CIRCUITS * NUM_DEPTH_MULTIPLIERS * NUM_ENTANGLEMENT_TYPES
                * NUM_NOISE_TYPES * NUM_ERROR_RATES)

    for num_qubits in QUBIT_COUNTS:
        dataset_key = f'{num_qubits}qubit'
        if dataset_key in all_datasets:
            num_samples = len(all_datasets[dataset_key])

            print(f"\n{num_qubits} qubits:")
            print(f"  Generated: {num_samples} samples")
            print(f"  Expected: {expected} samples")
            # Report the directory actually written to rather than a fixed 'data/'.
            print(f"  Location: {BASE_OUTPUT_DIR}/{num_qubits}qubit/qem_dataset_{num_qubits}qubit.json")

    integrator = DataIntegrator()
    integrator.create_combined_dataset(
        qubit_counts=QUBIT_COUNTS,
        base_output_dir=BASE_OUTPUT_DIR
    )

    print("Dataset generation complete!")

In [None]:
# Entry point: run the whole generation pipeline when this cell/script is
# executed as the main program (not when the code is imported as a module).
if __name__ == '__main__':
    main()