In [3]:
import os# Import the 'os' module for interacting with the operating system (e.g., file paths)
import pandas as pd# Import pandas library and alias it as 'pd' for data manipulation and analysis
import numpy as np# Import numpy library and alias it as 'np' for numerical operations
import torch# Import PyTorch library and alias it as 'torch' for deep learning tasks
import torch.nn as nn# Import the neural network module from PyTorch for building neural networks
import torch.optim as optim# Import the optimization module from PyTorch for training models
from sklearn.datasets import make_classification# Import 'make_classification' from sklearn.datasets to generate synthetic classification datasets
from sklearn.ensemble import RandomForestClassifier# Import RandomForestClassifier from sklearn.ensemble for classification tasks using Random Forest algorithm
from imblearn.over_sampling import SMOTE# Import SMOTE (Synthetic Minority Over-sampling Technique) from imblearn.over_sampling for handling imbalanced datasets
from imblearn.pipeline import Pipeline# Import Pipeline from imblearn.pipeline to create a sequence of data processing steps
from sklearn.model_selection import train_test_split# Import 'train_test_split' from sklearn.model_selection to split datasets into training and testing sets
from sklearn.manifold import TSNE# Import TSNE (t-Distributed Stochastic Neighbor Embedding) from sklearn.manifold for dimensionality reduction and visualization
import matplotlib.pyplot as plt# Import matplotlib.pyplot as 'plt' for creating plots and visualizations
from sklearn.ensemble import RandomForestRegressor# Import RandomForestRegressor from sklearn.ensemble for regression tasks using Random Forest algorithm
from sklearn.decomposition import PCA# Import PCA (Principal Component Analysis) from sklearn.decomposition for dimensionality reduction
from sklearn.metrics import accuracy_score# Import accuracy_score from sklearn.metrics to evaluate the accuracy of classification models
from sklearn.metrics import mean_squared_error# Import mean_squared_error from sklearn.metrics to evaluate the performance of regression models
from scipy.stats import wasserstein_distance# Import wasserstein_distance from scipy.stats to compute the Wasserstein distance between two distributions

In [4]:
# Global matplotlib defaults for every figure produced below: draw text in
# Times New Roman and render minus signs with an ASCII hyphen instead of the
# Unicode minus sign.
plt.rcParams.update({
    'font.family': ['Times New Roman'],
    'axes.unicode_minus': False,
})

In [None]:
# --------------- Core Parameters ---------------
# Kind of learning problem: 'classification' or 'regression'.
TASK_TYPE = 'regression'
# Path of the input CSV file (empty placeholder; load_data() rejects a path
# that does not exist).
DATA_PATH = ''
# Feature columns, given either as column names or as integer positions.
FEATURE_COLS = [
    'Age',
    'Drinking Water',
    'Food Samples',
    'Soil',
    'Dust',
    'Feces',
]
# Label column, given either as a column name or as an integer position.
LABEL_COL = 'Blood Lead Levels'
# Directory that receives the result files and figures.
SAVE_DIR = ''
# Prefix shared by the result files.
RESULT_PREFIX = 'best_result'

In [None]:
# --------------- Training Parameters ---------------
# Number of training episodes (one augmented batch is generated per episode).
EPISODES = 800
# Number of synthetic samples generated in each episode.
BATCH_SIZE = 200
# Step size of the Adam optimizer that trains the policy network.
LEARNING_RATE = 1e-4
# Discount factor applied when accumulating returns in the policy update.
GAMMA = 0.95
# Width of the policy network's hidden layer.
HIDDEN_DIM = 256

In [None]:
# ===================== Configuration Constraints =====================
# Per-feature constraints applied to generated samples.
# Format: {column position in the feature matrix: (min_value, max_value, dtype)}
# where dtype is 'int' (clip, then round to the nearest integer) or 'float'.
# Add or remove entries to match the feature columns in use.
DATA_CONSTRAINTS = {
    0: (0, 8, 'int'),      # Column 0: Value range from 0 to 8, data type 'int'
    1: (0, 1, 'float'),    # Column 1: Value range from 0 to 1, data type 'float'
    2: (0, 1, 'float'),    # Column 2: Value range from 0 to 1, data type 'float'
    3: (0, 1, 'float'),    # Column 3: Value range from 0 to 1, data type 'float'
    4: (0, 1, 'float'),    # Column 4: Value range from 0 to 1, data type 'float'
    5: (0, 1, 'float'),    # Column 5: Value range from 0 to 1, data type 'float'
}
# Constraints applied to generated regression labels.
LABEL_CONSTRAINTS = {
    'min': 0,               # Minimum value for the label
    'max': None,            # Maximum value for the label (None means no upper limit)
    'dtype': 'float'        # Data type of the label
}

In [None]:
# ===================== Data Loading Module =====================
def load_data(data_path=None, feature_cols=None, label_col=None):
    """Load the feature matrix and label vector from a CSV file.

    Parameters:
        data_path: Path of the CSV file. Defaults to the module-level DATA_PATH.
        feature_cols: Non-empty list of feature columns, either all column
            names (str) or all integer positions. Defaults to FEATURE_COLS.
        label_col: Label column as a name (str) or an integer position.
            Defaults to LABEL_COL.

    Returns:
        (X, y, feature_names) where X is a float32 array with one column per
        feature, y is a 1-D array of labels and feature_names is the list of
        feature column names.

    Raises:
        SystemExit: with code 1 after printing the cause, when the file is
            missing, a requested column is absent, or any other error occurs
            while reading/converting the data.
    """
    # Fall back to the notebook-level configuration for anything not supplied.
    data_path = DATA_PATH if data_path is None else data_path
    feature_cols = FEATURE_COLS if feature_cols is None else feature_cols
    label_col = LABEL_COL if label_col is None else label_col
    try:
        # Check file existence
        if not os.path.exists(data_path):
            raise FileNotFoundError(f"Data file does not exist: {data_path}")
        if len(feature_cols) == 0:
            # Without this guard the type probe below fails with an opaque IndexError
            raise ValueError("No feature columns specified")
        # Read CSV file into DataFrame
        df = pd.read_csv(data_path)
        print("Data loaded successfully, columns:", df.columns.tolist())
        # Process feature columns (the first entry decides names vs. positions)
        if isinstance(feature_cols[0], str):
            missing = [col for col in feature_cols if col not in df.columns]
            if missing:
                raise KeyError(f"Missing feature columns: {missing}")
            X = df[feature_cols].values.astype(np.float32)
            feature_names = list(feature_cols)
        else:
            feature_names = df.columns[feature_cols].tolist()
            X = df.iloc[:, feature_cols].values.astype(np.float32)
        # Process label column
        if isinstance(label_col, str):
            if label_col not in df.columns:
                raise KeyError(f"Label column does not exist: {label_col}")
            y = df[label_col].values
        else:
            y = df.iloc[:, label_col].values
        # Reshape y to 1D array
        y = y.reshape(-1)
        print(f"Data shape: X={X.shape}, y={y.shape}")
        return X, y, feature_names
    except Exception as e:
        print(f"\n!! Data loading error: {str(e)} !!")
        # Raise SystemExit directly instead of calling the interactive exit()
        # helper: the helper is injected by the `site` module and is not
        # guaranteed to stop execution at this point in every front end, which
        # would let this function fall through and return None.
        raise SystemExit(1) from e

In [None]:
# ===================== Reinforcement Learning Environment =====================
class RLEnvironment:
    """Environment that scores augmented batches for the augmentation agent.

    The data is split 80/20 into training and validation parts. A random
    forest trained on the training part gives the baseline validation score;
    each call to evaluate_enhancement() retrains the forest on training data
    plus a synthetic batch and rewards the score gain minus a penalty on the
    per-feature Wasserstein distance between training and synthetic features.
    """

    # Number of leading feature columns that receive the extra "key feature" penalty.
    KEY_FEATURE_COUNT = 3

    def __init__(self, X, y, random_state=None):
        """Split the data, fit the baseline model and reset the phase state.

        Parameters:
            X: Feature matrix.
            y: Label vector.
            random_state: Optional seed forwarded to the train/validation
                split and to the random forest; None keeps both unseeded.
        """
        self.random_state = random_state
        self.X_orig = X  # Original feature matrix
        self.y_orig = y  # Original target values
        # Split data into training and validation sets (80/20 split)
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )
        # Initialize the model based on task type
        self.model = self._init_model()
        # Train baseline model and evaluate its performance
        self.baseline = self._train_baseline()
        # Penalty coefficient used by the most recent calculate_reward() call
        self.adaptive_coef = 0.5
        # Current phase identifier ('explore', 'exploit', or 'converge')
        self.phase = 'explore'
        # Number of completed evaluate_enhancement() calls
        self.episode = 0
        # History of rewards received
        self.reward_history = []

    def _init_model(self):
        """Return an untrained 50-tree random forest matching TASK_TYPE."""
        if TASK_TYPE == 'classification':
            return RandomForestClassifier(n_estimators=50, random_state=self.random_state)
        return RandomForestRegressor(n_estimators=50, random_state=self.random_state)

    def _train_baseline(self):
        """Fit the model on the training split and return its validation score."""
        self.model.fit(self.X_train, self.y_train)
        return self._evaluate()

    def _evaluate(self):
        """Score the current model on the validation split (higher is better)."""
        pred = self.model.predict(self.X_val)
        if TASK_TYPE == 'classification':
            return accuracy_score(self.y_val, pred)
        return -mean_squared_error(self.y_val, pred)  # Negative MSE for maximization

    def calculate_reward(self, perf_gain, w_dist):
        """Return perf_gain minus the phase-dependent distribution penalty.

        Parameters:
            perf_gain: Validation score after augmentation minus the baseline.
            w_dist: Sequence of per-feature Wasserstein distances.

        Returns:
            perf_gain - coef * (0.7 * mean(w_dist) + 0.3 * key_feature_penalty).
            The coefficient that was applied is stored in self.adaptive_coef.
        """
        if self.phase == 'explore':
            # Decays linearly from 0.5 by 0.02 per episode, floored at 0.3
            coef = max(0.3, 0.5 - 0.02 * self.episode)
        elif self.phase == 'exploit':
            # Increase penalty when performance decreases during exploitation
            coef = 0.5 + 0.1 * (perf_gain < 0)
        else:
            # Strict constraints during convergence phase
            coef = 0.7
        self.adaptive_coef = float(coef)
        # Extra weight on the leading KEY_FEATURE_COUNT features
        key_feature_penalty = 1.5 * np.mean(w_dist[:self.KEY_FEATURE_COUNT])
        # Total penalty combines general and key feature penalties
        total_penalty = coef * (0.7 * np.mean(w_dist) + 0.3 * key_feature_penalty)
        return perf_gain - total_penalty

    def evaluate_enhancement(self, X_new, y_new):
        """Retrain on training data plus a synthetic batch and score the result.

        Parameters:
            X_new: Synthetic feature rows (same number of columns as X_train).
            y_new: Labels of the synthetic rows.

        Returns:
            (reward, mean_w_dist): the reward from calculate_reward() and the
            mean per-feature Wasserstein distance between X_train and X_new.
        """
        # Combine original and new data
        X_combined = np.vstack([self.X_train, X_new])
        y_combined = np.concatenate([self.y_train, y_new])
        # Retrain model on combined dataset
        self.model.fit(X_combined, y_combined)
        # Evaluate performance after enhancement
        current_score = self._evaluate()
        perf_gain = current_score - self.baseline
        # Calculate Wasserstein distance for each feature
        w_dist_per_feature = [wasserstein_distance(self.X_train[:, i], X_new[:, i])
                              for i in range(self.X_train.shape[1])]
        # The reward is computed with the phase/episode values from before this step
        reward = self.calculate_reward(perf_gain, w_dist_per_feature)
        self.episode += 1
        self.reward_history.append(reward)
        # NOTE(review): the exploit test is checked first, so 'converge' is only
        # entered on a step where the exploit condition is false.
        if self.episode > 100 and np.mean(self.reward_history[-10:]) > 0:
            self.phase = 'exploit'
        elif self.episode > 200 and np.mean(w_dist_per_feature) < 0.1:
            self.phase = 'converge'
        return reward, np.mean(w_dist_per_feature)

In [None]:
# ===================== Policy Network =====================
class AugmentPolicy(nn.Module):
    """Gaussian policy over the two augmentation parameters (alpha, beta).

    A one-hidden-layer MLP produces the two means; the standard deviations
    come from a learnable log-std vector that does not depend on the input.
    """

    def __init__(self, input_dim):
        """Create the MLP (input_dim -> HIDDEN_DIM -> 2) and the log-std vector."""
        super().__init__()
        layers = [
            nn.Linear(input_dim, HIDDEN_DIM),   # input -> hidden
            nn.ReLU(),
            nn.Linear(HIDDEN_DIM, 2),           # hidden -> (alpha, beta) means
        ]
        self.net = nn.Sequential(*layers)
        # log(std) starts at zero, i.e. the initial standard deviation is 1
        self.log_std = nn.Parameter(torch.zeros(2))

    def forward(self, x):
        """Return (means, std) of the action distribution for input x."""
        action_means = self.net(x)
        # Keep the scale strictly positive for the Normal distribution
        action_std = torch.clamp(self.log_std.exp(), min=1e-6)
        return action_means, action_std

In [None]:
# ===================== Reinforcement Learning Agent =====================
class AugmentAgent:
    """Policy-gradient agent that proposes augmentation parameters."""

    def __init__(self, input_dim):
        """Create the policy network and its Adam optimizer.

        Parameters:
            input_dim: Number of input features of the policy network.
        """
        self.policy = AugmentPolicy(input_dim)
        self.optimizer = optim.Adam(self.policy.parameters(), lr=LEARNING_RATE)

    def generate_actions(self, states):
        """Sample one (alpha, beta) action per state row.

        Parameters:
            states: Array-like of shape (batch, input_dim).

        Returns:
            numpy array of shape (batch, 2) with raw samples from the policy's
            Normal distribution (the values are not bounded).
        """
        states = torch.FloatTensor(states)
        # Sampling only; no gradients are needed here
        with torch.no_grad():
            means, stds = self.policy(states)
            actions = torch.distributions.Normal(means, stds).sample()
        return actions.numpy()

    def update_policy(self, states, actions, rewards, gamma=None):
        """Run one policy-gradient step on a batch.

        Parameters:
            states: Array-like of shape (batch, input_dim).
            actions: Array-like of shape (batch, 2), the actions that were taken.
            rewards: Sequence of `batch` rewards, accumulated into discounted
                returns in sequence order.
            gamma: Discount factor; defaults to the module-level GAMMA.

        An empty batch is ignored. With a single sample the return is used
        as-is, because the sample standard deviation of one value is NaN and
        would corrupt every parameter.
        """
        if gamma is None:
            gamma = GAMMA
        states = torch.FloatTensor(states)
        actions = torch.FloatTensor(actions)
        rewards = torch.FloatTensor(rewards)
        if rewards.numel() == 0:
            return
        # NOTE(review): returns are accumulated along the batch axis as if the
        # rows were consecutive time steps; confirm this is intended when the
        # caller passes the same reward for every row.
        # Accumulate G_t = r_t + gamma * G_{t+1} back to front, then restore order
        discounted_returns = []
        G = 0.0
        for r in reversed(rewards.tolist()):
            G = r + gamma * G
            discounted_returns.append(G)
        discounted_returns.reverse()
        returns_tensor = torch.FloatTensor(discounted_returns)
        if returns_tensor.numel() > 1:
            # Normalize returns to reduce variance in policy gradient
            returns_norm = (returns_tensor - returns_tensor.mean()) / (returns_tensor.std() + 1e-6)
        else:
            returns_norm = returns_tensor
        # Log-probability of each taken action under the current policy
        means, stds = self.policy(states)
        dist = torch.distributions.Normal(means, stds)
        log_probs = dist.log_prob(actions).sum(dim=1)
        # Policy gradient loss: -E[log pi(a|s) * G]
        loss = -(log_probs * returns_norm).mean()
        self.optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to guard against exploding gradients
        nn.utils.clip_grad_norm_(self.policy.parameters(), 1.0)
        self.optimizer.step()

In [None]:
# ===================== Data Range Validation Module =====================
class DataValidator:
    """Range and type enforcement for generated feature matrices."""

    @staticmethod
    def apply_constraints(data, constraints):
        """
        Return a constrained copy of `data`; the input array is left untouched.
        Parameters:
            data: 2-D numpy.ndarray whose columns are to be constrained
            constraints: Dictionary {column_index: (min_val, max_val, dtype)};
                dtype 'int' rounds the clipped values, 'float' keeps them
        """
        result = data.copy()
        for col_idx, spec in constraints.items():
            lower, upper, kind = spec
            # Index tuple equivalent to result[:, col_idx]
            column = (slice(None), col_idx)
            # Step 1: force the values into [lower, upper]
            result[column] = np.clip(result[column], lower, upper)
            # Step 2: coerce to the requested kind (stored back in the array's own dtype)
            if kind == 'int':
                result[column] = np.round(result[column]).astype(int)
            elif kind == 'float':
                result[column] = result[column].astype(float)
        return result

In [None]:
# ===================== Data Generator =====================
class DataGenerator:
    """Generates synthetic samples by noisy interpolation between data rows."""

    @staticmethod
    def generate_batch(X, y, actions, feature_constraints=None, label_constraints=None,
                       task_type=None):
        """Generate one synthetic sample per action row.

        Parameters:
            X: 2-D numpy array of source features.
            y: 1-D array of source labels aligned with X.
            actions: Array of shape (batch, 2); column 0 drives the mixing
                weight alpha, column 1 the noise scale beta. Values outside
                [-1, 1] are clipped before mapping, so alpha stays in [0, 1]
                and beta in [0, 0.2] even though the policy samples from an
                unbounded Normal distribution.
            feature_constraints: Optional {column: (min, max, dtype)} passed
                to DataValidator.apply_constraints for every sample.
            label_constraints: Optional dict with 'min', 'max' (None means
                unbounded) and 'dtype' ('int' or 'float'); regression only.
            task_type: 'classification' or 'regression'; defaults to the
                module-level TASK_TYPE.

        Returns:
            (X_new, y_new) as numpy arrays with one entry per action row.
        """
        if task_type is None:
            task_type = TASK_TYPE
        # Bound the raw actions first so the affine maps below hit their documented ranges
        bounded = np.clip(np.asarray(actions), -1.0, 1.0)
        alphas = (bounded[:, 0] + 1) / 2   # [-1, 1] -> [0, 1]
        betas = (bounded[:, 1] + 1) * 0.1  # [-1, 1] -> [0, 0.2]

        # Resolve label bounds once; a stored None means "no bound on this side"
        label_lo, label_hi, label_dtype = -np.inf, np.inf, None
        if label_constraints:
            lo = label_constraints.get('min')
            hi = label_constraints.get('max')
            label_lo = -np.inf if lo is None else lo
            label_hi = np.inf if hi is None else hi
            label_dtype = label_constraints.get('dtype', 'float')

        X_new = []  # Container for generated feature samples
        y_new = []  # Container for generated label samples
        # Draw the pair of source rows for every sample up front
        indices = np.random.choice(len(X), (len(actions), 2), replace=True)
        for alpha, beta, (idx1, idx2) in zip(alphas, betas, indices):
            # ================= Feature Generation =================
            # Convex combination of the two rows plus Gaussian noise scaled by beta
            sample = alpha * X[idx1] + (1 - alpha) * X[idx2] + beta * np.random.randn(*X[0].shape)
            if feature_constraints:
                # The validator expects a 2-D matrix, so wrap and unwrap the row
                sample = DataValidator.apply_constraints(
                    sample.reshape(1, -1),
                    feature_constraints
                ).flatten()
            X_new.append(sample)
            # ================= Label Generation =================
            if task_type == 'classification':
                # Pick one of the two source labels with equal probability
                label = y[idx1] if np.random.rand() > 0.5 else y[idx2]
            else:
                # Mix the labels with the same weight as the features
                label = alpha * y[idx1] + (1 - alpha) * y[idx2]
                if label_constraints:
                    label = np.clip(label, label_lo, label_hi)
                    if label_dtype == 'int':
                        label = int(round(label))
                    elif label_dtype == 'float':
                        label = float(label)
            y_new.append(label)
        return np.array(X_new), np.array(y_new)

In [None]:
# ===================== Visualization Module =====================
class ResultVisualizer:
    """Static helpers that render diagnostic figures and write them to disk."""

    @staticmethod
    def plot_pca_comparison(original, augmented, save_path):
        """Scatter original vs. augmented samples in a shared 2-D PCA projection.

        The PCA is fitted on both sets stacked together and the figure is
        written to `save_path`.
        """
        n_original = len(original)
        reducer = PCA(n_components=2)
        coords = reducer.fit_transform(np.vstack([original, augmented]))

        fig, ax = plt.subplots(figsize=(10, 6))
        # Rows before n_original belong to the original data, the rest are augmented
        ax.scatter(coords[:n_original, 0], coords[:n_original, 1],
                   alpha=0.3, label='Original data')
        ax.scatter(coords[n_original:, 0], coords[n_original:, 1],
                   alpha=0.3, label='Augmented data')
        # The title reports how much variance the two components retain
        ax.set_title(
            f"PCA projection comparison (Explained variance ratio: {reducer.explained_variance_ratio_.sum():.2f})"
        )
        ax.legend()
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

    @staticmethod
    def save_training_history(history, save_path):
        """Plot reward and Wasserstein-distance curves side by side and save them.

        `history` must provide the 'episode', 'reward' and 'w_dist' sequences.
        """
        fig, (reward_ax, dist_ax) = plt.subplots(1, 2, figsize=(15, 6))
        episodes = history['episode']

        # ----------------- Left panel: reward -----------------
        reward_ax.plot(episodes, history['reward'],
                       color='#2e7d32', linewidth=2, label='Combined reward')
        reward_ax.set_title('Reward trend', fontsize=14, pad=20)
        reward_ax.set_xlabel('Training episodes', fontsize=12)
        reward_ax.set_ylabel('Reward value', fontsize=12)
        reward_ax.grid(True, alpha=0.3)
        reward_ax.legend()

        # ----------------- Right panel: Wasserstein distance -----------------
        dist_ax.plot(episodes, history['w_dist'],
                     color='#c62828', linewidth=2, label='Wasserstein distance')
        dist_ax.set_title('Data distribution distance trend', fontsize=14, pad=20)
        dist_ax.set_xlabel('Training episodes', fontsize=12)
        dist_ax.set_ylabel('W-distance', fontsize=12)
        dist_ax.grid(True, alpha=0.3)
        dist_ax.legend()

        # Extra padding between the panels, then write and release the figure
        fig.tight_layout(pad=3.0)
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

In [None]:
# ===================== Enhanced Data Saving Module =====================
class DataSaver:
    """Writes the augmented samples, the combined dataset and run metadata."""

    @staticmethod
    def save_enhanced_data(original_data, original_labels, enhanced_data, enhanced_labels,
                          feature_names, label_name, save_dir, prefix):
        """Save the enhanced data, the combined dataset and a metadata file.

        Parameters:
            original_data: 2-D array of original features.
            original_labels: 1-D array of original labels.
            enhanced_data: 2-D array of generated features.
            enhanced_labels: 1-D array of generated labels.
            feature_names: Column names for the feature columns.
            label_name: Column name for the label column.
            save_dir: Output directory, created if missing. An empty string
                means the current working directory.
            prefix: Filename prefix; a timestamp is appended to it.

        Files written (all named "<prefix>_<timestamp>_..."):
            _enhanced.csv (generated rows with labels), _enhanced.npy
            (generated features only), _full.csv (original + generated rows)
            and _meta.csv (sample counts, mean Wasserstein distance, timestamp).
        """
        # os.makedirs('') raises FileNotFoundError, so map an empty directory to '.'
        save_dir = save_dir or '.'
        os.makedirs(save_dir, exist_ok=True)
        # Generate timestamp for unique filenames
        timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"{prefix}_{timestamp}"
        # ================= Save Enhanced Data =================
        enhanced_df = pd.DataFrame(enhanced_data, columns=feature_names)
        enhanced_df[label_name] = enhanced_labels
        enhanced_df.to_csv(os.path.join(save_dir, f"{base_name}_enhanced.csv"), index=False)
        # The .npy file holds the generated features only (no labels)
        np.save(os.path.join(save_dir, f"{base_name}_enhanced.npy"), enhanced_data)
        # ================= Save Combined Dataset =================
        combined_data = np.vstack([original_data, enhanced_data])
        combined_labels = np.concatenate([original_labels, enhanced_labels])
        combined_df = pd.DataFrame(combined_data, columns=feature_names)
        combined_df[label_name] = combined_labels
        combined_df.to_csv(os.path.join(save_dir, f"{base_name}_full.csv"), index=False)
        # ================= Save Metadata =================
        # Mean per-feature Wasserstein distance between original and enhanced features
        w_dist = np.mean([
            wasserstein_distance(original_data[:, i], enhanced_data[:, i])
            for i in range(original_data.shape[1])
        ])
        meta = {
            'original_samples': original_data.shape[0],  # Number of original samples
            'enhanced_samples': enhanced_data.shape[0],   # Number of enhanced samples
            'w_distance': w_dist,                         # Average Wasserstein distance
            'generation_time': timestamp                  # Generation timestamp
        }
        pd.Series(meta).to_csv(os.path.join(save_dir, f"{base_name}_meta.csv"))
def plot_adaptive_coef(history):
    """Draw the recorded penalty coefficient per episode and save it.

    Reads history['episode'] and history['coef'] and writes
    'dynamic_coef.png' into SAVE_DIR.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(history['episode'], history['coef'], color='#2e7d32', linewidth=2)
    ax.set_title('Dynamic penalty coefficient trend')
    ax.set_xlabel('Training episodes')
    ax.set_ylabel('Penalty coefficient')
    ax.grid(True, alpha=0.3)
    target = os.path.join(SAVE_DIR, 'dynamic_coef.png')
    fig.savefig(target, dpi=300)
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
def log_phase_transition(phase_log):
    """Save a pie chart of how many episodes were logged in each phase.

    `phase_log` holds one phase name per episode; the chart is written to
    'phase_dist.png' inside SAVE_DIR.
    """
    # Phase name -> wedge colour, in drawing order
    palette = {
        'explore': '#ff9999',
        'exploit': '#66b3ff',
        'converge': '#99ff99',
    }
    phase_names = list(palette)
    occurrences = list(map(phase_log.count, phase_names))
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(
        occurrences,
        labels=phase_names,
        autopct='%1.1f%%',
        colors=list(palette.values()),
    )
    ax.set_title('Training phase distribution ratio')
    fig.savefig(os.path.join(SAVE_DIR, 'phase_dist.png'), dpi=300)
    plt.close(fig)

In [None]:
# ===================== Main Function =====================
def main():
    """Train the augmentation policy and save the best synthetic batch.

    Each episode samples BATCH_SIZE actions, generates a synthetic batch from
    the training split, scores it with the environment and updates the policy.
    The batch with the smallest mean Wasserstein distance is kept as "best".
    History, model weights, samples and figures are written to SAVE_DIR (an
    empty SAVE_DIR means the current working directory).
    """
    # os.makedirs('') raises FileNotFoundError, so map an empty directory to '.'
    os.makedirs(SAVE_DIR or '.', exist_ok=True)
    # Load data and get feature names
    X, y, feature_names = load_data()
    # Initialize RL environment and agent
    env = RLEnvironment(X, y)
    agent = AugmentAgent(X.shape[1])
    # Per-episode training metrics
    history = {
        'episode': [],
        'reward': [],
        'w_dist': [],
        'alpha_mean': [],
        'alpha_std': [],
        'beta_mean': [],
        'beta_std': [],
        'coef': [],
        'phase': []
    }
    # Best result so far; features, labels and weights come from the same episode
    best = {'w_dist': float('inf'), 'samples': None, 'y_new': None, 'model': None}
    phase_log = []
    # The policy input is the feature mean repeated per sample; it does not
    # change between episodes, so it is built once.
    states = np.repeat([X.mean(axis=0)], BATCH_SIZE, axis=0)
    for ep in range(EPISODES):
        # Generate action parameters for current batch
        actions = agent.generate_actions(states)
        # Generate augmented data with constraints
        X_new, y_new = DataGenerator.generate_batch(
            env.X_train,
            env.y_train,
            actions,
            feature_constraints=DATA_CONSTRAINTS,
            label_constraints=LABEL_CONSTRAINTS
        )
        # Evaluate augmentation effect
        reward, w_dist = env.evaluate_enhancement(X_new, y_new)
        # Update policy network (every sample of the batch shares the episode reward)
        agent.update_policy(states, actions, [reward] * BATCH_SIZE)
        # Record training metrics
        history['episode'].append(ep + 1)
        history['reward'].append(float(reward))
        history['w_dist'].append(float(w_dist))
        # Action statistics, using the same affine mapping as DataGenerator.generate_batch
        alphas = (actions[:, 0] + 1) / 2
        betas = (actions[:, 1] + 1) * 0.1
        history['alpha_mean'].append(float(np.mean(alphas)))
        history['alpha_std'].append(float(np.std(alphas)))
        history['beta_mean'].append(float(np.mean(betas)))
        history['beta_std'].append(float(np.std(betas)))
        # NOTE(review): this re-derives the penalty coefficient after the
        # environment has already advanced its episode counter and phase, and
        # tests `reward < 0` where the environment tests the performance gain,
        # so the logged value only approximates the coefficient that was applied.
        if env.phase == 'explore':
            coef = max(0.3, 0.5 - 0.02 * env.episode)
        elif env.phase == 'exploit':
            coef = 0.5 + 0.1 * (reward < 0)
        else:
            coef = 0.7
        history['coef'].append(coef)
        history['phase'].append(env.phase)
        phase_log.append(env.phase)
        # Keep the batch with the smallest distribution distance
        if w_dist < best['w_dist']:
            best.update(
                w_dist=w_dist,
                samples=X_new,
                y_new=y_new,
                # state_dict() returns references to the live parameters, which
                # later optimizer steps overwrite in place; clone to snapshot them.
                model={name: tensor.detach().clone()
                       for name, tensor in agent.policy.state_dict().items()},
            )
            # Visualize PCA comparison when finding better solution
            ResultVisualizer.plot_pca_comparison(
                env.X_orig,
                X_new,
                os.path.join(SAVE_DIR, f'{RESULT_PREFIX}_pca.png')
            )
        # Print training progress
        print(f"Episode {ep+1}/{EPISODES}, Reward: {reward:.2f}, W距离: {w_dist:.4f}, Phase: {env.phase}")
    # Save final results
    print(best['w_dist'])
    pd.DataFrame(history).to_csv(
        os.path.join(SAVE_DIR, f'{RESULT_PREFIX}_history.csv'),
        index=False
    )
    # Save training history visualization
    ResultVisualizer.save_training_history(
        history,
        os.path.join(SAVE_DIR, f'{RESULT_PREFIX}_training.png')
    )
    # Model, samples and enhanced dataset exist only if some episode improved on the initial best
    if best['samples'] is not None:
        torch.save(
            best['model'],
            os.path.join(SAVE_DIR, f'{RESULT_PREFIX}_model.pth')
        )
        # Store the labels generated together with the best samples, not the
        # labels of whichever episode happened to run last.
        np.savez(
            os.path.join(SAVE_DIR, f'{RESULT_PREFIX}_samples.npz'),
            X=best['samples'],
            y=best['y_new']
        )
        DataSaver.save_enhanced_data(
            original_data=env.X_orig,
            original_labels=env.y_orig,
            enhanced_data=best['samples'],
            enhanced_labels=best['y_new'],
            feature_names=feature_names,
            label_name=LABEL_COL,
            save_dir=SAVE_DIR,
            prefix=RESULT_PREFIX
        )
    # Generate final visualizations
    plot_adaptive_coef(history)
    log_phase_transition(phase_log)
# Run the training pipeline only when this module is the program entry point.
if __name__ == "__main__":
    main()