In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/eatvul/preserved_pool_attack.csv
/kaggle/input/eatvul/cwe189_train.csv
/kaggle/input/eatvul/key_token_capture.py
/kaggle/input/eatvul/cwe119_train.csv
/kaggle/input/eatvul/cwe416_test.csv
/kaggle/input/eatvul/cwe399_ast_test.json
/kaggle/input/eatvul/surrogate_test.py
/kaggle/input/eatvul/ori_model.py
/kaggle/input/eatvul/asterisk_ast_test_ADV.json
/kaggle/input/eatvul/ori_model_run.py
/kaggle/input/eatvul/cwe399-huggingface.csv
/kaggle/input/eatvul/cwe399_ast_test_ADV.json
/kaggle/input/eatvul/cwe399_test.csv
/kaggle/input/eatvul/openssl_ast_test_ADV.json
/kaggle/input/eatvul/cwe119_test.csv
/kaggle/input/eatvul/openssl_ast_test.json
/kaggle/input/eatvul/cwe119-huggingface.csv
/kaggle/input/eatvul/cwe20_train.csv
/kaggle/input/eatvul/cwe399_ast_train.json
/kaggle/input/eatvul/cwe-399-v2.csv
/kaggle/input/eatvul/predict_codebert_cwe189.txt
/kaggle/input/eatvul/predict_codebert_cwe399.txt
/kaggle/input/eatvul/fga_selection.py
/kaggle/input/eatvul/openssl_ast_train.json
/ka

In [2]:
# import pandas as pd

# # CWE-399
# train_399 = pd.read_csv('/kaggle/input/eatvul/cwe399_train.csv')
# test_399 = pd.read_csv('/kaggle/input/eatvul/cwe399_test.csv')

# # CWE-119
# train_119 = pd.read_csv('/kaggle/input/eatvul/cwe119_train.csv')
# test_119 = pd.read_csv('/kaggle/input/eatvul/cwe119_test.csv')

# # CWE-20
# train_20 = pd.read_csv('/kaggle/input/eatvul/cwe20_train.csv')
# test_20 = pd.read_csv('/kaggle/input/eatvul/cwe20_test.csv')

# # CWE-416
# train_416 = pd.read_csv('/kaggle/input/eatvul/cwe416_train.csv')
# test_416 = pd.read_csv('/kaggle/input/eatvul/cwe416_test.csv')

# # CWE-189
# train_189 = pd.read_csv('/kaggle/input/eatvul/cwe189_train.csv')
# test_189 = pd.read_csv('/kaggle/input/eatvul/cwe189_test.csv')


# Train baseline models

In [9]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from transformers import RobertaTokenizerFast, RobertaModel, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
import matplotlib.pyplot as plt
try:
    import seaborn as sns
except ImportError:
    print("Warning: Seaborn not installed. Some visualizations may not work.")
    sns = None
from tqdm import tqdm
import os
import re
import gc
import json
import zipfile
from datetime import datetime
from torch.optim import AdamW

# Set device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


class CodePreprocessor:
    """Preprocess code for CodeBERT model"""
    
    def __init__(self):
        self.tokenizer = RobertaTokenizerFast.from_pretrained("microsoft/codebert-base")
        self.max_length = 512  # Maximum sequence length for CodeBERT
    
    def preprocess_code(self, code_text):
        """Basic preprocessing of code text"""
        # Remove extra whitespace
        code_text = re.sub(r'\s+', ' ', code_text)
        code_text = code_text.strip()
        return code_text
    
    def tokenize(self, code_text, truncation=True, padding='max_length', return_tensors=None):
        """Tokenize code text using RobertaTokenizerFast"""
        processed_code = self.preprocess_code(code_text)
        return self.tokenizer(processed_code, 
                             truncation=truncation, 
                             max_length=self.max_length,
                             padding=padding,
                             return_tensors=return_tensors)

class CodeDataset(Dataset):
    """Dataset for code vulnerability detection using CodeBERT"""
    
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer(text, 
                                 truncation=True,
                                 max_length=self.max_length,
                                 padding='max_length',
                                 return_tensors='pt')
        
        # Remove batch dimension added by tokenizer when return_tensors='pt'
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()
        
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'label': torch.tensor(label, dtype=torch.long)
        }

class CodeBERTClassifier(nn.Module):
    """CodeBERT model for code vulnerability detection"""
    
    def __init__(self, freeze_bert=False, dropout_rate=0.1):
        super(CodeBERTClassifier, self).__init__()
        
        # Load pre-trained CodeBERT model
        self.codebert = RobertaModel.from_pretrained("microsoft/codebert-base")
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.codebert.config.hidden_size, 2)  # Binary classification
        
        # Freeze CodeBERT layers if specified
        if freeze_bert:
            for param in self.codebert.parameters():
                param.requires_grad = False
    
    def forward(self, input_ids, attention_mask):
        # Get CodeBERT outputs
        outputs = self.codebert(input_ids=input_ids, attention_mask=attention_mask)
        
        # Use the [CLS] token representation for classification
        pooled_output = outputs.pooler_output
        
        # Apply dropout and classify
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        
        return logits

class CodeBERTTrainer:
    """Trainer for CodeBERT model"""
    
    def __init__(self, data_path=None, batch_size=8, epochs=4, learning_rate=2e-5):
        self.preprocessor = CodePreprocessor()
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.data = None
        self.model = None
        self.best_model_state = None
        self.best_val_accuracy = 0.0
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'val_accuracy': [],
            'val_precision': [],
            'val_recall': [],
            'val_f1': []
        }
        self.output_dir = os.path.join(os.getcwd(), 'codebert_outputs')
        os.makedirs(self.output_dir, exist_ok=True)
        
        if data_path:
            self.load_data(data_path)
    
    def load_data(self, data_path):
        """
        Load data from file or DataFrame
        
        Args:
            data_path: Path to a data file (CSV, Excel, JSON) or a pandas DataFrame
        """
        print(f"DEBUG: Type of data_path in load_data: {type(data_path)}")
        
        # If data_path is already a DataFrame, use it directly
        if isinstance(data_path, pd.DataFrame):
            self.data = data_path
            print(f"Using provided DataFrame with {len(self.data)} samples.")
            
        # If it's a string, try to load from file
        elif isinstance(data_path, str):
            print(f"DEBUG: Trying to load from file path: '{data_path}'")
            
            # Check if the file exists
            if not os.path.exists(data_path):
                raise FileNotFoundError(f"File not found: '{data_path}'")
                
            file_ext = os.path.splitext(data_path.lower())[1]
            print(f"DEBUG: File extension detected: '{file_ext}'")
            
            if file_ext == '.csv':
                self.data = pd.read_csv(data_path)
            elif file_ext in ['.xls', '.xlsx']:
                self.data = pd.read_excel(data_path)
            elif file_ext == '.json':
                self.data = pd.read_json(data_path)
            elif file_ext == '.pkl' or file_ext == '.pickle':
                self.data = pd.read_pickle(data_path)
            elif file_ext == '':
                # Try to infer the format if no extension is given
                try:
                    # First try CSV as it's most common
                    self.data = pd.read_csv(data_path)
                    print(f"Inferred file format as CSV for: {data_path}")
                except:
                    try:
                        # Then try JSON
                        self.data = pd.read_json(data_path)
                        print(f"Inferred file format as JSON for: {data_path}")
                    except:
                        raise ValueError(f"Could not determine file format for: '{data_path}'. Please specify a file with extension or provide a DataFrame.")
            else:
                raise ValueError(f"Unsupported file format: '{file_ext}'. Supported formats: CSV, Excel, JSON, Pickle")
        else:
            raise TypeError(f"data_path must be either a string file path or a pandas DataFrame, got {type(data_path).__name__}")
        
        # Check if required columns exist
        if 'functionSource' not in self.data.columns or 'label' not in self.data.columns:
            raise ValueError("Data must contain 'functionSource' and 'label' columns.")
        
        print(f"Loaded data with {len(self.data)} samples.")
        print(f"Label distribution: {self.data['label'].value_counts().to_dict()}")
    
    def set_data(self, dataframe):
        """Set data directly from a pandas DataFrame"""
        if not isinstance(dataframe, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame.")
        
        # Check if required columns exist
        if 'functionSource' not in dataframe.columns or 'label' not in dataframe.columns:
            raise ValueError("Data must contain 'functionSource' and 'label' columns.")
        
        self.data = dataframe
        print(f"Set data with {len(self.data)} samples.")
        print(f"Label distribution: {self.data['label'].value_counts().to_dict()}")
    
    def prepare_data(self, train_data, test_data):
        """Prepare data for model training using pre-split train and test data"""
        if self.data is None and (train_data is None or test_data is None):
            raise ValueError("No data provided. Provide train_data and test_data or call load_data/set_data first.")
        
        # Use provided train and test data
        train_texts = train_data['functionSource'].values
        train_labels = train_data['label'].values
        test_texts = test_data['functionSource'].values
        test_labels = test_data['label'].values
        
        # Create datasets
        train_dataset = CodeDataset(
            train_texts, 
            train_labels, 
            self.preprocessor.tokenizer, 
            self.preprocessor.max_length
        )
        
        test_dataset = CodeDataset(
            test_texts, 
            test_labels, 
            self.preprocessor.tokenizer, 
            self.preprocessor.max_length
        )
        
        # Create data loaders
        train_loader = DataLoader(
            train_dataset,
            batch_size=self.batch_size,
            shuffle=True
        )
        
        test_loader = DataLoader(
            test_dataset,
            batch_size=self.batch_size,
            shuffle=False
        )
        
        print(f"Train samples: {len(train_dataset)}")
        print(f"Test samples: {len(test_dataset)}")
        
        return {
            'train_loader': train_loader,
            'val_loader': test_loader,  # Use test loader for validation
            'test_loader': test_loader,
            'test_texts': test_texts,
            'test_labels': test_labels
        }
    
    def run_all(self, data_source=None, train_data=None, test_data=None, freeze_bert=False, dataset_name="test"):
        """Run all steps: data preparation, training, evaluation, and saving"""
        # Load data if provided as a single source
        if data_source is not None:
            if isinstance(data_source, str):
                self.load_data(data_source)
            elif isinstance(data_source, pd.DataFrame):
                self.set_data(data_source)
        
        # If train_data and test_data are provided, use them; otherwise, ensure data is loaded
        if train_data is not None and test_data is not None:
            if not isinstance(train_data, pd.DataFrame) or not isinstance(test_data, pd.DataFrame):
                raise ValueError("train_data and test_data must be pandas DataFrames.")
            if 'functionSource' not in train_data.columns or 'label' not in train_data.columns:
                raise ValueError("train_data must contain 'functionSource' and 'label' columns.")
            if 'functionSource' not in test_data.columns or 'label' not in test_data.columns:
                raise ValueError("test_data must contain 'functionSource' and 'label' columns.")
            print(f"Using provided train_data with {len(train_data)} samples.")
            print(f"Using provided test_data with {len(test_data)} samples.")
        elif self.data is None:
            raise ValueError("No data loaded. Provide data_source or train_data/test_data.")
        
        # Prepare data using provided train/test split or loaded data
        if train_data is not None and test_data is not None:
            data_loaders = self.prepare_data(train_data, test_data)
        else:
            data_loaders = self.prepare_data(self.data, self.data)  # Fallback (though not used in your case)
        
        # Train model
        self.train_model(data_loaders, freeze_bert=freeze_bert)
        
        # Plot training history
        self.plot_training_history()
        
        # Evaluate model with dataset name for proper file naming
        results = self.evaluate_model(data_loaders['test_loader'], dataset_name=dataset_name)
        
        # Save model
        model_dir = self.save_model()
        
        # Save evaluation results
        with open(os.path.join(model_dir, 'evaluation_results.json'), 'w') as f:
            # Convert numpy values to Python types for JSON serialization
            serializable_results = {
                k: v if not isinstance(v, np.ndarray) else v.tolist()
                for k, v in results.items()
            }
            json.dump(serializable_results, f)
        
        print("\n=== Training and Evaluation Complete ===")
        print(f"All outputs saved to: {model_dir}")
        
        # Free up GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        return results
    
    def train_model(self, data_loaders, freeze_bert=False):
        """Train the CodeBERT model"""
        # Initialize model
        self.model = CodeBERTClassifier(freeze_bert=freeze_bert)
        self.model.to(device)
        
        # Define optimizer and scheduler
        optimizer = AdamW(self.model.parameters(), lr=self.learning_rate)
        
        # Calculate total training steps for learning rate scheduler
        total_steps = len(data_loaders['train_loader']) * self.epochs
        
        # Create learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )
        
        # Define loss function
        criterion = CrossEntropyLoss()
        
        # Training loop
        print("\n=== Training CodeBERT Model ===")
        
        for epoch in range(self.epochs):
            print(f"\nEpoch {epoch+1}/{self.epochs}")
            
            # Training phase
            self.model.train()
            train_loss = 0.0
            
            progress_bar = tqdm(data_loaders['train_loader'], desc="Training")
            for batch in progress_bar:
                # Move batch to device
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)
                
                # Zero gradients
                optimizer.zero_grad()
                
                # Forward pass
                outputs = self.model(input_ids, attention_mask)
                
                # Calculate loss
                loss = criterion(outputs, labels)
                
                # Backward pass
                loss.backward()
                
                # Update parameters
                optimizer.step()
                scheduler.step()
                
                # Update training loss
                train_loss += loss.item()
                progress_bar.set_postfix({'loss': loss.item()})
            
            # Calculate average training loss
            avg_train_loss = train_loss / len(data_loaders['train_loader'])
            self.history['train_loss'].append(avg_train_loss)
            
            # Validation phase
            self.model.eval()
            val_loss = 0.0
            val_predictions = []
            val_true_labels = []
            
            with torch.no_grad():
                for batch in tqdm(data_loaders['val_loader'], desc="Validation"):
                    # Move batch to device
                    input_ids = batch['input_ids'].to(device)
                    attention_mask = batch['attention_mask'].to(device)
                    labels = batch['label'].to(device)
                    
                    # Forward pass
                    outputs = self.model(input_ids, attention_mask)
                    
                    # Calculate loss
                    loss = criterion(outputs, labels)
                    
                    # Update validation loss
                    val_loss += loss.item()
                    
                    # Get predictions
                    _, preds = torch.max(outputs, dim=1)
                    
                    # Store predictions and true labels
                    val_predictions.extend(preds.cpu().tolist())
                    val_true_labels.extend(labels.cpu().tolist())
            
            # Calculate average validation loss
            avg_val_loss = val_loss / len(data_loaders['val_loader'])
            self.history['val_loss'].append(avg_val_loss)
            
            # Calculate validation metrics
            val_accuracy = accuracy_score(val_true_labels, val_predictions)
            val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
                val_true_labels, val_predictions, average='binary'
            )
            
            self.history['val_accuracy'].append(val_accuracy)
            self.history['val_precision'].append(val_precision)
            self.history['val_recall'].append(val_recall)
            self.history['val_f1'].append(val_f1)
            
            print(f"Training Loss: {avg_train_loss:.4f}")
            print(f"Validation Loss: {avg_val_loss:.4f}")
            print(f"Validation Accuracy: {val_accuracy:.4f}")
            print(f"Validation Precision: {val_precision:.4f}")
            print(f"Validation Recall: {val_recall:.4f}")
            print(f"Validation F1: {val_f1:.4f}")
            
            # Save best model
            if val_accuracy > self.best_val_accuracy:
                self.best_val_accuracy = val_accuracy
                self.best_model_state = self.model.state_dict().copy()
                print(f"New best model with validation accuracy: {val_accuracy:.4f}")
        
        # Load best model for testing
        if self.best_model_state is not None:
            self.model.load_state_dict(self.best_model_state)
            print(f"Loaded best model with validation accuracy: {self.best_val_accuracy:.4f}")
        
        return self.model
    
    def evaluate_model(self, test_loader, dataset_name="test", export_predictions=True):
        """Evaluate the model on test data"""
        if self.model is None:
            raise ValueError("No model trained. Call train_model first.")
        
        print("\n=== Evaluating Model on Test Set ===")
        
        # Explicitly load the best model state for evaluation
        if self.best_model_state is not None:
            print(f"Loading best model with validation accuracy: {self.best_val_accuracy:.4f}")
            self.model.load_state_dict(self.best_model_state)
            self.model.eval()
        else:
            print("Warning: No best model state found, using current model state")
            self.model.eval()
        
        test_predictions = []
        test_true_labels = []
        
        with torch.no_grad():
            for batch in tqdm(test_loader, desc="Testing"):
                # Move batch to device
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)
                
                # Forward pass
                outputs = self.model(input_ids, attention_mask)
                
                # Get predictions
                _, preds = torch.max(outputs, dim=1)
                
                # Store predictions and true labels
                test_predictions.extend(preds.cpu().tolist())
                test_true_labels.extend(labels.cpu().tolist())
        
        # Calculate test metrics
        test_accuracy = accuracy_score(test_true_labels, test_predictions)
        test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(
            test_true_labels, test_predictions, average='binary'
        )
        
        # Generate classification report
        class_report = classification_report(test_true_labels, test_predictions)
        
        # Generate confusion matrix
        conf_matrix = confusion_matrix(test_true_labels, test_predictions)
        
        print(f"\n=== BEST MODEL EVALUATION RESULTS ===")
        if self.best_model_state is not None:
            print(f"Using best model from training (Validation Accuracy: {self.best_val_accuracy:.4f})")
        print(f"Test Accuracy: {test_accuracy:.4f}")
        print(f"Test Precision: {test_precision:.4f}")
        print(f"Test Recall: {test_recall:.4f}")
        print(f"Test F1: {test_f1:.4f}")
        print("\n=== DETAILED CLASSIFICATION REPORT (BEST MODEL) ===")
        print(class_report)
        print("="*60)
        
        # Plot confusion matrix
        plt.figure(figsize=(8, 6))
        if sns is not None:
            sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                       xticklabels=['Not Vulnerable', 'Vulnerable'],
                       yticklabels=['Not Vulnerable', 'Vulnerable'])
        else:
            # Fallback to matplotlib if seaborn is not available
            plt.imshow(conf_matrix, interpolation='nearest', cmap='Blues')
            plt.colorbar()
            # Add text annotations
            for i in range(conf_matrix.shape[0]):
                for j in range(conf_matrix.shape[1]):
                    plt.text(j, i, str(conf_matrix[i, j]), 
                            ha='center', va='center', color='black')
            plt.xticks([0, 1], ['Not Vulnerable', 'Vulnerable'])
            plt.yticks([0, 1], ['Not Vulnerable', 'Vulnerable'])
        
        plt.title(f'Confusion Matrix - Best Model (Val Acc: {self.best_val_accuracy:.4f})')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(os.path.join(self.output_dir, 'confusion_matrix.png'))
        plt.close()
        
        results = {
            'accuracy': test_accuracy,
            'precision': test_precision,
            'recall': test_recall,
            'f1': test_f1,
            'classification_report': class_report,
            'confusion_matrix': conf_matrix.tolist(),
            'predictions': test_predictions,
            'true_labels': test_true_labels,
            'best_val_accuracy': self.best_val_accuracy
        }
        
        # Export predictions if requested
        if export_predictions:
            export_path = self.export_predictions(
                test_predictions, 
                test_true_labels, 
                dataset_name
            )
            results['export_path'] = export_path
        
        return results
    
    def plot_training_history(self):
        """Plot training history"""
        if not self.history['train_loss']:
            print("No training history to plot.")
            return
        
        # Plot loss
        plt.figure(figsize=(12, 4))
        
        plt.subplot(1, 2, 1)
        plt.plot(self.history['train_loss'], label='Training')
        plt.plot(self.history['val_loss'], label='Validation')
        plt.title('Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        # Plot metrics
        plt.subplot(1, 2, 2)
        plt.plot(self.history['val_accuracy'], label='Accuracy')
        plt.plot(self.history['val_precision'], label='Precision')
        plt.plot(self.history['val_recall'], label='Recall')
        plt.plot(self.history['val_f1'], label='F1')
        plt.title('Validation Metrics')
        plt.xlabel('Epoch')
        plt.ylabel('Score')
        plt.legend()
        
        plt.tight_layout()
        plt.savefig(os.path.join(self.output_dir, 'training_history.png'))
        plt.close()
    
    def save_model(self):
        """Save trained model and tokenizer, and create a zip archive"""
        if self.model is None:
            print("No model to save.")
            return
        
        # Create timestamp for unique folder
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        model_dir = os.path.join(self.output_dir, f'model')
        os.makedirs(model_dir, exist_ok=True)
        
        # Save model
        if self.best_model_state is not None:
            torch.save(self.best_model_state, os.path.join(model_dir, 'best_model.pt'))
        else:
            torch.save(self.model.state_dict(), os.path.join(model_dir, 'model.pt'))
        
        # Save model configuration
        model_config = {
            'hidden_size': self.model.codebert.config.hidden_size,
            'vocab_size': self.model.codebert.config.vocab_size,
            'num_labels': 2,
            'max_length': self.preprocessor.max_length
        }
        
        with open(os.path.join(model_dir, 'model_config.json'), 'w') as f:
            json.dump(model_config, f)
        
        # Save tokenizer
        self.preprocessor.tokenizer.save_pretrained(model_dir)
        
        # Save training history
        with open(os.path.join(model_dir, 'training_history.json'), 'w') as f:
            json.dump(self.history, f)
        
        print(f"Model saved to {model_dir}")
        
        # Create zip archive of the model directory
        zip_path = f"{model_dir}_{timestamp}.zip"
        try:
            print(f"Creating zip archive: {zip_path}")
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Walk through the model directory and add all files
                for root, dirs, files in os.walk(model_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        # Create archive path relative to the model directory
                        arcname = os.path.relpath(file_path, os.path.dirname(model_dir))
                        zipf.write(file_path, arcname)
            
            # Get zip file size for user feedback
            zip_size = os.path.getsize(zip_path)
            zip_size_mb = zip_size / (1024 * 1024)
            print(f"✅ Model archive created successfully: {zip_path}")
            print(f"📦 Archive size: {zip_size_mb:.2f} MB")
            
        except Exception as e:
            print(f"⚠️ Warning: Could not create zip archive: {str(e)}")
            print(f"Model files are still available in: {model_dir}")
        
        return model_dir
    
    def load_model(self, model_dir):
        """
        Load a previously saved CodeBERT model from the specified directory
        
        Args:
            model_dir: Path to the directory containing the saved model
            
        Returns:
            The loaded CodeBERTClassifier model
        """
        if not os.path.exists(model_dir):
            raise ValueError(f"Model directory {model_dir} does not exist")
            
        print(f"Loading model from {model_dir}")
        
        # Check for model config file
        config_path = os.path.join(model_dir, 'model_config.json')
        if not os.path.exists(config_path):
            raise ValueError(f"Model config file not found in {model_dir}")
            
        # Load model configuration
        with open(config_path, 'r') as f:
            model_config = json.load(f)
            
        # Initialize model
        self.model = CodeBERTClassifier()
        self.model.to(device)
        
        # Check for model state file (either best_model.pt or model.pt)
        best_model_path = os.path.join(model_dir, 'best_model.pt')
        model_path = os.path.join(model_dir, 'model.pt')
        
        if os.path.exists(best_model_path):
            state_dict = torch.load(best_model_path, map_location=device)
            print("Loading best model checkpoint")
        elif os.path.exists(model_path):
            state_dict = torch.load(model_path, map_location=device)
            print("Loading regular model checkpoint")
        else:
            raise ValueError(f"No model checkpoint found in {model_dir}")
            
        # Load model state
        self.model.load_state_dict(state_dict)
        self.best_model_state = state_dict
        
        # Load tokenizer if available
        tokenizer_path = os.path.join(model_dir, 'special_tokens_map.json')
        if os.path.exists(tokenizer_path):
            self.preprocessor.tokenizer = RobertaTokenizerFast.from_pretrained(model_dir)
            print("Loaded tokenizer from saved model")
            
        # Load training history if available
        history_path = os.path.join(model_dir, 'training_history.json')
        if os.path.exists(history_path):
            with open(history_path, 'r') as f:
                self.history = json.load(f)
            
            # Set best accuracy from history if available
            if self.history.get('val_accuracy'):
                self.best_val_accuracy = max(self.history['val_accuracy'])
                print(f"Loaded training history. Best validation accuracy: {self.best_val_accuracy:.4f}")
        
        # Set model to evaluation mode
        self.model.eval()
        print("Model loaded successfully and set to evaluation mode")
        
        return self.model
    
    def export_predictions(self, predictions, true_labels=None, dataset_name="test"):
        """
        Export model predictions to a txt file with index and prediction format
        
        Args:
            predictions: List or array of predictions (0 or 1)
            true_labels: Optional list of true labels for comparison
            dataset_name: Name to include in the filename (e.g., "test", "cwe119")
            
        Returns:
            Path to the exported file
        """
        # Create timestamp for unique filename
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        
        # Create filename
        if "cwe" in dataset_name.lower():
            filename = f"predict_codebert_{dataset_name}_{timestamp}.txt"
        else:
            filename = f"predict_codebert_cwe_{timestamp}.txt"
        
        # Full path for the output file
        output_path = os.path.join(self.output_dir, filename)
        
        # Write predictions to file
        with open(output_path, 'w') as f:
            for idx, pred in enumerate(predictions):
                f.write(f"{idx}\t{pred}\n")
        
        print(f"Predictions exported to: {output_path}")
        print(f"Total predictions exported: {len(predictions)}")
        
        # If true labels are provided, also create a comparison file
        if true_labels is not None:
            comparison_filename = f"prediction_comparison_{dataset_name}_{timestamp}.txt"
            comparison_path = os.path.join(self.output_dir, comparison_filename)
            
            with open(comparison_path, 'w') as f:
                f.write("Index\tPrediction\tTrue_Label\tCorrect\n")
                correct_count = 0
                for idx, (pred, true) in enumerate(zip(predictions, true_labels)):
                    is_correct = pred == true
                    if is_correct:
                        correct_count += 1
                    f.write(f"{idx}\t{pred}\t{true}\t{is_correct}\n")
            
            accuracy = correct_count / len(predictions) if len(predictions) > 0 else 0
            print(f"Prediction comparison exported to: {comparison_path}")
            print(f"Accuracy: {accuracy:.4f} ({correct_count}/{len(predictions)})")
        
        return output_path
    
    def evaluate_saved_model(self, model_dir, test_data, dataset_name="test", export_predictions=True):
        """
        Load a saved model and evaluate it on test data
        
        Args:
            model_dir: Path to the directory containing the saved model
            test_data: DataFrame with 'functionSource' and 'label' columns
            dataset_name: Name to include in export filename
            export_predictions: Whether to export predictions to txt file
            
        Returns:
            Dictionary with evaluation results
        """
        # Load the saved model
        print(f"\n=== Loading Saved Model for Evaluation ===")
        self.load_model(model_dir)
        
        # Prepare test data
        test_texts = test_data['functionSource'].tolist()
        test_labels = test_data['label'].tolist()
        
        test_dataset = CodeDataset(test_texts, test_labels, self.preprocessor.tokenizer)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
        
        # Evaluate the loaded model
        results = self.evaluate_model(test_loader, dataset_name=dataset_name, export_predictions=export_predictions)
        
        return results
    
    def predict_dataset(self, test_data, dataset_name="test", export_predictions=True):
        """
        Make predictions on an entire dataset and optionally export them
        
        Args:
            test_data: DataFrame with 'functionSource' and optionally 'label' columns
            dataset_name: Name to include in export filename
            export_predictions: Whether to export predictions to txt file
            
        Returns:
            Dictionary with predictions, true labels (if available), and metrics
        """
        if self.model is None:
            raise ValueError("No model loaded. Call train_model or load_model first.")
        
        print(f"\n=== Making Predictions on {dataset_name} Dataset ===")
        print(f"Dataset size: {len(test_data)} samples")
        
        # Check if labels are available
        has_labels = 'label' in test_data.columns
        
        # Prepare data loader
        test_texts = test_data['functionSource'].tolist()
        test_labels = test_data['label'].tolist() if has_labels else [0] * len(test_texts)
        
        test_dataset = CodeDataset(test_texts, test_labels, self.preprocessor.tokenizer)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
        
        # Make predictions
        self.model.eval()
        predictions = []
        true_labels = []
        
        with torch.no_grad():
            for batch in tqdm(test_loader, desc="Making predictions"):
                # Move batch to device
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)
                
                # Forward pass
                outputs = self.model(input_ids, attention_mask)
                
                # Get predictions
                _, preds = torch.max(outputs, dim=1)
                
                # Store predictions and true labels
                predictions.extend(preds.cpu().tolist())
                if has_labels:
                    true_labels.extend(labels.cpu().tolist())
        
        # Calculate metrics if labels are available
        results = {'predictions': predictions}
        
        if has_labels:
            results['true_labels'] = true_labels
            test_accuracy = accuracy_score(true_labels, predictions)
            test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(
                true_labels, predictions, average='binary'
            )
            
            results.update({
                'accuracy': test_accuracy,
                'precision': test_precision,
                'recall': test_recall,
                'f1': test_f1
            })
            
            print(f"Accuracy: {test_accuracy:.4f}")
            print(f"Precision: {test_precision:.4f}")
            print(f"Recall: {test_recall:.4f}")
            print(f"F1: {test_f1:.4f}")
        
        # Export predictions if requested
        if export_predictions:
            export_path = self.export_predictions(
                predictions, 
                true_labels if has_labels else None, 
                dataset_name
            )
            results['export_path'] = export_path
        
        return results
    
    def predict(self, code_text):
        """
        Make a prediction on a single code sample
        
        Args:
            code_text: String containing the code to analyze
            
        Returns:
            Dictionary with prediction results
        """
        if self.model is None:
            raise ValueError("No model loaded. Call train_model or load_model first.")
        
        # Preprocess and tokenize the code
        encoding = self.preprocessor.tokenize(
            code_text, 
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )
        
        # Move tensors to device
        input_ids = encoding['input_ids'].to(device)
        attention_mask = encoding['attention_mask'].to(device)
        
        # Set model to evaluation mode
        self.model.eval()
        
        # Make prediction
        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask)
            probabilities = torch.softmax(outputs, dim=1)
            confidence, prediction = torch.max(probabilities, dim=1)
        
        result = {
            'prediction': prediction.item(),  # 0: not vulnerable, 1: vulnerable
            'confidence': confidence.item(),
            'probabilities': probabilities[0].cpu().numpy().tolist(),
            'label_names': ['Not Vulnerable', 'Vulnerable']
        }
        
        return result

def free_gpu_memory():
    """Free up GPU memory"""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

2025-06-02 04:30:08.375408: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748838608.589559      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748838608.645692      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using device: cuda


In [None]:
# Reload data and try with smaller batch size
trainer = CodeBERTTrainer(batch_size=16, epochs=5)
results1 = trainer.run_all(train_data=train_399, test_data=test_399, freeze_bert=False)
results2 = trainer.run_all(train_data=train_119, test_data=test_119, freeze_bert=False)
results3 = trainer.run_all(train_data=train_189, test_data=test_189, freeze_bert=False)
results4 = trainer.run_all(train_data=train_416, test_data=test_416, freeze_bert=False)
results5 = trainer.run_all(train_data=train_20, test_data=test_20, freeze_bert=False)

# Generate adversarial code with Gemini API

In [None]:
# import pandas as pd
# import numpy as np
# from sklearn.svm import LinearSVC
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.utils.data import Dataset, DataLoader
# import re
# from collections import Counter
# import requests
# import json
# import random
# import os
# import csv
# import time

# class CodeDataset(Dataset):
#     def __init__(self, sequences, labels=None):
#         self.sequences = sequences
#         self.labels = labels if labels is not None else np.zeros(len(sequences))
    
#     def __len__(self):
#         return len(self.sequences)
    
#     def __getitem__(self, idx):
#         return torch.tensor(self.sequences[idx], dtype=torch.long), torch.tensor(self.labels[idx], dtype=torch.float)

# class BiLSTMAttention(nn.Module):
#     def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
#         super(BiLSTMAttention, self).__init__()
        
#         self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
#         self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
#         self.fc = nn.Linear(hidden_dim * 2, output_dim)
#         self.dropout = nn.Dropout(dropout)
        
#         # Attention mechanism
#         self.attention = nn.Linear(hidden_dim * 2, 1)
        
#     def forward(self, text, text_lengths=None):
#         # text: [batch size, seq len]
#         embedded = self.embedding(text)
#         # embedded: [batch size, seq len, embedding dim]
        
#         # Pass through LSTM
#         lstm_output, (hidden, cell) = self.lstm(embedded)
#         # lstm_output: [batch size, seq len, hidden dim * 2]
        
#         # Calculate attention weights
#         attention_weights = torch.tanh(self.attention(lstm_output))
#         # attention_weights: [batch size, seq len, 1]
        
#         # Apply softmax to get normalized weights
#         attention_weights = F.softmax(attention_weights, dim=1)
        
#         # Apply attention weights to LSTM outputs
#         context_vector = torch.sum(attention_weights * lstm_output, dim=1)
#         # context_vector: [batch size, hidden dim * 2]
        
#         # Final prediction
#         output = self.fc(self.dropout(context_vector))
#         # output: [batch size, output dim]
        
#         return output, attention_weights

# class AdversarialCodeGenerator:
#     def __init__(self, api_key="AIzaSyB2HPcy0LPZKiN2TihoICDdOU_23mhqfa8", verbose=0):
#         self.api_key = api_key
#         self.gemini_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent"
#         self.verbose = verbose
#         self.request_timestamps = []  # To track request times for rate limiting
#         self.rate_limit = 10  # Requests per minute
#         self.rate_limit_window = 60  # Seconds in a minute
        
#         self.keyword_categories = {
#             'data_type': ['int', 'char', 'float', 'double', 'void', 'struct', 'const', 'unsigned', 'signed'],
#             'control_statement': ['if', 'else', 'for', 'while', 'switch', 'case', 'break', 'continue', 'return'],
#             'storage_classes': ['static', 'extern', 'auto', 'register', 'volatile'],
#             'input_output': ['printf', 'scanf', 'gets', 'puts', 'fgets', 'fputs', 'read', 'write'],
#             'miscellaneous': ['sizeof', 'malloc', 'free', 'null', 'define', 'include', 'typedef']
#         }
        
#         self.prompt_templates = [
#             "Given the partial preceding/succeeding codes as:",
#             "With the partial preceding/following codes provided as:",
#             "In light of the incomplete preceding/following codes as:",
#             "Taking into account the limited preceding/succeeding codes as:"
#         ]
    
#     def categorize_attention_words(self, attention_words):
#         """Categorize attention words based on predefined categories"""
#         categorized = {category: [] for category in self.keyword_categories}
#         uncategorized = []
        
#         for word, score in attention_words:
#             categorized_flag = False
#             for category, keywords in self.keyword_categories.items():
#                 if word.lower() in [k.lower() for k in keywords]:
#                     categorized[category].append((word, score))
#                     categorized_flag = True
#                     break
            
#             if not categorized_flag:
#                 uncategorized.append((word, score))
        
#         return categorized, uncategorized
    
#     def generate_prompt(self, function_code, attention_words):
#         """Generate prompt for Gemini API based on function code and attention words"""
#         categorized_words, uncategorized = self.categorize_attention_words(attention_words)
        
#         template = random.choice(self.prompt_templates)
        
#         features = []
        
#         if categorized_words['data_type']:
#             data_types = [word for word, _ in categorized_words['data_type'][:3]]
#             if data_types:
#                 features.append(f"data types including {', '.join(data_types)}")
        
#         if categorized_words['control_statement']:
#             control_stmts = [word for word, _ in categorized_words['control_statement'][:2]]
#             if control_stmts:
#                 features.append(f"control structures like {', '.join(control_stmts)}")
        
#         if categorized_words['storage_classes']:
#             storage_classes = [word for word, _ in categorized_words['storage_classes'][:2]]
#             if storage_classes:
#                 features.append(f"storage classes such as {', '.join(storage_classes)}")
        
#         if categorized_words['input_output']:
#             io_funcs = [word for word, _ in categorized_words['input_output'][:2]]
#             if io_funcs:
#                 features.append(f"I/O operations using {', '.join(io_funcs)}")
        
#         if uncategorized:
#             top_vars = [f"{word}_var" for word, _ in uncategorized[:3]]
#             if top_vars:
#                 features.append(f"variables named {', '.join(top_vars)}")
        
#         feature_text = ", ".join(features) if features else "basic C programming constructs"
        
#         prompt = f"""You are an expert C programmer tasked with generating adversarial code snippets for vulnerability detection research.

# {template}
# {function_code}

# Based on the attention words: {', '.join([word for word, _ in attention_words[:10]])}, please generate several lines in C (maximum 8 lines) that contain {feature_text}. 

# Requirements:
# 1. Generate the codes in dense format
# 2. Ensure the code is compilable but represents dead code (non-functional)
# 3. Use variable names that relate to the context but don't interfere with original functionality
# 4. Keep it under 8 lines total
# 5. Make it vulnerability-free

# Please provide only the C code without explanations."""
#         return prompt
    
#     def call_gemini_api(self, prompt):
#         """Call Gemini API to generate adversarial code with rate limiting"""
#         # Rate limiting logic
#         current_time = time.time()
#         # Remove timestamps older than 60 seconds
#         self.request_timestamps = [t for t in self.request_timestamps if current_time - t < self.rate_limit_window]
        
#         # Check if rate limit would be exceeded
#         if len(self.request_timestamps) >= self.rate_limit:
#             # Wait until the oldest request is outside the rate limit window
#             wait_time = self.rate_limit_window - (current_time - self.request_timestamps[0])
#             if wait_time > 0:
#                 if self.verbose:
#                     print(f"Rate limit reached. Waiting {wait_time:.2f} seconds...")
#                 time.sleep(wait_time)
#                 # Update current time after waiting
#                 current_time = time.time()
#                 self.request_timestamps = [t for t in self.request_timestamps if current_time - t < self.rate_limit_window]
        
#         # Record the current request timestamp
#         self.request_timestamps.append(current_time)
        
#         headers = {
#             'Content-Type': 'application/json',
#         }
        
#         data = {
#             "contents": [{
#                 "parts": [{
#                     "text": prompt
#                 }]
#             }],
#             "generationConfig": {
#                 "temperature": 0.7,
#                 "topK": 40,
#                 "topP": 0.95,
#                 "maxOutputTokens": 512,
#             }
#         }
        
#         try:
#             response = requests.post(
#                 f"{self.gemini_url}?key={self.api_key}",
#                 headers=headers,
#                 data=json.dumps(data),
#                 timeout=30
#             )
            
#             if response.status_code == 200:
#                 result = response.json()
#                 if 'candidates' in result and len(result['candidates']) > 0:
#                     generated_text = result['candidates'][0]['content']['parts'][0]['text']
#                     return self.clean_generated_code(generated_text)
#                 else:
#                     return "Error: No candidates in response"
#             else:
#                 return f"Error: API call failed with status {response.status_code}: {response.text}"
                
#         except Exception as e:
#             return f"Error calling Gemini API: {str(e)}"
    
#     def clean_generated_code(self, generated_text):
#         """Clean and format the generated code"""
#         cleaned = re.sub(r'```c?\n?', '', generated_text)
#         cleaned = re.sub(r'```', '', cleaned)
        
#         lines = [line.strip() for line in cleaned.split('\n') if line.strip()]
        
#         code_lines = []
#         for line in lines[:8]:
#             if (line.endswith(';') or line.endswith('{') or line.endswith('}') or 
#                 'int ' in line or 'char ' in line or 'float ' in line or 
#                 'if(' in line or 'for(' in line or 'while(' in line):
#                 code_lines.append(line)
        
#         return '\n'.join(code_lines) if code_lines else generated_text.strip()

#     def generate_adversarial_code(self, function_code, attention_words):
#         """Generate adversarial code and return it with the prompt used"""
#         # Generate prompt for API call
#         prompt = self.generate_prompt(function_code, attention_words)
        
#         # Print the prompt being sent to the model
#         if self.verbose:
#             print("\n=== Prompt sent to Gemini API ===")
#             print(prompt)
#             print("=" * 80)
        
#         # Generate adversarial code without validation
#         adversarial_code = self.call_gemini_api(prompt)
#         return adversarial_code, prompt

#     def save_to_csv(self, adversarial_results):
#         """Save the adversarial results to a CSV file"""
#         if not adversarial_results:
#             if self.verbose:
#                 print("No adversarial samples to save.")
#             return
            
#         # Use the fixed filename
#         csv_filename = "attack_pool.csv"
        
#         with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
#             writer = csv.writer(csvfile)
#             writer.writerow(['original_function', 'adversarial_function', 'label'])
            
#             for sample in adversarial_results:
#                 writer.writerow([
#                     sample['original_code'],
#                     sample['adversarial_code'],
#                     sample['label']
#                 ])
        
#         if self.verbose:
#             print(f"\nAdversarial samples saved to {csv_filename}")
        
#         return csv_filename

# class SVMBiLSTMAttentionAnalyzer:
#     def __init__(self, max_features=5000, max_len=200, batch_size=32, verbose=0):
#         self.max_features = max_features
#         self.max_len = max_len
#         self.batch_size = batch_size
#         self.verbose = verbose
#         self.tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english', ngram_range=(1, 2), max_df=0.95, min_df=2)
#         self.scaler = StandardScaler(with_mean=False)
#         self.svm_model = None
#         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#         self.model = None
#         self.vocab = None
#         self.word_to_idx = None
#         self.idx_to_word = None
    
#     def preprocess_code(self, code_text):
#         """Preprocess C code by removing comments and normalizing"""
#         # Remove comments
#         code_text = re.sub(r'//.*', '', code_text)
#         code_text = re.sub(r'/\*.*?\*/', '', code_text, flags=re.DOTALL)
        
#         # Remove string literals (which often appear in print statements)
#         code_text = re.sub(r'"[^"]*"', '', code_text)
#         code_text = re.sub(r"'[^']*'", '', code_text)
        
#         # Remove print statements
#         code_text = re.sub(r'print[kf]?\s*\([^)]*\)\s*;', '', code_text)
        
#         # Normalize whitespace
#         code_text = re.sub(r'\s+', ' ', code_text)
#         code_text = code_text.lower().strip()
#         return code_text
    
#     def extract_direct_significant_tokens(self, function_code, k=10):
#         """Extract significant tokens directly from function code without neural model"""
#         # Preprocess code
#         preprocessed_code = self.preprocess_code(function_code)
        
#         # Tokenize by splitting on common C delimiters
#         tokens = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*', preprocessed_code)
        
#         # Filter tokens
#         filtered_tokens = []
#         for token in tokens:
#             # Skip very short words, punctuation, and common C keywords that aren't meaningful
#             if len(token) < 2 or token in ['if', 'for', 'int', 'char', 'void', 'the', 'and', 'or', 'to', 'of', 
#                                            'return', 'static', 'case', 'break', 'switch', 'while', 'struct']:
#                 continue
#             filtered_tokens.append(token)
        
#         # Count token frequency
#         token_counts = Counter(filtered_tokens)
        
#         # Return most common tokens with dummy scores
#         top_tokens = token_counts.most_common(k)
        
#         # Normalize to use the same format as the attention mechanism
#         result = [(token, 0.01) for token, count in top_tokens]
        
#         # Don't add placeholder tokens - just return what we have
#         return result
    
#     def build_vocabulary(self, processed_codes, min_freq=2):
#         """Build vocabulary from processed code"""
#         # Tokenize by splitting on whitespace and keep only valid C identifiers
#         all_tokens = []
#         for code in processed_codes:
#             # Improved tokenization to properly split C tokens
#             tokens = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*', code)
#             all_tokens.extend(tokens)
        
#         # Count token frequencies
#         token_counts = Counter(all_tokens)
        
#         # Filter by frequency and create vocabulary
#         vocab = ['<pad>', '<unk>']
#         for token, count in token_counts.items():
#             if count >= min_freq:
#                 vocab.append(token)
        
#         # Create mappings
#         word_to_idx = {word: idx for idx, word in enumerate(vocab)}
#         idx_to_word = {idx: word for word, idx in word_to_idx.items()}
        
#         self.vocab = vocab
#         self.word_to_idx = word_to_idx
#         self.idx_to_word = idx_to_word
        
#         return vocab, word_to_idx, idx_to_word
    
#     def tokenize_code(self, code):
#         """Convert code to sequence of token indices"""
#         # Improved tokenization to properly split tokens
#         tokens = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*', code)
#         indices = [self.word_to_idx.get(token, 1) for token in tokens]  # 1 is <unk>
        
#         # Pad or truncate to max_len
#         if len(indices) > self.max_len:
#             indices = indices[:self.max_len]
#         else:
#             indices = indices + [0] * (self.max_len - len(indices))  # 0 is <pad>
        
#         return indices
    
#     def extract_support_vectors(self, data):
#         """Extract SVM support vectors for non-vulnerable samples (label 0)"""
#         if self.verbose:
#             print("Preprocessing code data sequentially...")
#         processed_code = [self.preprocess_code(text) for text in data['functionSource']]
        
#         if self.verbose:
#             print("Extracting TF-IDF features...")
#         tfidf_features = self.tfidf_vectorizer.fit_transform(processed_code)
#         tfidf_features_scaled = self.scaler.fit_transform(tfidf_features)
        
#         if self.verbose:
#             print("Training LinearSVC model...")
#         self.svm_model = LinearSVC(C=1.0, random_state=42, max_iter=1000)
#         self.svm_model.fit(tfidf_features_scaled, data['label'])
        
#         support_vector_indices = self.svm_model.support_ if hasattr(self.svm_model, 'support_') else np.arange(tfidf_features_scaled.shape[0])
#         if self.verbose:
#             print(f"Found {len(support_vector_indices)} support vectors")
        
#         # Print label distribution for debugging
#         if self.verbose:
#             print("Label distribution in data:", dict(data['label'].value_counts()))
#             print("Label distribution in support vectors:", dict(data.iloc[support_vector_indices]['label'].value_counts()))
        
#         # Map support vector indices to original data indices
#         original_indices = data.index[support_vector_indices].tolist()
        
#         # Filter for non-vulnerable samples (label 0)
#         non_vuln_sv_mask = data.loc[original_indices, 'label'] == 0
#         non_vuln_sv_indices = np.array(original_indices)[non_vuln_sv_mask]
        
#         if self.verbose:
#             print(f"Found {len(non_vuln_sv_indices)} non-vulnerable support vectors")
        
#         # If no non-vulnerable support vectors, use all support vectors as fallback
#         if len(non_vuln_sv_indices) == 0:
#             if self.verbose:
#                 print("Warning: No non-vulnerable support vectors found. Using all support vectors as fallback.")
#             non_vuln_sv_indices = np.array(original_indices)
        
#         support_vector_codes = data.loc[non_vuln_sv_indices, 'functionSource'].values
#         support_vector_labels = data.loc[non_vuln_sv_indices, 'label'].values
        
#         return support_vector_codes, support_vector_labels, non_vuln_sv_indices
    
#     def train_bilstm_attention(self, sequences, labels, embedding_dim=128, hidden_dim=64, epochs=5):
#         """Train BiLSTM with attention on sequences"""
#         # Prepare dataset and dataloader
#         dataset = CodeDataset(sequences, labels)
#         dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        
#         # Initialize model
#         vocab_size = len(self.vocab)
#         self.model = BiLSTMAttention(vocab_size, embedding_dim, hidden_dim, 1)
#         self.model.to(self.device)
        
#         # Define optimizer and loss function
#         optimizer = optim.Adam(self.model.parameters())
#         criterion = nn.BCEWithLogitsLoss()
        
#         # Training loop
#         self.model.train()
#         for epoch in range(epochs):
#             epoch_loss = 0
#             correct_preds = 0
#             total_preds = 0
            
#             for batch_idx, (text, labels) in enumerate(dataloader):
#                 text = text.to(self.device)
#                 labels = labels.to(self.device)
                
#                 # Zero gradients
#                 optimizer.zero_grad()
                
#                 # Forward pass
#                 predictions, _ = self.model(text)
#                 predictions = predictions.squeeze(1)
                
#                 # Calculate loss
#                 loss = criterion(predictions, labels)
                
#                 # Backward pass
#                 loss.backward()
                
#                 # Update parameters
#                 optimizer.step()
                
#                 # Track metrics
#                 epoch_loss += loss.item()
#                 predicted_labels = torch.sigmoid(predictions) > 0.5
#                 correct_preds += (predicted_labels == labels.bool()).sum().item()
#                 total_preds += labels.size(0)
            
#             # Print epoch results
#             if self.verbose:
#                 avg_loss = epoch_loss / len(dataloader)
#                 accuracy = correct_preds / total_preds
#                 print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")
        
#         if self.verbose:
#             print("Training complete!")
        
#         return self.model
    
#     def extract_attention_weights(self, model, sequences):
#         """Extract attention weights for each sequence"""
#         model.eval()
#         attention_weights_list = []
        
#         # Process in batches
#         batch_size = self.batch_size
#         num_batches = (len(sequences) + batch_size - 1) // batch_size
        
#         with torch.no_grad():
#             for i in range(num_batches):
#                 start_idx = i * batch_size
#                 end_idx = min((i + 1) * batch_size, len(sequences))
#                 batch_sequences = sequences[start_idx:end_idx]
                
#                 # Convert to tensor
#                 batch_tensor = torch.tensor(batch_sequences, dtype=torch.long).to(self.device)
                
#                 # Forward pass
#                 _, attention_weights = model(batch_tensor)
                
#                 # Convert to numpy and save
#                 attention_weights = attention_weights.squeeze(-1).cpu().numpy()
#                 attention_weights_list.append(attention_weights)
        
#         # Concatenate all batches
#         all_attention_weights = np.vstack(attention_weights_list)
        
#         return all_attention_weights
    
#     def get_top_k_attention_words(self, sequences, attention_weights, k=10):
#         """Extract top-k words based on attention weights"""
#         word_attention_scores = {}
        
#         for seq_idx, (sequence, weights) in enumerate(zip(sequences, attention_weights)):
#             for pos, token_id in enumerate(sequence):
#                 if token_id == 0:  # Skip padding
#                     continue
                
#                 # Get the word and its attention weight
#                 word = self.idx_to_word.get(token_id, '<unk>')
#                 weight = weights[pos]
                
#                 # Skip very short words, punctuation, and common C keywords that aren't meaningful
#                 if len(word) < 2 or word in ['if', 'for', 'int', 'char', 'void', 'the', 'and', 'or', 'to', 'of', 
#                                            'return', 'static', 'case', 'break', 'switch', 'while', 'struct',
#                                            '<pad>', '<unk>']:
#                     continue
                
#                 if word not in word_attention_scores:
#                     word_attention_scores[word] = []
                
#                 word_attention_scores[word].append(weight)
        
#         # Calculate average attention score for each word
#         avg_scores = {word: np.mean(scores) for word, scores in word_attention_scores.items()}
        
#         # Get top-k words by attention score
#         top_k_words = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)[:k]
        
#         return top_k_words
    
#     def get_function_attention_words(self, sequence, attention_weights, function_code, k=10):
#         """Extract top-k words for a specific function based on its attention weights"""
#         word_attention_scores = {}
        
#         # First try to get words from attention mechanism
#         for pos, token_id in enumerate(sequence):
#             if token_id == 0:  # Skip padding
#                 continue
            
#             # Get the word and its attention weight
#             word = self.idx_to_word.get(token_id, '<unk>')
#             weight = attention_weights[pos]
            
#             # Skip very short words, punctuation, and common C keywords that aren't meaningful
#             if len(word) < 2 or word in ['if', 'for', 'int', 'char', 'void', 'the', 'and', 'or', 'to', 'of', 
#                                        'return', 'static', 'case', 'break', 'switch', 'while', 'struct',
#                                        '<pad>', '<unk>']:
#                 continue
            
#             # Only count each word once per function with its highest attention value
#             if word not in word_attention_scores or weight > word_attention_scores[word]:
#                 word_attention_scores[word] = weight
        
#         # Get top-k words by attention score
#         top_k_words = sorted(word_attention_scores.items(), key=lambda x: x[1], reverse=True)[:k]
        
#         # Check if the words actually appear in the function
#         valid_words = []
#         for word, score in top_k_words:
#             if word.lower() in function_code.lower():
#                 valid_words.append((word, score))
        
#         # If not enough valid words, use direct token extraction
#         if len(valid_words) < 3:  # We need at least a few valid words
#             direct_tokens = self.extract_direct_significant_tokens(function_code, k)
#             return direct_tokens
        
#         return valid_words
    
#     def analyze_and_generate_adversarial(self, data, k=10, epochs=5, num_samples=200):
#         """Main analysis pipeline with adversarial code generation"""
#         if self.verbose:
#             print("=== Starting SVM Support Vector Extraction ===")
#         sv_codes, sv_labels, sv_indices = self.extract_support_vectors(data)
        
#         if len(sv_codes) == 0:
#             if self.verbose:
#                 print("No support vectors found!")
#             return None, None, None
        
#         if self.verbose:
#             print(f"\n=== Processing {len(sv_codes)} Support Vector Samples ===")
#         processed_sv_codes = [self.preprocess_code(code) for code in sv_codes]
        
#         # Print a sample of processed code to verify preprocessing
#         if processed_sv_codes and self.verbose:
#             print("\nSample of processed code:")
#             print("-" * 50)
#             print(processed_sv_codes[0][:500] + "..." if len(processed_sv_codes[0]) > 500 else processed_sv_codes[0])
#             print("-" * 50)
        
#         # Build vocabulary
#         if self.verbose:
#             print("\n=== Building Vocabulary ===")
#         vocab, word_to_idx, idx_to_word = self.build_vocabulary(processed_sv_codes)
#         if self.verbose:
#             print(f"Vocabulary size: {len(vocab)}")
#             print(f"Sample vocabulary: {vocab[:20]}")
        
#         # Tokenize sequences
#         if self.verbose:
#             print("\n=== Tokenizing Sequences ===")
#         sequences = [self.tokenize_code(code) for code in processed_sv_codes]
#         sequences = np.array(sequences)
#         if self.verbose:
#             print(f"Sequences shape: {sequences.shape}")
        
#         # Train BiLSTM with attention
#         if self.verbose:
#             print("\n=== Training BiLSTM Attention Model ===")
#         model = self.train_bilstm_attention(sequences, sv_labels, epochs=epochs)
        
#         if self.verbose:
#             print(f"\n=== Selecting {num_samples} Samples for Adversarial Generation ===")
        
#         # Initialize the adversarial generator with same verbose level
#         adversarial_generator = AdversarialCodeGenerator(verbose=self.verbose)
        
#         adversarial_results = []
#         selected_indices = np.random.choice(len(sv_codes), min(num_samples, len(sv_codes)), replace=False)
        
#         for i, idx in enumerate(selected_indices):
#             if self.verbose:
#                 print(f"\n--- Generating adversarial code {i+1}/{len(selected_indices)} ---")
#             function_code = sv_codes[idx]
#             label = sv_labels[idx]  # Get the label (0 for non-vulnerable)
#             if self.verbose:
#                 print(f"Original function (first 200 chars):")
#                 print(function_code[:200] + "..." if len(function_code) > 200 else function_code)
            
#             # Extract attention weights for this specific function
#             if self.verbose:
#                 print("\n=== Extracting Function-Specific Attention Words ===")
#             func_sequence = np.array([sequences[idx]])  # Get this function's sequence
            
#             # Get attention weights for this function only
#             func_attention_weights = self.extract_attention_weights(model, func_sequence)
            
#             # Get top-k attention words for this specific function
#             func_top_words = self.get_function_attention_words(func_sequence[0], func_attention_weights[0], function_code, k)
            
#             if self.verbose:
#                 print(f"\n=== Top {min(k, len(func_top_words))} Attention Words for this Function ===")
#                 for j, (word, score) in enumerate(func_top_words, 1):
#                     print(f"{j:2d}. {word:<20} (attention: {score:.4f})")
                
#                 # Verify if attention words are present in the function
#                 print("\nChecking attention words presence in this function:")
#                 present_words = []
#                 for word, score in func_top_words:
#                     if word.lower() in function_code.lower():
#                         present_words.append(f"{word} (✓)")
#                     else:
#                         present_words.append(f"{word} (✗)")
#                 print(", ".join(present_words))
            
#             # Generate adversarial code using the extracted words
#             adversarial_code, prompt_used = adversarial_generator.generate_adversarial_code(
#                 function_code, func_top_words
#             )
            
#             if self.verbose:
#                 print(f"\nGenerated adversarial code:")
#                 print(adversarial_code)
#                 print("-" * 50)
            
#             adversarial_results.append({
#                 'original_code': function_code,
#                 'adversarial_code': adversarial_code,
#                 'prompt_used': prompt_used,
#                 'sv_index': sv_indices[idx],
#                 'label': int(label)  # Store the original label (0 for non-vulnerable)
#             })
        
#         # Save adversarial samples to CSV
#         self.save_to_csv(adversarial_results)
        
#         return None, sv_indices, adversarial_results
        
#     def save_to_csv(self, adversarial_results):
#         """Save the adversarial results to a CSV file"""
#         if not adversarial_results:
#             if self.verbose:
#                 print("No adversarial samples to save.")
#             return
            
#         # Use the fixed filename
#         csv_filename = "attack_pool.csv"
        
#         with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
#             writer = csv.writer(csvfile)
#             writer.writerow(['original_code', 'adversarial_code', 'label'])
            
#             for sample in adversarial_results:
#                 writer.writerow([
#                     sample['original_code'],
#                     sample['adversarial_code'],
#                     sample['label']
#                 ])
        
#         if self.verbose:
#             print(f"\nAdversarial samples saved to {csv_filename}")
        
#         return csv_filename

# # Initialize analyzer with verbose mode option
# verbose=1
# analyzer = SVMBiLSTMAttentionAnalyzer(max_features=5000, max_len=200, verbose=verbose)

# analyzer.verbose = verbose

# # Initialize adversarial generator
# adversarial_generator = AdversarialCodeGenerator(verbose=verbose)

# # Run analysis with adversarial generation
# top_tokens, support_vector_indices, adversarial_samples = analyzer.analyze_and_generate_adversarial(
#     test_data, k=10, epochs=5, num_samples=399
# )

# if adversarial_samples:
#     print(f"\nAnalysis completed successfully!")
#     print(f"Processed {len(support_vector_indices)} support vector samples")
#     print(f"Generated {len(adversarial_samples)} adversarial code samples")
    
#     print("\n=== Adversarial Generation Summary ===")
#     for i, sample in enumerate(adversarial_samples, 1):
#         print(f"\nSample {i}:")
#         print(f"Lines of adversarial code: {len(sample['adversarial_code'].splitlines())}")
#         print(f"Support vector index: {sample['sv_index']}")
#         if 'Error' not in sample['adversarial_code']:
#             print("✓ Successfully generated")
#         else:
#             print("✗ Generation failed:", sample['adversarial_code'][:100])

# FGA

In [2]:
import pandas as pd
import numpy as np
import random
import json
import sklearn.metrics
import math
from tqdm import tqdm

def centriod_init(K, min_distance):
    random_center = []
    attempts = 0

    while len(random_center) < K:
        num = random.uniform(0, 1)
        if all(abs(num - existing) >= min_distance for existing in random_center):
            random_center.append(num)
        attempts += 1
        if attempts > 100:  # Avoid infinite loops
            raise ValueError(
                "Failed to generate numbers with the required minimum distance. Try a different min_distance.")

    return np.array(random_center)

def get_fitness_score(pre_result_path, adv_file_path, snippet_len, penalty):
  """
  This function calculates the fitness score of the expected adversarial files.

  input:
  pre_result_path: Path of the predicted results;
  adv_file_path: Path of the advesarial files (with labels);

  output:
  The fitness score of the created adveersrial file.
  """

  f = open(pre_result_path)
  line = f.readline()
  pre_dic = {}
  while line:
      split_data = line.split('\t')
      pre_dic[int(split_data[0])] = int(split_data[1].split('\n')[0])
      line = f.readline()
  f.close()


  test_lines = []
  for line in open(adv_file_path, 'r'):
    test_lines.append(json.loads(line))
  test_data_dic = {}
  for i in test_lines:
      test_data_dic[i['idx']]=i['target']


  pre_list = []
  true_list = []
  for i in test_data_dic.keys():
      pre_list.append(pre_dic[i] )
      true_list.append(test_data_dic[i])

  return 1 - sklearn.metrics.accuracy_score(true_list,pre_list) - penalty * snippet_len


def calcaulate_weight(data, centroid_array):

    cluster_num = len(centroid_array)
    weight = []

    for j in range(cluster_num):
        up = data - centroid_array[j]
        weights_array = np.array([((up/(data - center))**2)**(1/cluster_num-1) if abs(data - center) > 1e-10 else 1e10 for center in centroid_array])
        weight.append(1/np.sum(weights_array))

    return np.array(weight)

def calculate_cost(weight, data, centroid_array, alpha):

    pal_weight = weight ** alpha
    dis = np.array([np.abs(data - center) for center in centroid_array])
    cost = np.dot(pal_weight, dis)

    return cost


def select(pop_dict, centroid, centriod_array, decay_rate):    # nature selection wrt pop's fitness

    fitness_values = []
    keys_list = []

    for key in pop_dict.keys():
      fitness_values.append(pop_dict[key])
      keys_list.append(key)

    sorted_values = sorted(fitness_values, reverse=True)
    factor = []
    for value in sorted_values:
        weights_array = np.array([(((value - centroid) / (value - center)) ** 2) ** (1 / len(centriod_array) - 1) for center in centriod_array])
        weight = 1/np.sum(weights_array)
        f = (weight ** decay_rate) * np.abs(value - centroid)
        factor.append(math.exp(f))

    f_sum = np.sum(factor)
    p = np.array([element/f_sum for element in factor])
    candidate = np.random.choice(sorted_values, p=p.ravel())
    index = fitness_values.index(candidate)
    can_snippet = keys_list[index]
    return can_snippet

def update_global_pop(offsprings, total_pop, fit_scores):

    pop_num = len(total_pop)

    for idx in range(len(offsprings)):
        offspring = offsprings[idx]
        fit_score = fit_scores[idx]
        total_pop[offspring] = fit_score

    sorted_pop = dict(sorted(total_pop.items(), key=lambda x: x[1], reverse=True))
    current_num = len(sorted_pop)
    cut_memeber = list(sorted_pop.keys())[pop_num:current_num]
    for member in cut_memeber:
        sorted_pop.pop(member)

    return sorted_pop

def get_vul_idx(label_list, pred, target):
    vul_idx = []

    for i in label_list:
      if pred[i] == 1 and target[i] == 1:
        vul_idx.append(i)

    return vul_idx

def get_vul_codes(test_dicts, vul_idx):
    vul_codes = {}

    for i in test_dicts:
       if i['idx'] in vul_idx:
         vul_codes[i['idx']] = i['func']
    return vul_codes

def read_adv_code_snippet(adv_snippet_file_path):
    with open(adv_snippet_file_path, 'r') as f:
        lines = f.readlines()

    adver_content = []
    for i in range(len(lines)):
        if i >= 1:
          line_list = lines[i].split()
          del line_list[1:3]
          line = " ".join(line_list)
          if line != "":
              adver_content.append(line)

    return adver_content

def add_adver_sample_2_ast(vul_codes, insert_position, ad_content):

    temp = []
    names = []
    for item in tqdm(vul_codes.keys()):
        codes = vul_codes[item].split()
        codes_len = len(codes)
        # insert_idx = random.randint(0, codes_len)
        insert_idx = 15

        for i in range(len(ad_content)):
            codes.insert(insert_position, ad_content[i])
            insert_idx+=1
        temp.append(" ".join(codes))
        names.append(item)

    return temp, names

def write_adv_to_json(ast_test_codes, ast_test_names, ast_test_labels, output_name):
    from collections import defaultdict
    ast_dicts = []

    for i in tqdm(range(len(ast_test_codes))):
        record = defaultdict()

        record['func'] = ast_test_codes[i]
        record['idx'] = i
        record['project'] = ast_test_names[i]
        record['target'] = ast_test_labels[i]

        ast_dicts.append(record)

    with open(output_name, 'w') as f:
        for data in ast_dicts:
            line = json.dumps(data)
            f.write(line+'\n')





# Select and test

In [11]:
import numpy as np
import pandas as pd
import torch
import os
import re
import random
import glob
import json
import math
from tqdm import tqdm
from collections import defaultdict
from copy import deepcopy
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import sys
try:
    import seaborn as sns
except ImportError:
    print("Warning: Seaborn not installed. Some visualizations may not work.")



class AdversarialLearning:
    """
    Adversarial Learning class implementing Fuzzy Genetic Algorithm for
    optimizing adversarial samples against a vulnerability detection model.
    """
    
    def __init__(self, attack_pool_path="attack_pool.csv", model_path=None, 
                 pop_size=20, clusters=3, max_generations=50, decay_rate=1.5, 
                 alpha=2.0, penalty=0.01, verbose=1):
        """
        Initialize the Adversarial Learning with FGA
        
        Args:
            attack_pool_path: Path to the attack pool CSV
            model_path: Path to the trained CodeBERT model (optional)
            pop_size: Population size for genetic algorithm
            clusters: Number of fuzzy clusters
            max_generations: Maximum number of generations
            decay_rate: Decay rate for fuzzy clustering
            alpha: Fuzziness factor
            penalty: Penalty factor for code snippet length
            verbose: Verbosity level
        """
        self.attack_pool_path = attack_pool_path
        self.model_path = model_path
        self.pop_size = pop_size
        self.clusters = clusters
        self.max_generations = max_generations
        self.decay_rate = decay_rate
        self.alpha = alpha
        self.penalty = penalty
        self.verbose = verbose
        
        # Load attack pool
        self.attack_pool = self._load_attack_pool()
        
        # Initialize model trainer and model
        self.trainer = None
        self.model = None
        if model_path:
            self._load_model(model_path)
        
        # Initialize population and centroids
        self.population = {}
        self.centroids = None
        
        # Add storage for loaded predictions
        self.original_predictions = None
        self.prediction_file_path = None
        
    def _load_attack_pool(self):
        """Load the attack pool CSV file"""
        if not os.path.exists(self.attack_pool_path):
            raise FileNotFoundError(f"Attack pool file not found: {self.attack_pool_path}")
        
        attack_pool = pd.read_csv(self.attack_pool_path)
        
        if self.verbose:
            print(f"\n=== ATTACK POOL LOADING ===")
            print(f"Raw attack pool shape: {attack_pool.shape}")
            print(f"Available columns: {list(attack_pool.columns)}")
        
        # Handle different attack pool formats - only expect adversarial code
        if 'adversarial_code' in attack_pool.columns:
            # Standard format: adversarial_code column
            if self.verbose:
                print(f"Detected attack pool format with 'adversarial_code' column")
            attack_pool_standardized = attack_pool[['adversarial_code']].copy()
            
        else:
            # Try to auto-detect format based on available columns
            available_columns = list(attack_pool.columns)
            if self.verbose:
                print(f"Available columns in attack pool: {available_columns}")
            
            # If there's only one column, assume it contains adversarial code
            if len(available_columns) == 1:
                adversarial_column = available_columns[0]
                if self.verbose:
                    print(f"Using single column '{adversarial_column}' as adversarial code")
                
                attack_pool_standardized = pd.DataFrame({
                    'adversarial_code': attack_pool[adversarial_column].values
                })
            else:
                raise ValueError(f"Attack pool format not recognized. Expected 'adversarial_code' column. Found columns: {available_columns}")
        
        # Remove any rows with NaN values
        initial_size = len(attack_pool_standardized)
        attack_pool_standardized = attack_pool_standardized.dropna()
        final_size = len(attack_pool_standardized)
        
        if self.verbose:
            print(f"Attack pool processed successfully:")
            print(f"  Initial size: {initial_size}")
            print(f"  After removing NaN: {final_size}")
            print(f"  Final shape: {attack_pool_standardized.shape}")
            print(f"Sample adversarial codes:")
            for i, code in enumerate(attack_pool_standardized['adversarial_code'].head(3)):
                print(f"  [{i+1}] {code[:100]}{'...' if len(code) > 100 else ''}")
        
        return attack_pool_standardized
    
    def _load_model(self, model_path):
        """Load the CodeBERT model"""
        try:
            # Initialize the trainer
            self.trainer = CodeBERTTrainer()
            
            # Load model from saved path
            if os.path.exists(model_path):
                self.model = self.trainer.load_model(model_path)
                
                # CRITICAL FIX: Ensure model is in evaluation mode
                if self.model is not None:
                    self.model.eval()
                    # Also ensure trainer's model is in eval mode
                    if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                        self.trainer.model.eval()
                
                if self.verbose:
                    print(f"Successfully loaded model from {model_path}")
                    print(f"Model is in eval mode: {not self.model.training}")
                    
                    # Test model prediction to verify it's working
                    test_code = "void test() { char buf[10]; }"
                    try:
                        test_pred = self.trainer.predict(test_code)
                        print(f"Model test prediction: {test_pred}")
                    except Exception as e:
                        print(f"Warning: Model test prediction failed: {str(e)}")
            else:
                self.model = None
                if self.verbose:
                    print(f"Model path {model_path} not found. Will train a new model when needed.")
        except Exception as e:
            self.model = None
            print(f"Error loading model: {str(e)}")
            print(f"Model path attempted: {model_path}")
            if os.path.exists(model_path):
                print(f"Path exists but model loading failed")
                # List files in model directory for debugging
                if os.path.isdir(model_path):
                    print(f"Files in model directory: {os.listdir(model_path)}")
            else:
                print(f"Model path does not exist")
    
    def initialize_population(self):
        """Initialize the population with random adversarial code snippets"""
        if self.verbose:
            print(f"\n=== POPULATION INITIALIZATION ===")
            print(f"Attack pool size: {len(self.attack_pool)}")
            print(f"Requested population size: {self.pop_size}")
        
        # Sample from attack pool to create initial population
        if len(self.attack_pool) < self.pop_size:
            # If attack pool is smaller than pop_size, duplicate some samples
            indices = np.random.choice(len(self.attack_pool), self.pop_size, replace=True)
            if self.verbose:
                print(f"Attack pool smaller than population size - sampling with replacement")
        else:
            # Sample without replacement
            indices = np.random.choice(len(self.attack_pool), self.pop_size, replace=False)
            if self.verbose:
                print(f"Attack pool larger than population size - sampling without replacement")
        
        # Initialize population dictionary with fitness scores set to 0
        self.population = {}
        for idx in indices:
            adv_code = self.attack_pool.iloc[idx]['adversarial_code']
            self.population[adv_code] = 0  # Initial fitness score
        
        # Initialize centroids from uniform distribution
        min_distance = 1.0 / (self.clusters * 2)  # Ensure centroids are reasonably spaced
        self.centroids = centriod_init(self.clusters, min_distance)
        
        if self.verbose:
            print(f"Population successfully initialized:")
            print(f"  Population size: {len(self.population)}")
            print(f"  Unique adversarial codes: {len(set(self.population.keys()))}")
            print(f"  Clusters: {self.clusters}")
            print(f"  Initial centroids: {self.centroids}")
            
            # Show a few sample adversarial codes from the population
            print(f"Sample population codes:")
            for i, code in enumerate(list(self.population.keys())[:3]):
                print(f"  [{i+1}] {code[:80]}{'...' if len(code) > 80 else ''}")
        
        return self.population, self.centroids
    
    def calculate_fitness(self, original_df, adversarial_code, model=None, return_attack_rate=False):
        """
        Calculate fitness score for an adversarial code snippet
        
        Args:
            original_df: DataFrame containing original code
            adversarial_code: Adversarial code snippet
            model: Trained model (if None, will use loaded predictions from txt)
            
        Returns:
            Fitness score based on attack success rate and snippet length
        """
        # Create a copy of the original dataframe
        adv_df = original_df.copy()
        
        # Apply the adversarial code to each sample
        # Only add adversarial code to samples labeled as vulnerable (label=1)
        vulnerable_samples = adv_df['label'] == 1
        num_vulnerable = vulnerable_samples.sum()
        
        if num_vulnerable == 0:
            # If no vulnerable samples, make some samples vulnerable for testing
            if self.verbose >= 2:
                print("No vulnerable samples found. Creating synthetic vulnerable samples.")
            # Mark a portion of samples as vulnerable for testing
            sample_indices = np.random.choice(len(adv_df), max(1, len(adv_df) // 4), replace=False)
            adv_df.loc[sample_indices, 'label'] = 1
            vulnerable_samples = adv_df['label'] == 1
            num_vulnerable = vulnerable_samples.sum()
        
        # ENHANCED DIAGNOSTICS: Show dataset composition
        if self.verbose >= 2:
            total_samples = len(original_df)
            vulnerable_labeled = (original_df['label'] == 1).sum()
            benign_labeled = (original_df['label'] == 0).sum()
            print(f"\n=== DATASET COMPOSITION ===")
            print(f"Total samples: {total_samples}")
            print(f"Labeled as vulnerable: {vulnerable_labeled}")
            print(f"Labeled as benign: {benign_labeled}")
            print(f"Applying adversarial code to {num_vulnerable} vulnerable samples")
        
        # Use loaded predictions if available, otherwise use model
        if self.original_predictions is not None:
            if self.verbose >= 2:
                print("Using loaded predictions from txt file")
            
            # Ensure predictions match the dataset size
            if len(self.original_predictions) != len(original_df):
                print(f"Warning: Prediction length ({len(self.original_predictions)}) doesn't match dataset length ({len(original_df)})")
                # Try to align predictions with dataset
                if len(self.original_predictions) > len(original_df):
                    original_predictions = self.original_predictions[:len(original_df)]
                else:
                    # Pad with zeros if needed
                    original_predictions = np.pad(self.original_predictions, 
                                                (0, len(original_df) - len(self.original_predictions)), 
                                                constant_values=0)
            else:
                original_predictions = self.original_predictions.copy()
        else:
            # Fall back to model predictions if no txt file is loaded
            if self.verbose >= 2:
                print("No loaded predictions found, using model to predict")
            
            # Set verbosity based on self.verbose level
            if self.verbose <= 1:
                # Temporarily reduce print output
                old_stdout = sys.stdout
                sys.stdout = open(os.devnull, 'w')
            
            # Function to get predictions using the model directly
            def get_predictions(df):
                predictions = []
                try:
                    # CRITICAL FIX: Ensure model is in evaluation mode before predictions
                    if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                        self.trainer.model.eval()
                    
                    for _, row in df.iterrows():
                        code = row['functionSource']
                        # Try the trainer's predict method with error handling
                        try:
                            pred = self.trainer.predict(code)
                            if isinstance(pred, dict) and 'prediction' in pred:
                                predictions.append(pred['prediction'])
                            else:
                                # If the predict method returns an unexpected format,
                                # just use a default prediction of non-vulnerable
                                if self.verbose >= 2:
                                    print(f"Predict returned unexpected format: {pred}")
                                predictions.append(0)  # Default to non-vulnerable
                        except Exception as e:
                            if self.verbose >= 2:
                                print(f"Error in prediction for sample: {str(e)}")
                                print(f"Code length: {len(code)}")
                            # Default to predicting as non-vulnerable (0) if there's an error
                            predictions.append(0)
                except Exception as e:
                    if self.verbose >= 2:
                        print(f"Error in get_predictions: {str(e)}")
                    # Return all zeros if there's a major error
                    predictions = [0] * len(df)
                return np.array(predictions)
            
            # Get predictions on original data
            original_predictions = get_predictions(original_df)
            
            if self.verbose <= 1:
                # Restore print output
                sys.stdout.close()
                sys.stdout = old_stdout

        # Count initially vulnerable samples that were correctly predicted as vulnerable
        vulnerable_indices = np.where(vulnerable_samples)[0]
        correctly_identified_vulnerabilities = sum(1 for i in vulnerable_indices 
                                                 if original_predictions[i] == 1)

        # ENHANCED DIAGNOSTICS: Show model performance breakdown
        if self.verbose >= 2:
            print(f"\n=== MODEL PERFORMANCE BREAKDOWN ===")
            
            # Count predictions by true label
            true_vulnerable_indices = np.where(original_df['label'] == 1)[0]
            true_benign_indices = np.where(original_df['label'] == 0)[0]
            
            # For vulnerable samples
            vuln_pred_as_vuln = sum(1 for i in true_vulnerable_indices if original_predictions[i] == 1)
            vuln_pred_as_benign = sum(1 for i in true_vulnerable_indices if original_predictions[i] == 0)
            
            # For benign samples  
            benign_pred_as_vuln = sum(1 for i in true_benign_indices if original_predictions[i] == 1)
            benign_pred_as_benign = sum(1 for i in true_benign_indices if original_predictions[i] == 0)
            
            print(f"Vulnerable samples (label=1): {len(true_vulnerable_indices)} total")
            print(f"  → Predicted as vulnerable: {vuln_pred_as_vuln}")
            print(f"  → Predicted as benign: {vuln_pred_as_benign}")
            print(f"Benign samples (label=0): {len(true_benign_indices)} total")
            print(f"  → Predicted as vulnerable: {benign_pred_as_vuln}")
            print(f"  → Predicted as benign: {benign_pred_as_benign}")
            
            model_accuracy = (vuln_pred_as_vuln + benign_pred_as_benign) / len(original_df)
            vulnerable_recall = vuln_pred_as_vuln / len(true_vulnerable_indices) if len(true_vulnerable_indices) > 0 else 0
            
            print(f"Model accuracy: {model_accuracy:.4f}")
            print(f"Vulnerable recall: {vulnerable_recall:.4f}")
            print(f"Samples available for attack: {correctly_identified_vulnerabilities}")
        
        if correctly_identified_vulnerabilities == 0:
            if self.verbose >= 2:
                print("No vulnerabilities correctly identified by model. Attack cannot succeed.")
            # Return zero fitness since we can't measure attack success
            if return_attack_rate:
                return 0.0, 0.0
            else:
                return 0.0
        
        # Insert adversarial code into vulnerable samples using improved strategies
        for idx in adv_df.index[vulnerable_samples]:
            # Skip samples not originally predicted as vulnerable
            if original_predictions[idx] != 1:
                continue
                
            # Insert adversarial code using multiple improved strategies
            orig_code = adv_df.loc[idx, 'functionSource']
            
            # Strategy 1: Insert at function level (after function declaration)
            # Strategy 2: Insert at variable declaration level
            # Strategy 3: Insert at loop/conditional level
            # Strategy 4: Insert at critical computation points
            
            code_lines = orig_code.split('\n')
            code_len = len(code_lines)
            
            # More sophisticated insertion strategy
            # Look for function declarations, variable declarations, loops, etc.
            enhanced_code_lines = code_lines.copy()
            insertions_made = 0
            
            for i, line in enumerate(code_lines):
                line_stripped = line.strip().lower()
                
                # Insert after function declarations
                if (any(keyword in line_stripped for keyword in ['void ', 'int ', 'char ', 'function ', 'def ']) and 
                    ('(' in line and ')' in line and '{' in line) and insertions_made < 2):
                    enhanced_code_lines.insert(i + 1 + insertions_made, f"    {adversarial_code}")
                    insertions_made += 1
                
                # Insert before variable declarations involving user input
                elif (any(keyword in line_stripped for keyword in ['scanf', 'gets', 'input', 'read']) and 
                      insertions_made < 3):
                    enhanced_code_lines.insert(i + insertions_made, f"    {adversarial_code}")
                    insertions_made += 1
                
                # Insert in loop bodies
                elif (any(keyword in line_stripped for keyword in ['for ', 'while ', 'do ']) and 
                      insertions_made < 2):
                    enhanced_code_lines.insert(i + 1 + insertions_made, f"        {adversarial_code}")
                    insertions_made += 1
            
            # If no strategic insertions were made, fall back to fixed positions
            if insertions_made == 0:
                # Use more aggressive insertion at multiple fixed positions
                positions = [
                    min(2, code_len - 1),     # Near the beginning
                    code_len // 2,            # Middle
                    max(1, code_len - 2)      # Near the end
                ]
                
                for i, pos in enumerate(positions):
                    enhanced_code_lines.insert(pos + i, f"    {adversarial_code}")
            
            # Apply the modified code
            adv_df.loc[idx, 'functionSource'] = '\n'.join(enhanced_code_lines)
        
        # Get adversarial predictions
        if self.original_predictions is not None and hasattr(self, 'trainer') and self.trainer is not None:
            # Use model to predict adversarial samples since we need new predictions
            if self.verbose >= 2:
                print("Using model to predict adversarial samples")
            
            # Set verbosity based on self.verbose level
            if self.verbose <= 1:
                # Temporarily reduce print output
                old_stdout = sys.stdout
                sys.stdout = open(os.devnull, 'w')
            
            # Function to get predictions using the model directly
            def get_adversarial_predictions(df):
                predictions = []
                try:
                    # CRITICAL FIX: Ensure model is in evaluation mode before predictions
                    if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                        self.trainer.model.eval()
                    
                    for _, row in df.iterrows():
                        code = row['functionSource']
                        # Try the trainer's predict method with error handling
                        try:
                            pred = self.trainer.predict(code)
                            if isinstance(pred, dict) and 'prediction' in pred:
                                predictions.append(pred['prediction'])
                            else:
                                predictions.append(0)  # Default to non-vulnerable
                        except Exception as e:
                            if self.verbose >= 2:
                                print(f"Error in adversarial prediction for sample: {str(e)}")
                            predictions.append(0)
                except Exception as e:
                    if self.verbose >= 2:
                        print(f"Error in get_adversarial_predictions: {str(e)}")
                    predictions = [0] * len(df)
                return np.array(predictions)
            
            # Get predictions on adversarial data
            adversarial_predictions = get_adversarial_predictions(adv_df)
            
            if self.verbose <= 1:
                # Restore print output
                sys.stdout.close()
                sys.stdout = old_stdout
        else:
            # If no model available, assume adversarial attack fails
            adversarial_predictions = original_predictions.copy()
        
        # FIXED: Calculate attack success rate properly
        # Focus only on vulnerable samples that were correctly identified initially
        initially_vulnerable_and_detected = []
        for i in vulnerable_indices:
            if original_predictions[i] == 1:  # Was correctly identified as vulnerable
                initially_vulnerable_and_detected.append(i)
        
        # Count successful attacks (vulnerable samples that became non-vulnerable)
        successful_attacks = 0
        total_prediction_changes = 0
        zero_to_one = 0  # Non-vulnerable to vulnerable
        one_to_zero = 0  # Vulnerable to non-vulnerable (this is what we want)
        
        for i in initially_vulnerable_and_detected:
            if adversarial_predictions[i] != original_predictions[i]:
                total_prediction_changes += 1
                if original_predictions[i] == 1 and adversarial_predictions[i] == 0:
                    successful_attacks += 1
                    one_to_zero += 1
        
        # Also check all samples for any changes (for debugging)
        for i in range(len(original_predictions)):
            if original_predictions[i] != adversarial_predictions[i]:
                if original_predictions[i] == 0 and adversarial_predictions[i] == 1:
                    zero_to_one += 1
        
        # Calculate attack success rate based on initially vulnerable and detected samples only
        if len(initially_vulnerable_and_detected) > 0:
            attack_success_rate = successful_attacks / len(initially_vulnerable_and_detected)
        else:
            attack_success_rate = 0.0
        
        if self.verbose >= 2:
            print(f"Attack success rate: {attack_success_rate:.4f} ({successful_attacks}/{len(initially_vulnerable_and_detected)} samples changed prediction)")
            print(f"  - 0→1 changes: {zero_to_one}/{len(original_predictions)} ({zero_to_one/len(original_predictions):.4f})")
            print(f"  - 1→0 changes: {one_to_zero}/{len(original_predictions)} ({one_to_zero/len(original_predictions):.4f})")
        
        # Calculate penalty for code snippet length
        snippet_length = len(adversarial_code.splitlines())
        length_penalty = self.penalty * snippet_length
        
        # Calculate fitness score - heavily weight attack success
        # Use a more aggressive fitness function that rewards high attack success rates
        if attack_success_rate > 0.8:  # Bonus for very high success rates
            fitness_score = attack_success_rate + 0.2 - length_penalty
        elif attack_success_rate > 0.5:  # Bonus for moderate success rates
            fitness_score = attack_success_rate + 0.1 - length_penalty
        else:
            fitness_score = attack_success_rate - length_penalty
        
        if self.verbose >= 2:
            print(f"Adversarial snippet length: {snippet_length}")
            print(f"Length penalty: {length_penalty:.4f}")
            print(f"Fitness score: {fitness_score:.4f}")
        
        if return_attack_rate:
            return fitness_score, attack_success_rate
        else:
            return fitness_score
    
    def perform_fuzzy_clustering(self):
        """Perform fuzzy clustering on the population"""
        # Get fitness scores as array
        keys = list(self.population.keys())
        scores = np.array([self.population[k] for k in keys])
        
        # Calculate fuzzy membership weights for each sample
        membership_weights = {}
        for key, score in zip(keys, scores):
            weight = calcaulate_weight(score, self.centroids)
            membership_weights[key] = weight
        
        # Update centroids based on weighted scores
        new_centroids = np.zeros_like(self.centroids)
        for k in range(len(self.centroids)):
            numerator = 0
            denominator = 0
            
            for key, score in zip(keys, scores):
                weight = membership_weights[key][k]
                weight_alpha = weight ** self.alpha
                numerator += weight_alpha * score
                denominator += weight_alpha
            
            new_centroids[k] = numerator / denominator if denominator > 0 else self.centroids[k]
        
        # Check for convergence
        centroid_change = np.sum(np.abs(new_centroids - self.centroids))
        self.centroids = new_centroids
        
        if self.verbose >= 2:
            print(f"Updated centroids: {self.centroids}")
            print(f"Centroid change: {centroid_change:.6f}")
        
        return membership_weights, centroid_change
    
    def select_clusters(self):
        """Select top 2 clusters based on centroid magnitude"""
        # Sort centroids by magnitude (fitness score)
        sorted_indices = np.argsort(self.centroids)[::-1]
        top_clusters = sorted_indices[:2]  # Select top 2 clusters
        
        if self.verbose >= 2:
            print(f"Selected top clusters: {top_clusters} with centroids {self.centroids[top_clusters]}")
        
        return top_clusters
    
    def perform_crossover(self, membership_weights, top_clusters):
        """Perform crossover operation to create offspring"""
        keys = list(self.population.keys())
        
        # Select parents from top clusters
        parents = []
        for _ in range(self.pop_size // 2):  # Create pop_size/2 offspring
            # Use the original select function from fga_selection.py with error handling
            try:
                # Select parent from first top cluster
                parent1 = select(self.population, self.centroids[top_clusters[0]], 
                                self.centroids, self.decay_rate)
                
                # Select parent from second top cluster  
                parent2 = select(self.population, self.centroids[top_clusters[1]], 
                                self.centroids, self.decay_rate)
                
                parents.append((parent1, parent2))
            except (ZeroDivisionError, ValueError, np.linalg.LinAlgError, OverflowError) as e:
                # Handle numerical issues from original select function
                if self.verbose >= 2:
                    print(f"Numerical error in parent selection: {str(e)}")
                    print("Falling back to random selection")
                
                parent1 = random.choice(keys)
                parent2 = random.choice(keys)
                parents.append((parent1, parent2))
            except Exception as e:
                # Fallback to random selection if there's any other issue
                if self.verbose >= 2:
                    print(f"Error in parent selection: {str(e)}")
                    print("Falling back to random selection")
                
                parent1 = random.choice(keys)
                parent2 = random.choice(keys)
                parents.append((parent1, parent2))
        
        # Create offspring through improved crossover strategies
        offspring = []
        for parent1, parent2 in parents:
            # Enhanced crossover: create multiple offspring variations per parent pair
            for variation in range(2):  # Create 2 variations per parent pair
                p1_lines = parent1.split('\n')
                p2_lines = parent2.split('\n')
                
                # Intelligent crossover that preserves vulnerability patterns
                if len(p1_lines) <= 1 or len(p2_lines) <= 1:
                    # For very short snippets, combine them strategically
                    if variation == 0:
                        child = parent1 + '; ' + parent2  # Combine on same line
                    else:
                        child = parent1 + '\n' + parent2  # Combine on separate lines
                else:
                    # Multiple sophisticated crossover strategies
                    strategy = random.choice(['semantic_mix', 'vulnerability_focused', 'pattern_preservation', 'obfuscation_mix'])
                    
                    if strategy == 'semantic_mix':
                        # Mix based on semantic patterns (vulnerabilities vs normal code)
                        vuln_keywords = ['malloc', 'free', 'strcpy', 'gets', 'sprintf', 'system', 'exec']
                        
                        # Separate vulnerable and normal lines
                        p1_vuln = [line for line in p1_lines if any(kw in line.lower() for kw in vuln_keywords)]
                        p1_normal = [line for line in p1_lines if not any(kw in line.lower() for kw in vuln_keywords)]
                        p2_vuln = [line for line in p2_lines if any(kw in line.lower() for kw in vuln_keywords)]
                        p2_normal = [line for line in p2_lines if not any(kw in line.lower() for kw in vuln_keywords)]
                        
                        # Combine vulnerable parts from both parents with normal parts
                        child_lines = []
                        if p1_vuln: child_lines.extend(p1_vuln[:2])  # Take first 2 vuln lines from p1
                        if p2_normal: child_lines.extend(p2_normal[:1])  # Mix with normal from p2
                        if p2_vuln: child_lines.extend(p2_vuln[:2])  # Take vuln lines from p2
                        if p1_normal: child_lines.extend(p1_normal[:1])  # Mix with normal from p1
                    
                    elif strategy == 'vulnerability_focused':
                        # Focus on combining different types of vulnerabilities
                        # Take the most dangerous-looking lines from each parent
                        danger_keywords = ['overflow', 'injection', 'format', 'buffer', 'memory', 'null', 'free', 'alloc']
                        
                        p1_danger = [line for line in p1_lines if any(kw in line.lower() for kw in danger_keywords)]
                        p2_danger = [line for line in p2_lines if any(kw in line.lower() for kw in danger_keywords)]
                        
                        child_lines = []
                        # Interleave dangerous patterns
                        max_danger = max(len(p1_danger), len(p2_danger))
                        for i in range(max_danger):
                            if i < len(p1_danger):
                                child_lines.append(p1_danger[i])
                            if i < len(p2_danger):
                                child_lines.append(p2_danger[i])
                        
                        # Fill in with remaining lines if needed
                        if not child_lines:
                            child_lines = p1_lines[:len(p1_lines)//2] + p2_lines[len(p2_lines)//2:]
                    
                    elif strategy == 'pattern_preservation':
                        # Preserve important patterns while mixing
                        # Look for function calls, variable declarations, etc.
                        p1_funcs = [line for line in p1_lines if '(' in line and ')' in line]
                        p1_vars = [line for line in p1_lines if any(typ in line.lower() for typ in ['char', 'int', 'void', 'size_t'])]
                        p2_funcs = [line for line in p2_lines if '(' in line and ')' in line]
                        p2_vars = [line for line in p2_lines if any(typ in line.lower() for typ in ['char', 'int', 'void', 'size_t'])]
                        
                        child_lines = []
                        # Combine variable declarations and function calls strategically
                        if p1_vars: child_lines.extend(p1_vars[:2])
                        if p2_funcs: child_lines.extend(p2_funcs[:2])
                        if p2_vars: child_lines.extend(p2_vars[:1])
                        if p1_funcs: child_lines.extend(p1_funcs[:2])
                    
                    else:  # obfuscation_mix
                        # Create obfuscated combinations
                        # Take parts from each parent and add obfuscating comments
                        p1_half = len(p1_lines) // 2
                        p2_half = len(p2_lines) // 2
                        
                        child_lines = []
                        child_lines.extend(p1_lines[:p1_half])
                        child_lines.append("// Security check passed")  # Obfuscating comment
                        child_lines.extend(p2_lines[p2_half:])
                        if random.random() < 0.5:
                            child_lines.append("// Code reviewed and approved")  # More obfuscation
                
                child = '\n'.join(child_lines) if 'child_lines' in locals() else parent1
                
                # Ensure the child is not empty or too short
                if len(child.strip()) < 5:
                    child = parent1 if len(parent1) > len(parent2) else parent2  # Use longer parent
                
                # Add mutation to create more diversity
                if random.random() < 0.3:  # 30% mutation rate
                    child = self._mutate_adversarial_code(child)
                    
                offspring.append(child)
        
        # Ensure we don't exceed population size
        offspring = offspring[:self.pop_size//2]
        
        if self.verbose >= 2:
            print(f"Created {len(offspring)} offspring through enhanced crossover")
        
        return offspring
    
    def _mutate_adversarial_code(self, code):
        """Apply mutation to adversarial code to increase diversity"""
        lines = code.split('\n')
        
        mutation_types = ['add_comment', 'modify_variable', 'add_vulnerability', 'obfuscate']
        mutation = random.choice(mutation_types)
        
        if mutation == 'add_comment':
            # Add misleading comments
            comments = [
                "// Bounds checked above",
                "// Input sanitized",
                "// Memory properly allocated",
                "// Safe operation confirmed",
                "// Validated by security team"
            ]
            insert_pos = random.randint(0, len(lines))
            lines.insert(insert_pos, random.choice(comments))
            
        elif mutation == 'modify_variable':
            # Modify variable names to be more misleading
            replacements = {
                'buffer': 'safe_buffer',
                'input': 'validated_input',
                'ptr': 'safe_ptr',
                'query': 'sanitized_query'
            }
            for i, line in enumerate(lines):
                for old, new in replacements.items():
                    if old in line:
                        lines[i] = line.replace(old, new)
                        break
        
        elif mutation == 'add_vulnerability':
            # Add additional vulnerability patterns
            vuln_patterns = [
                "strcpy(temp, user_data); // Fast copy",
                "system(command); // Execute utility",
                "free(ptr); // Cleanup memory",
                "sprintf(msg, format, data); // Format message"
            ]
            insert_pos = random.randint(0, len(lines))
            lines.insert(insert_pos, random.choice(vuln_patterns))
            
        else:  # obfuscate
            # Add obfuscating code
            obfuscations = [
                "if(1) { // Always true condition",
                "int dummy = 0; // Temporary variable",
                "/* Multi-line comment for clarity */",
                "#ifdef DEBUG",
                "#endif"
            ]
            insert_pos = random.randint(0, len(lines))
            lines.insert(insert_pos, random.choice(obfuscations))
        
        return '\n'.join(lines)
    
    def run(self, original_data_path=None, prediction_file_path=None):
        """
        Run the adversarial learning process
        
        Args:
            original_data_path: Path to original data CSV (if None, will create synthetic data)
            prediction_file_path: Path to txt file containing model predictions (optional)
            
        Returns:
            Best adversarial code snippet
        """
        print("\n===== ADVERSARIAL LEARNING DIAGNOSTICS =====")
        
        # Load predictions from txt file if provided
        if prediction_file_path:
            print(f"Loading predictions from: {prediction_file_path}")
            self.load_predictions_from_txt(prediction_file_path)
        
        # Load or create original data
        if original_data_path and os.path.exists(original_data_path):
            original_df = pd.read_csv(original_data_path)
            if 'functionSource' not in original_df.columns or 'label' not in original_df.columns:
                raise ValueError("Original data must contain 'functionSource' and 'label' columns")
            print(f"Loaded original data from {original_data_path}")
        else:
            # Create synthetic data for testing purposes
            print("No original data path provided, creating synthetic data for testing...")
            synthetic_functions = [
                "void func1() { char buf[100]; return; }",
                "int func2(char* input) { int len = strlen(input); return len; }",
                "void func3() { int* ptr = malloc(sizeof(int)); free(ptr); }",
                "char* func4(int size) { return malloc(size); }",
                "void func5(char* str) { printf(\"%s\", str); }",
                "int func6() { char buffer[256]; gets(buffer); return 0; }",
                "void func7(char* dest, char* src) { strcpy(dest, src); }",
                "int func8(char* cmd) { return system(cmd); }",
            ]
            
            # Repeat synthetic functions to match attack pool size if needed
            num_samples = max(len(self.attack_pool), 50)  # At least 50 samples
            extended_functions = []
            for i in range(num_samples):
                base_func = synthetic_functions[i % len(synthetic_functions)]
                # Add variation to make functions unique
                modified_func = base_func.replace("func", f"func_{i}")
                extended_functions.append(modified_func)
            
            original_df = pd.DataFrame({
                'functionSource': extended_functions,
                'label': np.zeros(len(extended_functions), dtype=int)  # All non-vulnerable initially
            })
            
            # Add some vulnerable examples
            vulnerable_indices = np.random.choice(len(original_df), len(original_df) // 3, replace=False)
            original_df.loc[vulnerable_indices, 'label'] = 1
            print(f"Created synthetic data with {len(original_df)} samples ({len(vulnerable_indices)} vulnerable)")
        
        # Print data composition
        vulnerable_count = (original_df['label'] == 1).sum()
        benign_count = (original_df['label'] == 0).sum()
        print(f"Data composition: {vulnerable_count} vulnerable, {benign_count} benign samples")
        
        if vulnerable_count == 0:
            print("WARNING: No vulnerable samples in dataset! Creating synthetic vulnerable examples.")
            # Create some synthetic vulnerable examples
            n_synthetic = max(10, len(original_df) // 4)
            synthetic_indices = np.random.choice(len(original_df), n_synthetic, replace=False)
            original_df.loc[synthetic_indices, 'label'] = 1
            vulnerable_count = (original_df['label'] == 1).sum()
            print(f"Added {vulnerable_count} synthetic vulnerable examples")
        
        # Initialize population and centroids
        self.initialize_population()
        
        # Create more aggressive adversarial examples for the initial population
        print("Enhancing attack pool with more aggressive adversarial examples...")
        aggressive_examples = [
            # Comments to mislead
            "// This is a harmless comment that might confuse the model",
            "/* This code was previously flagged as vulnerable */",
            "// Code reviewed by security team",
            
            # Actual vulnerable patterns but used in harmless ways
            "char buffer[100]; // Large enough buffer",
            "if(strlen(input) < sizeof(buffer)) strcpy(buffer, input);",
            "system(\"echo Hello\"); // Fixed command, not injection",
            
            # Confusing constructs
            "int *ptr = malloc(10); if(ptr != NULL) { free(ptr); ptr = NULL; }",
            "char *sql = \"SELECT * FROM users WHERE id=?\"; // Parameterized query",
            "FILE *f = fopen(filename, \"r\"); if(f != NULL) { fclose(f); }",
            
            # Code that looks harmless but has subtle issues
            "char *p = malloc(strlen(s)); strcpy(p, s); // Missing +1 for null terminator",
            "int size = n * sizeof(int); char *buf = malloc(size); // Potential integer overflow",
            "printf(\"Value: %s\", str); // Potentially format string vulnerability if str contains %",
            
            # More aggressive exploits (obvious vulnerabilities)
            "gets(buffer); // Known buffer overflow",
            "strcpy(dst, src); // No bounds checking",
            "system(user_input); // Command injection",
            "exec(user_input); // Command execution",
            "sprintf(query, \"SELECT * FROM users WHERE name='%s'\", user_input); // SQL injection",
            "free(ptr); free(ptr); // Double free"
        ]
        
        # Replace some population members with these examples
        population_keys = list(self.population.keys())
        for i in range(min(len(aggressive_examples), len(population_keys))):
            self.population[aggressive_examples[i]] = 0
            if i < len(population_keys):
                del self.population[population_keys[i]]
        
        # Initialize model if needed (only if predictions weren't loaded from txt)
        if self.original_predictions is None:
            print("Initializing model...")
            if not hasattr(self, 'model') or self.model is None:
                if self.trainer is None:
                    self.trainer = CodeBERTTrainer(batch_size=8, epochs=3)
                
                # Check if we have a pre-trained model to load
                if self.model_path and os.path.exists(self.model_path):
                    # Load pre-trained model
                    print(f"Loading model from {self.model_path}")
                    self.model = self.trainer.load_model(self.model_path)
                    
                    # CRITICAL FIX: Ensure model is in evaluation mode after loading
                    if self.model is not None:
                        self.model.eval()
                        if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                            self.trainer.model.eval()
                        print("Model loaded successfully and set to evaluation mode")
                    else:
                        print("ERROR: Model loading returned None!")
                        raise ValueError("Failed to load model from specified path")
                else:
                    # Train a new model only if no pre-trained model exists
                    print("Training a new model")
                    from sklearn.model_selection import train_test_split
                    train_data, test_data = train_test_split(original_df, test_size=0.2, random_state=42)
                    
                    # Set trainer data
                    self.trainer.set_data(train_data)
                    
                    # Prepare data loaders
                    data_loaders = self.trainer.prepare_data(train_data, test_data)
                    
                    # Train the model
                    self.model = self.trainer.train_model(data_loaders, freeze_bert=False)
                    print("Model trained successfully")
            else:
                print("Using pre-loaded model")
                # CRITICAL FIX: Ensure the pre-loaded model is in evaluation mode
                if hasattr(self, 'model') and self.model is not None:
                    self.model.eval()
                if hasattr(self, 'trainer') and hasattr(self.trainer, 'model') and self.trainer.model is not None:
                    self.trainer.model.eval()
        else:
            print("Using loaded predictions from txt file, skipping model initialization")
            # Still need trainer for adversarial predictions if model_path is provided
            if self.model_path and not hasattr(self, 'trainer'):
                print("Loading model for adversarial prediction generation...")
                self.trainer = CodeBERTTrainer()
                self.model = self.trainer.load_model(self.model_path)
                if self.model is not None:
                    self.model.eval()
                    if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                        self.trainer.model.eval()
            elif self.model_path and hasattr(self, 'trainer') and self.trainer is None:
                print("Loading model for adversarial prediction generation...")
                self.trainer = CodeBERTTrainer()
                self.model = self.trainer.load_model(self.model_path)
                if self.model is not None:
                    self.model.eval()
                    if hasattr(self.trainer, 'model') and self.trainer.model is not None:
                        self.trainer.model.eval()
            elif not self.model_path:
                print("Warning: No model path provided and using loaded predictions.")
                print("Adversarial predictions cannot be generated without a model.")
                # Initialize a dummy trainer to prevent AttributeError
                self.trainer = None
        
        # Calculate initial fitness scores
        print("Calculating initial fitness scores...")
        
        # Make sure the model is available for calculate_fitness
        # FIXED: Handle case when using loaded predictions and no trainer is available
        if hasattr(self, 'model') and self.model is not None:
            model = self.model
        elif hasattr(self, 'trainer') and self.trainer is not None and hasattr(self.trainer, 'model'):
            model = self.trainer.model
        else:
            model = None
        
        # DEBUG: Validate model predictions on original data (only if trainer is available)
        if self.original_predictions is None and hasattr(self, 'trainer') and self.trainer is not None:
            print("\n===== MODEL PREDICTION VALIDATION =====")
            # Get counts of vulnerable samples in original data
            vulnerable_count = (original_df['label'] == 1).sum()
            print(f"Dataset has {vulnerable_count} labeled vulnerable samples out of {len(original_df)} total")
            
            # Check original predictions
            if hasattr(self.trainer, 'predict'):
                correct_predictions = 0
                vulnerable_correctly_identified = 0
                vulnerable_samples = original_df['label'] == 1
                
                for idx, row in original_df.iterrows():
                    code = row['functionSource']
                    true_label = row['label']
                    try:
                        pred = self.trainer.predict(code)
                        if isinstance(pred, dict) and 'prediction' in pred:
                            prediction = pred['prediction']
                            if prediction == true_label:
                                correct_predictions += 1
                                if true_label == 1:
                                    vulnerable_correctly_identified += 1
                            
                            # Print info for all vulnerable samples
                            if true_label == 1:
                                print(f"Vulnerable sample {idx}: Predicted as {'vulnerable' if prediction == 1 else 'benign'}")
                    except Exception as e:
                        print(f"Error predicting sample {idx}: {str(e)}")
                
                accuracy = correct_predictions / len(original_df) if len(original_df) > 0 else 0
                vulnerability_recall = vulnerable_correctly_identified / vulnerable_count if vulnerable_count > 0 else 0
                
                print(f"Model accuracy: {accuracy:.4f} ({correct_predictions}/{len(original_df)})")
                print(f"Vulnerability detection rate: {vulnerability_recall:.4f} ({vulnerable_correctly_identified}/{vulnerable_count})")
                
                if vulnerability_recall < 0.1:
                    print("WARNING: Model is detecting very few vulnerabilities, adversarial attacks will likely fail!")
                    print("Consider retraining the model or providing clearer vulnerable examples.")
                    
                    # Create more obvious vulnerable examples for testing
                    if vulnerable_correctly_identified == 0:
                        print("CRITICAL: No vulnerabilities detected. Creating synthetic examples for testing.")
                        # Create an obvious example with a known vulnerability
                        test_code = """void vulnerable_func() {
                            char buffer[10];
                            gets(buffer);  // Known buffer overflow
                            printf("%s", buffer);
                        }"""
                        
                        try:
                            pred = self.trainer.predict(test_code)
                            print(f"Test vulnerability prediction: {pred}")
                            if isinstance(pred, dict) and pred.get('prediction') != 1:
                                print("SEVERE WARNING: Model fails to detect even obvious vulnerabilities!")
                        except Exception as e:
                            print(f"Error in test prediction: {str(e)}")
        else:
            print("\n===== USING LOADED PREDICTIONS =====")
            print(f"Loaded {len(self.original_predictions)} predictions from txt file")
            print("Skipping model validation since predictions are pre-computed")
        
        # Track the actual attack success rates for the best code
        attack_success_rates = {}
        
        # Calculate fitness for each member of the population
        for adv_code in tqdm(list(self.population.keys()), desc="Initial fitness"):
            fitness, attack_rate = self.calculate_fitness(original_df, adv_code, model, return_attack_rate=True)
            self.population[adv_code] = fitness
            attack_success_rates[adv_code] = attack_rate
        
        # Diagnose the best initial adversarial code (only if trainer is available)
        if self.population and hasattr(self, 'trainer') and self.trainer is not None:
            best_initial_code = max(self.population.items(), key=lambda x: x[1])[0]
            print(f"\n=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===")
            self.diagnose_attack_effectiveness(original_df, best_initial_code, model)
        
        # Free GPU memory
        free_gpu_memory()
        
        # Run generations
        best_fitness = max(self.population.values()) if self.population else 0
        best_code = max(self.population.items(), key=lambda x: x[1])[0] if self.population else None
        best_attack_rate = attack_success_rates.get(best_code, 0.0)
        
        for gen in range(self.max_generations):
            if self.verbose:
                print(f"\n=== Generation {gen+1}/{self.max_generations} ===")
                print(f"Best fitness so far: {best_fitness:.4f}")
                print(f"Best attack success rate: {best_attack_rate:.4f}")
            
            # Perform fuzzy clustering
            membership_weights, centroid_change = self.perform_fuzzy_clustering()
            
            # Select top clusters
            top_clusters = self.select_clusters()
            
            # Perform crossover
            offspring = self.perform_crossover(membership_weights, top_clusters)
            
            # Calculate fitness for offspring
            offspring_fitness = []
            for adv_code in tqdm(offspring, desc="Offspring fitness"):
                fitness, attack_rate = self.calculate_fitness(original_df, adv_code, model, return_attack_rate=True)
                offspring_fitness.append(fitness)
                attack_success_rates[adv_code] = attack_rate
            
            # Update population with offspring
            self.population = update_global_pop(offspring, self.population, offspring_fitness)
            
            # Check for new best fitness
            current_best = max(self.population.values())
            if current_best > best_fitness:
                best_fitness = current_best
                best_code = max(self.population.items(), key=lambda x: x[1])[0]
                best_attack_rate = attack_success_rates.get(best_code, 0.0)
                
                if self.verbose:
                    print(f"New best fitness: {best_fitness:.4f}")
                    print(f"New best attack success rate: {best_attack_rate:.4f}")
                    print(f"Best code snippet length: {len(best_code.splitlines())}")
            
            # Check for perfect attack (100% success rate)
            if best_fitness > 0.99 - self.penalty:  # Allow for length penalty
                if self.verbose:
                    print(f"Found optimal adversarial code with fitness {best_fitness:.4f}")
                break
            
            # Check for convergence
            if centroid_change < 1e-6:
                if self.verbose:
                    print(f"Converged after {gen+1} generations with best fitness {best_fitness:.4f}")
                break
        
        # Calculate the direct attack success rate with the best code
        # This matches the logic in the user's code sample
        print("\n=== Direct Attack Success Rate Calculation ===")
        
        # Use loaded predictions if available, otherwise get predictions from model
        if self.original_predictions is not None:
            print("Using loaded predictions for direct attack calculation...")
            original_predictions = self.original_predictions.copy()
        else:
            # Get predictions on original data using model
            def get_predictions(df):
                predictions = []
                if hasattr(self, 'trainer') and self.trainer is not None:
                    for _, row in df.iterrows():
                        code = row['functionSource']
                        try:
                            pred = self.trainer.predict(code)
                            if isinstance(pred, dict) and 'prediction' in pred:
                                predictions.append(pred['prediction'])
                            else:
                                predictions.append(0)
                        except Exception as e:
                            if self.verbose >= 2:
                                print(f"Error in prediction: {str(e)}")
                            predictions.append(0)
                else:
                    # No trainer available, return all zeros
                    predictions = [0] * len(df)
                return np.array(predictions)
            
            print("Getting original predictions from model...")
            original_predictions = get_predictions(original_df)
        
        # Generate adversarial predictions only if trainer is available
        if hasattr(self, 'trainer') and self.trainer is not None:
            # Create a copy for adversarial testing
            adv_df = original_df.copy()
            vulnerable_samples = adv_df['label'] == 1
            num_vulnerable = vulnerable_samples.sum()
            
            print(f"Found {num_vulnerable} vulnerable samples for adversarial testing")
            
            # Insert adversarial code into vulnerable samples
            for idx in adv_df.index[vulnerable_samples]:
                orig_code = adv_df.loc[idx, 'functionSource']
                code_lines = orig_code.split('\n')
                insert_pos = min(15, max(1, len(code_lines) - 1))  # Try to use position 15 like in user's code
                code_lines.insert(insert_pos, best_code)
                adv_df.loc[idx, 'functionSource'] = '\n'.join(code_lines)
            
            # Get adversarial predictions
            print("Getting adversarial predictions...")
            adversarial_predictions = []
            for _, row in adv_df.iterrows():
                code = row['functionSource']
                try:
                    pred = self.trainer.predict(code)
                    if isinstance(pred, dict) and 'prediction' in pred:
                        adversarial_predictions.append(pred['prediction'])
                    else:
                        adversarial_predictions.append(0)
                except Exception as e:
                    if self.verbose >= 2:
                        print(f"Error in adversarial prediction: {str(e)}")
                    adversarial_predictions.append(0)
            
            adversarial_predictions = np.array(adversarial_predictions)
            
            # Calculate direct attack success rate (percent of predictions that changed)
            vul_indices = np.where(original_df['label'] == 1)[0]
            prediction_changes = sum(1 for i in vul_indices 
                                   if original_predictions[i] != adversarial_predictions[i])
            
            direct_attack_success_rate = prediction_changes / len(vul_indices) if len(vul_indices) > 0 else 0
            
            # Count how many 1→0 changes (matching user's code logic - vulnerable to benign)
            one_to_zero = sum(1 for i in vul_indices
                              if original_predictions[i] == 1 and adversarial_predictions[i] == 0)
            
            one_to_zero_rate = one_to_zero / len(vul_indices) if len(vul_indices) > 0 else 0
        else:
            print("No trainer available for direct attack calculation, using fitness-based estimates")
            direct_attack_success_rate = best_attack_rate  # Use the best attack rate from fitness calculation
            one_to_zero_rate = best_attack_rate
            one_to_zero = int(best_attack_rate * (original_df['label'] == 1).sum())
            vul_indices = np.where(original_df['label'] == 1)[0]
        
        print("\n=== Final Attack Success Results ===")
        print(f"Overall Attack Success Rate (any change): {best_attack_rate:.4f}")
        print(f"Direct Attack Success Rate (vulnerable samples only): {direct_attack_success_rate:.4f}")
        print(f"Vulnerable to Benign Changes (1→0): {one_to_zero_rate:.4f} ({one_to_zero}/{len(vul_indices)})")
        print(f"Fitness Score: {best_fitness:.4f}")
        
        best_snippet_length = len(best_code.splitlines())
        length_penalty = self.penalty * best_snippet_length
        print(f"Length Penalty: {length_penalty:.4f}")
        print(f"Code Snippet Length: {best_snippet_length}")
        
        # Generate and save adversarial predictions with the best code
        print("\n=== GENERATING ADVERSARIAL PREDICTIONS ===")
        
        # Apply the best adversarial code to create adversarial dataset
        final_adv_df = original_df.copy()
        vulnerable_samples = final_adv_df['label'] == 1
        
        # Insert best adversarial code into vulnerable samples
        for idx in final_adv_df.index[vulnerable_samples]:
            orig_code = final_adv_df.loc[idx, 'functionSource']
            code_lines = orig_code.split('\n')
            insert_pos = min(15, max(1, len(code_lines) - 1))
            code_lines.insert(insert_pos, best_code)
            final_adv_df.loc[idx, 'functionSource'] = '\n'.join(code_lines)
        
        # Generate adversarial predictions
        if hasattr(self, 'trainer') and self.trainer is not None:
            print("Generating adversarial predictions with best code...")
            final_adversarial_predictions = []
            
            for _, row in tqdm(final_adv_df.iterrows(), desc="Generating adversarial predictions", total=len(final_adv_df)):
                code = row['functionSource']
                try:
                    pred = self.trainer.predict(code)
                    if isinstance(pred, dict) and 'prediction' in pred:
                        final_adversarial_predictions.append(pred['prediction'])
                    else:
                        final_adversarial_predictions.append(0)
                except Exception as e:
                    if self.verbose >= 2:
                        print(f"Error in final adversarial prediction: {str(e)}")
                    final_adversarial_predictions.append(0)
            
            final_adversarial_predictions = np.array(final_adversarial_predictions)
            
            # Extract dataset name from original_data_path for consistent naming
            dataset_name = "test"  # Default
            if original_data_path:
                # Extract CWE ID from path like 'cwe399_test.csv'
                import re
                cwe_match = re.search(r'cwe(\d+)', os.path.basename(original_data_path).lower())
                if cwe_match:
                    dataset_name = f"cwe{cwe_match.group(1)}"
            
            # Save adversarial predictions
            adv_predictions_path = self.save_adversarial_predictions(
                final_adversarial_predictions, 
                dataset_name
            )
            
            # Calculate final adversarial attack statistics
            if self.original_predictions is not None:
                original_preds = self.original_predictions
            else:
                # Use original predictions from earlier calculation
                original_preds = original_predictions
            
            # Calculate attack effectiveness on final adversarial predictions
            total_changes = np.sum(original_preds != final_adversarial_predictions)
            vuln_to_benign = np.sum((original_preds == 1) & (final_adversarial_predictions == 0))
            benign_to_vuln = np.sum((original_preds == 0) & (final_adversarial_predictions == 1))
            
            print(f"\n=== FINAL ADVERSARIAL ATTACK RESULTS ===")
            print(f"Total prediction changes: {total_changes}/{len(original_preds)} ({total_changes/len(original_preds):.4f})")
            print(f"Vulnerable→Benign changes: {vuln_to_benign}")
            print(f"Benign→Vulnerable changes: {benign_to_vuln}")
            print(f"Adversarial predictions saved to: {adv_predictions_path}")
            
            # Add adversarial predictions info to results
            results = {
                'best_adversarial_code': best_code,
                'best_fitness': best_fitness,
                'attack_success_rate': best_attack_rate,
                'direct_attack_success_rate': direct_attack_success_rate,
                'vulnerable_to_benign_rate': one_to_zero_rate,
                'parameters': {
                    'pop_size': self.pop_size,
                    'clusters': self.clusters,
                    'max_generations': self.max_generations,
                    'decay_rate': self.decay_rate,
                    'alpha': self.alpha,
                    'penalty': self.penalty
                },
                'adversarial_predictions_file': adv_predictions_path,
                'total_prediction_changes': int(total_changes),
                'vulnerable_to_benign_changes': int(vuln_to_benign),
                'benign_to_vulnerable_changes': int(benign_to_vuln)
            }
        else:
            print("No model available for generating adversarial predictions")
            results = {
                'best_adversarial_code': best_code,
                'best_fitness': best_fitness,
                'attack_success_rate': best_attack_rate,
                'direct_attack_success_rate': direct_attack_success_rate,
                'vulnerable_to_benign_rate': one_to_zero_rate,
                'parameters': {
                    'pop_size': self.pop_size,
                    'clusters': self.clusters,
                    'max_generations': self.max_generations,
                    'decay_rate': self.decay_rate,
                    'alpha': self.alpha,
                    'penalty': self.penalty
                }
            }
        
        # Extract CWE ID from original_data_path to create filename suffix
        if original_data_path:
            # Extract CWE ID from path like '/kaggle/input/eatvul/cwe399_test.csv'
            import re
            cwe_match = re.search(r'cwe(\d+)', os.path.basename(original_data_path).lower())
            if cwe_match:
                cwe_id = cwe_match.group(1)
                results_filename = f'adversarial_results_cwe{cwe_id}.json'
            else:
                results_filename = 'adversarial_results.json'
        else:
            results_filename = 'adversarial_results.json'
        
        # Save best adversarial code
        with open(results_filename, 'w') as f:
            json.dump(results, f, indent=2)
        
        print(f"Results saved to {results_filename}")
        
        return best_code, best_fitness

    def diagnose_attack_effectiveness(self, original_df, adversarial_code, model=None):
        """
        Detailed diagnosis of why an adversarial attack might be failing
        """
        print(f"\n=== ATTACK EFFECTIVENESS DIAGNOSIS ===")
        print(f"Adversarial code being tested:")
        print("-" * 50)
        print(adversarial_code)
        print("-" * 50)
        
        # Get model predictions function
        def get_prediction_details(code):
            try:
                pred = self.trainer.predict(code)
                return pred
            except Exception as e:
                print(f"Error in prediction: {str(e)}")
                return {'prediction': 0, 'confidence': 0.5, 'probabilities': [0.5, 0.5]}
        
        # Test on a few vulnerable samples
        vulnerable_samples = original_df[original_df['label'] == 1].head(10)
        
        print(f"\nTesting on {len(vulnerable_samples)} vulnerable samples:")
        
        successful_attacks = 0
        for idx, row in vulnerable_samples.iterrows():
            function_code = row['functionSource']
            
            # Get original prediction
            orig_pred = get_prediction_details(function_code)
            
            # If original is not predicted as vulnerable, skip
            if orig_pred['prediction'] != 1:
                print(f"Sample {idx}: Originally not detected as vulnerable (pred={orig_pred['prediction']}, conf={orig_pred['confidence']:.3f})")
                continue
            
            # Create adversarial version
            code_lines = function_code.split('\n')
            # Insert adversarial code at position 2 (after function declaration typically)
            insert_pos = min(2, len(code_lines) - 1)
            code_lines.insert(insert_pos, adversarial_code)
            adv_code = '\n'.join(code_lines)
            
            # Get adversarial prediction
            adv_pred = get_prediction_details(adv_code)
            
            # Check if attack was successful
            attack_success = (orig_pred['prediction'] == 1 and adv_pred['prediction'] == 0)
            if attack_success:
                successful_attacks += 1
            
            print(f"Sample {idx}:")
            print(f"  Original: pred={orig_pred['prediction']}, conf={orig_pred['confidence']:.3f}")
            print(f"  Adversarial: pred={adv_pred['prediction']}, conf={adv_pred['confidence']:.3f}")
            print(f"  Attack success: {attack_success}")
            print(f"  Confidence change: {orig_pred['confidence']:.3f} -> {adv_pred['confidence']:.3f}")
            
            # Show a snippet of the adversarial code
            print(f"  Adversarial code snippet:")
            adv_lines = adv_code.split('\n')
            for i, line in enumerate(adv_lines[max(0, insert_pos-1):insert_pos+3]):
                marker = ">>> " if i == 1 else "    "
                print(f"    {marker}{line}")
            print()
        
        attack_rate = successful_attacks / len(vulnerable_samples) if len(vulnerable_samples) > 0 else 0
        print(f"Overall attack success rate: {attack_rate:.4f} ({successful_attacks}/{len(vulnerable_samples)})")
        
        # Additional diagnostics
        print(f"\n=== ADDITIONAL DIAGNOSTICS ===")
        
        # Test if the adversarial code itself is detected as vulnerable
        test_func = f"""void test_function() {{
    {adversarial_code}
    return;
}}"""
        
        test_pred = get_prediction_details(test_func)
        print(f"Adversarial code in isolation:")
        print(f"  Prediction: {test_pred['prediction']} (0=benign, 1=vulnerable)")
        print(f"  Confidence: {test_pred['confidence']:.3f}")
        
        if test_pred['prediction'] == 0:
            print("  -> Adversarial code itself is not detected as vulnerable")
            print("  -> This might explain low attack success rates")
        else:
            print("  -> Adversarial code is detected as vulnerable when isolated")
            print("  -> The problem might be in how it's inserted into existing code")
        
        return attack_rate

    def load_predictions_from_txt(self, prediction_file_path):
        """
        Load model predictions from exported txt file
        
        Args:
            prediction_file_path: Path to the txt file containing predictions
            
        Returns:
            numpy array of predictions
        """
        if not os.path.exists(prediction_file_path):
            raise FileNotFoundError(f"Prediction file not found: {prediction_file_path}")
        
        predictions = []
        
        if self.verbose:
            print(f"\n=== LOADING PREDICTIONS FROM TXT ===")
            print(f"Loading predictions from: {prediction_file_path}")
        
        with open(prediction_file_path, 'r') as f:
            for line_num, line in enumerate(f):
                line = line.strip()
                if line:  # Skip empty lines
                    try:
                        parts = line.split('\t')
                        if len(parts) == 2:
                            index, prediction = parts
                            predictions.append(int(prediction))
                        else:
                            # Try space separator if tab doesn't work
                            parts = line.split()
                            if len(parts) == 2:
                                index, prediction = parts
                                predictions.append(int(prediction))
                            else:
                                print(f"Warning: Skipping malformed line {line_num + 1}: {line}")
                    except ValueError as e:
                        print(f"Warning: Error parsing line {line_num + 1}: {line} - {str(e)}")
        
        predictions = np.array(predictions)
        
        if self.verbose:
            print(f"Loaded {len(predictions)} predictions")
            print(f"Prediction distribution: {np.bincount(predictions)}")
            print(f"  0 (not vulnerable): {np.sum(predictions == 0)}")
            print(f"  1 (vulnerable): {np.sum(predictions == 1)}")
        
        self.original_predictions = predictions
        self.prediction_file_path = prediction_file_path
        
        return predictions
    
    def save_adversarial_predictions(self, adversarial_predictions, dataset_name="test"):
        """
        Save adversarial predictions to txt file
        
        Args:
            adversarial_predictions: Array of adversarial predictions
            dataset_name: Name to include in filename
            
        Returns:
            Path to the saved file
        """
        # Create timestamp for unique filename
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        
        # Create filename
        if "cwe" in dataset_name.lower():
            filename = f"prediction_adv_{dataset_name}_{timestamp}.txt"
        else:
            filename = f"prediction_adv_cwe_{timestamp}.txt"
        
        # Determine output directory - handle read-only input directories
        output_dir = os.getcwd()  # Default to current working directory
        
        if self.prediction_file_path:
            input_dir = os.path.dirname(self.prediction_file_path)
            
            # Test if the input directory is writable
            try:
                test_file = os.path.join(input_dir, '.test_write_permission')
                with open(test_file, 'w') as f:
                    f.write('test')
                os.remove(test_file)
                # If we get here, the directory is writable
                output_dir = input_dir
                if self.verbose:
                    print(f"Using input directory for output: {output_dir}")
            except (OSError, PermissionError):
                # Directory is read-only, use current working directory
                output_dir = os.getcwd()
                if self.verbose:
                    print(f"Input directory is read-only, using current directory: {output_dir}")
        
        output_path = os.path.join(output_dir, filename)
        
        # Write predictions to file
        with open(output_path, 'w') as f:
            for idx, pred in enumerate(adversarial_predictions):
                f.write(f"{idx}\t{pred}\n")
        
        if self.verbose:
            print(f"Adversarial predictions exported to: {output_path}")
            print(f"Total adversarial predictions exported: {len(adversarial_predictions)}")
        
        return output_path


# CWE 399

In [12]:

# Run adversarial learning with improved parameters
adv_learning = AdversarialLearning(
    attack_pool_path='/kaggle/input/eatvul/cwe399_attack_pool.csv',
    model_path='/kaggle/input/eatvul/cwe399-model/model', # Default path, can be updated
    pop_size=20,             # Increased population size for better diversity
    clusters=5,              # Increased clusters for more diversity
    max_generations=1,      # More generations for better evolution
    decay_rate=0.8,          # Reduced decay rate for better selection pressure
    alpha=1.0,               # Reduced alpha for sharper clustering
    penalty=0.0005,          # Much smaller penalty to allow longer snippets
    verbose=2                # Increased verbosity for better debugging
)

# Run with the specified original data path
best_code, best_fitness = adv_learning.run(original_data_path='/kaggle/input/eatvul/cwe399_test.csv',prediction_file_path='/kaggle/input/eatvul/predict_codebert_cwe399.txt')

print("\n=== Final Results ===")
print(f"Best adversarial code fitness: {best_fitness:.4f}")
print(f"Best adversarial code snippet length: {len(best_code.splitlines())}")
print(f"Best adversarial code:")
print("-" * 50)
print(best_code)
print("-" * 50)


=== ATTACK POOL LOADING ===
Raw attack pool shape: (200, 3)
Available columns: ['original_code', 'adversarial_code', 'label']
Detected attack pool format with 'adversarial_code' column
Attack pool processed successfully:
  Initial size: 200
  After removing NaN: 200
  Final shape: (200, 1)
Sample adversarial codes:
  [1] nsSMILTimeContainer* container_var = nullptr;
double aoffsetseconds_var = aOffsetSeconds;
nsSMILTime...
  [2] void* sa_var;
struct task_struct* task_struct_var;
struct k_sigaction* ka_var;
if(task_struct_var &&...
  [3] const int const_var = 0;
nsPresContext* nsprescontext_var = nullptr;
nsIFrame* getparent_var = nullp...
Loading model from /kaggle/input/eatvul/cwe399-model/model
Loading best model checkpoint
Loaded tokenizer from saved model
Loaded training history. Best validation accuracy: 0.8880
Model loaded successfully and set to evaluation mode
Successfully loaded model from /kaggle/input/eatvul/cwe399-model/model
Model is in eval mode: True
Model test predicti

Initial fitness:   0%|          | 0/20 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:   5%|▌         | 1/20 [00:15<04:48, 15.20s/it]

Attack success rate: 0.9819 (272/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 272/732 (0.3716)
Adversarial snippet length: 6
Length penalty: 0.0030
Fitness score: 1.1789

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  10%|█         | 2/20 [00:30<04:33, 15.21s/it]

Attack success rate: 0.9819 (272/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 272/732 (0.3716)
Adversarial snippet length: 7
Length penalty: 0.0035
Fitness score: 1.1784

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  15%|█▌        | 3/20 [00:45<04:17, 15.16s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  20%|██        | 4/20 [01:00<04:02, 15.13s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  25%|██▌       | 5/20 [01:15<03:46, 15.12s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  30%|███       | 6/20 [01:30<03:31, 15.10s/it]

Attack success rate: 0.9639 (267/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 267/732 (0.3648)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1634

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  35%|███▌      | 7/20 [01:45<03:16, 15.10s/it]

Attack success rate: 0.9783 (271/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 271/732 (0.3702)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1778

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  40%|████      | 8/20 [02:00<03:01, 15.09s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  45%|████▌     | 9/20 [02:16<02:46, 15.10s/it]

Attack success rate: 0.9783 (271/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 271/732 (0.3702)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1778

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  50%|█████     | 10/20 [02:31<02:30, 15.09s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  55%|█████▌    | 11/20 [02:46<02:15, 15.09s/it]

Attack success rate: 0.9928 (275/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 275/732 (0.3757)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1923

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  60%|██████    | 12/20 [03:01<02:00, 15.09s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  65%|██████▌   | 13/20 [03:16<01:45, 15.09s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  70%|███████   | 14/20 [03:31<01:30, 15.07s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  75%|███████▌  | 15/20 [03:46<01:15, 15.06s/it]

Attack success rate: 0.9675 (268/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 268/732 (0.3661)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1670

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  80%|████████  | 16/20 [04:01<01:00, 15.05s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  85%|████████▌ | 17/20 [04:16<00:45, 15.06s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  90%|█████████ | 18/20 [04:31<00:30, 15.06s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness:  95%|█████████▌| 19/20 [04:46<00:15, 15.06s/it]

Attack success rate: 0.9783 (271/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 271/732 (0.3702)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1778

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Initial fitness: 100%|██████████| 20/20 [05:01<00:00, 15.09s/it]

Attack success rate: 0.9639 (267/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 267/732 (0.3648)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1634

=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===

=== ATTACK EFFECTIVENESS DIAGNOSIS ===
Adversarial code being tested:
--------------------------------------------------
FILE *f = fopen(filename, "r"); if(f != NULL) { fclose(f); }
--------------------------------------------------

Testing on 10 vulnerable samples:
Sample 4: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 6: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 7: Originally not detected as vulnerable (pred=0, conf=0.998)
Sample 8: Originally not detected as vulnerable (pred=0, conf=0.994)
Sample 9: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 11: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 12: Originally not detected as vulnerable (pred=0, co




Sample 20: Originally not detected as vulnerable (pred=0, conf=0.997)
Overall attack success rate: 0.0000 (0/10)

=== ADDITIONAL DIAGNOSTICS ===
Adversarial code in isolation:
  Prediction: 0 (0=benign, 1=vulnerable)
  Confidence: 0.999
  -> Adversarial code itself is not detected as vulnerable
  -> This might explain low attack success rates

=== Generation 1/1 ===
Best fitness so far: 1.1923
Best attack success rate: 0.9928
Updated centroids: [1.17357982 1.17362036 1.17356523 1.17370407 1.17365254]
Centroid change: 3.221766
Selected top clusters: [3 4] with centroids [1.17370407 1.17365254]
Created 10 offspring through enhanced crossover


Offspring fitness:   0%|          | 0/10 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  10%|█         | 1/10 [00:15<02:15, 15.08s/it]

Attack success rate: 0.9928 (275/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 275/732 (0.3757)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1923

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  20%|██        | 2/10 [00:30<02:00, 15.08s/it]

Attack success rate: 0.9928 (275/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 275/732 (0.3757)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1923

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  30%|███       | 3/10 [00:45<01:45, 15.06s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  40%|████      | 4/10 [01:00<01:30, 15.05s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1742

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  50%|█████     | 5/10 [01:15<01:15, 15.05s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  60%|██████    | 6/10 [01:30<01:00, 15.05s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  70%|███████   | 7/10 [01:45<00:45, 15.04s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  80%|████████  | 8/10 [02:00<00:30, 15.05s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1706

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness:  90%|█████████ | 9/10 [02:15<00:15, 15.05s/it]

Attack success rate: 0.9711 (269/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 269/732 (0.3675)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 1.1701

=== DATASET COMPOSITION ===
Total samples: 732
Labeled as vulnerable: 333
Labeled as benign: 399
Applying adversarial code to 333 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 333 total
  → Predicted as vulnerable: 277
  → Predicted as benign: 56
Benign samples (label=0): 399 total
  → Predicted as vulnerable: 44
  → Predicted as benign: 355
Model accuracy: 0.8634
Vulnerable recall: 0.8318
Samples available for attack: 277
Using model to predict adversarial samples


Offspring fitness: 100%|██████████| 10/10 [02:30<00:00, 15.05s/it]

Attack success rate: 0.9747 (270/277 samples changed prediction)
  - 0→1 changes: 3/732 (0.0041)
  - 1→0 changes: 270/732 (0.3689)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 1.1737
Found optimal adversarial code with fitness 1.1923

=== Direct Attack Success Rate Calculation ===
Using loaded predictions for direct attack calculation...
Found 333 vulnerable samples for adversarial testing
Getting adversarial predictions...






=== Final Attack Success Results ===
Overall Attack Success Rate (any change): 0.9928
Direct Attack Success Rate (vulnerable samples only): 0.8138
Vulnerable to Benign Changes (1→0): 0.8078 (269/333)
Fitness Score: 1.1923
Length Penalty: 0.0005
Code Snippet Length: 1

=== GENERATING ADVERSARIAL PREDICTIONS ===
Generating adversarial predictions with best code...


Generating adversarial predictions: 100%|██████████| 732/732 [00:15<00:00, 48.34it/s]

Input directory is read-only, using current directory: /kaggle/working
Adversarial predictions exported to: /kaggle/working/prediction_adv_cwe399_2025-06-02_05-09-13.txt
Total adversarial predictions exported: 732

=== FINAL ADVERSARIAL ATTACK RESULTS ===
Total prediction changes: 314/732 (0.4290)
Vulnerable→Benign changes: 311
Benign→Vulnerable changes: 3
Adversarial predictions saved to: /kaggle/working/prediction_adv_cwe399_2025-06-02_05-09-13.txt
Results saved to adversarial_results_cwe399.json

=== Final Results ===
Best adversarial code fitness: 1.1923
Best adversarial code snippet length: 1
Best adversarial code:
--------------------------------------------------
FILE *f = fopen(filename, "r"); if(f != NULL) { fclose(f); }
--------------------------------------------------





# CWE 119

In [13]:

# Run adversarial learning with improved parameters
adv_learning = AdversarialLearning(
    attack_pool_path='/kaggle/input/eatvul/cwe399_attack_pool.csv',
    model_path='/kaggle/input/eatvul/cwe119-model/model', # Default path, can be updated
    pop_size=20,             # Increased population size for better diversity
    clusters=5,              # Increased clusters for more diversity
    max_generations=1,      # More generations for better evolution
    decay_rate=0.8,          # Reduced decay rate for better selection pressure
    alpha=1.0,               # Reduced alpha for sharper clustering
    penalty=0.0005,          # Much smaller penalty to allow longer snippets
    verbose=2                # Increased verbosity for better debugging
)

# Run with the specified original data path
best_code, best_fitness = adv_learning.run(original_data_path='/kaggle/input/eatvul/cwe119_test.csv',prediction_file_path='/kaggle/input/eatvul/predict_codebert_cwe119.txt')

print("\n=== Final Results ===")
print(f"Best adversarial code fitness: {best_fitness:.4f}")
print(f"Best adversarial code snippet length: {len(best_code.splitlines())}")
print(f"Best adversarial code:")
print("-" * 50)
print(best_code)
print("-" * 50)


=== ATTACK POOL LOADING ===
Raw attack pool shape: (200, 3)
Available columns: ['original_code', 'adversarial_code', 'label']
Detected attack pool format with 'adversarial_code' column
Attack pool processed successfully:
  Initial size: 200
  After removing NaN: 200
  Final shape: (200, 1)
Sample adversarial codes:
  [1] nsSMILTimeContainer* container_var = nullptr;
double aoffsetseconds_var = aOffsetSeconds;
nsSMILTime...
  [2] void* sa_var;
struct task_struct* task_struct_var;
struct k_sigaction* ka_var;
if(task_struct_var &&...
  [3] const int const_var = 0;
nsPresContext* nsprescontext_var = nullptr;
nsIFrame* getparent_var = nullp...
Loading model from /kaggle/input/eatvul/cwe119-model/model
Loading best model checkpoint
Loaded tokenizer from saved model
Loaded training history. Best validation accuracy: 0.9213
Model loaded successfully and set to evaluation mode
Successfully loaded model from /kaggle/input/eatvul/cwe119-model/model
Model is in eval mode: True
Model test predicti

Initial fitness:   0%|          | 0/20 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:   5%|▌         | 1/20 [00:18<05:54, 18.67s/it]

Attack success rate: 0.2850 (110/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 110/851 (0.1293)
Adversarial snippet length: 6
Length penalty: 0.0030
Fitness score: 0.2820

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  10%|█         | 2/20 [00:37<05:35, 18.62s/it]

Attack success rate: 0.5440 (210/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 210/851 (0.2468)
Adversarial snippet length: 6
Length penalty: 0.0030
Fitness score: 0.6410

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  15%|█▌        | 3/20 [00:55<05:16, 18.61s/it]

Attack success rate: 0.6813 (263/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 263/851 (0.3090)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7808

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  20%|██        | 4/20 [01:14<04:57, 18.59s/it]

Attack success rate: 0.6865 (265/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 265/851 (0.3114)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7860

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  25%|██▌       | 5/20 [01:32<04:37, 18.52s/it]

Attack success rate: 0.6632 (256/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 256/851 (0.3008)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7627

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  30%|███       | 6/20 [01:51<04:18, 18.43s/it]

Attack success rate: 0.6554 (253/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 253/851 (0.2973)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7549

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  35%|███▌      | 7/20 [02:09<03:59, 18.41s/it]

Attack success rate: 0.6399 (247/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 247/851 (0.2902)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7394

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  40%|████      | 8/20 [02:27<03:40, 18.37s/it]

Attack success rate: 0.6477 (250/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 250/851 (0.2938)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7472

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  45%|████▌     | 9/20 [02:46<03:21, 18.34s/it]

Attack success rate: 0.6295 (243/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 243/851 (0.2855)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7290

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  50%|█████     | 10/20 [03:04<03:03, 18.31s/it]

Attack success rate: 0.6684 (258/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 258/851 (0.3032)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7679

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  55%|█████▌    | 11/20 [03:22<02:44, 18.31s/it]

Attack success rate: 0.6166 (238/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 238/851 (0.2797)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7161

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  60%|██████    | 12/20 [03:40<02:26, 18.31s/it]

Attack success rate: 0.6295 (243/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 243/851 (0.2855)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7290

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  65%|██████▌   | 13/20 [03:59<02:08, 18.31s/it]

Attack success rate: 0.6503 (251/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 251/851 (0.2949)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7498

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  70%|███████   | 14/20 [04:17<01:49, 18.32s/it]

Attack success rate: 0.6347 (245/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 245/851 (0.2879)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7342

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  75%|███████▌  | 15/20 [04:36<01:31, 18.37s/it]

Attack success rate: 0.6762 (261/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 261/851 (0.3067)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7757

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  80%|████████  | 16/20 [04:54<01:13, 18.40s/it]

Attack success rate: 0.6632 (256/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 256/851 (0.3008)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7627

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  85%|████████▌ | 17/20 [05:12<00:55, 18.40s/it]

Attack success rate: 0.6477 (250/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 250/851 (0.2938)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7472

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  90%|█████████ | 18/20 [05:31<00:36, 18.46s/it]

Attack success rate: 0.6632 (256/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 256/851 (0.3008)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7627

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness:  95%|█████████▌| 19/20 [05:50<00:18, 18.49s/it]

Attack success rate: 0.6736 (260/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 260/851 (0.3055)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7731

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Initial fitness: 100%|██████████| 20/20 [06:08<00:00, 18.43s/it]

Attack success rate: 0.6218 (240/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 240/851 (0.2820)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7213

=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===

=== ATTACK EFFECTIVENESS DIAGNOSIS ===
Adversarial code being tested:
--------------------------------------------------
/* This code was previously flagged as vulnerable */
--------------------------------------------------

Testing on 10 vulnerable samples:
Sample 1: Originally not detected as vulnerable (pred=0, conf=0.695)
Sample 3: Originally not detected as vulnerable (pred=0, conf=0.625)
Sample 4: Originally not detected as vulnerable (pred=0, conf=0.619)
Sample 5:
  Original: pred=1, conf=0.613
  Adversarial: pred=1, conf=0.907
  Attack success: False
  Confidence change: 0.613 -> 0.907
  Adversarial code snippet:
        uint32_t code, const Parcel &data, Parcel *reply, uint32_t flags) {
    >>> /* This code was previously 




Sample 9:
  Original: pred=1, conf=0.981
  Adversarial: pred=1, conf=0.985
  Attack success: False
  Confidence change: 0.981 -> 0.985
  Adversarial code snippet:
        {
    >>> /* This code was previously flagged as vulnerable */
        struct sshbuf *b = NULL;
        struct sshcomp *comp;

Sample 10: Originally not detected as vulnerable (pred=0, conf=0.693)
Sample 11: Originally not detected as vulnerable (pred=0, conf=0.692)
Sample 13: Originally not detected as vulnerable (pred=0, conf=0.570)
Sample 14: Originally not detected as vulnerable (pred=0, conf=0.559)
Overall attack success rate: 0.0000 (0/10)

=== ADDITIONAL DIAGNOSTICS ===
Adversarial code in isolation:
  Prediction: 0 (0=benign, 1=vulnerable)
  Confidence: 0.695
  -> Adversarial code itself is not detected as vulnerable
  -> This might explain low attack success rates

=== Generation 1/1 ===
Best fitness so far: 0.7860
Best attack success rate: 0.6865
Updated centroids: [0.52126137 0.66447834 0.74481632 0.7377965

Offspring fitness:   0%|          | 0/10 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  10%|█         | 1/10 [00:18<02:46, 18.52s/it]

Attack success rate: 0.6399 (247/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 247/851 (0.2902)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7394

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  20%|██        | 2/10 [00:36<02:27, 18.47s/it]

Attack success rate: 0.6399 (247/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 247/851 (0.2902)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7394

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  30%|███       | 3/10 [00:55<02:09, 18.44s/it]

Attack success rate: 0.6995 (270/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 270/851 (0.3173)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.7985

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  40%|████      | 4/10 [01:13<01:50, 18.48s/it]

Attack success rate: 0.6865 (265/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 265/851 (0.3114)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7860

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  50%|█████     | 5/10 [01:32<01:32, 18.54s/it]

Attack success rate: 0.6736 (260/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 260/851 (0.3055)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.7726

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  60%|██████    | 6/10 [01:51<01:14, 18.52s/it]

Attack success rate: 0.6813 (263/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 263/851 (0.3090)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7808

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  70%|███████   | 7/10 [02:09<00:55, 18.52s/it]

Attack success rate: 0.6554 (253/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 253/851 (0.2973)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7549

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  80%|████████  | 8/10 [02:28<00:37, 18.51s/it]

Attack success rate: 0.6554 (253/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 253/851 (0.2973)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7549

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness:  90%|█████████ | 9/10 [02:46<00:18, 18.48s/it]

Attack success rate: 0.6218 (240/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 240/851 (0.2820)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7213

=== DATASET COMPOSITION ===
Total samples: 851
Labeled as vulnerable: 425
Labeled as benign: 426
Applying adversarial code to 425 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 425 total
  → Predicted as vulnerable: 386
  → Predicted as benign: 39
Benign samples (label=0): 426 total
  → Predicted as vulnerable: 35
  → Predicted as benign: 391
Model accuracy: 0.9130
Vulnerable recall: 0.9082
Samples available for attack: 386
Using model to predict adversarial samples


Offspring fitness: 100%|██████████| 10/10 [03:04<00:00, 18.49s/it]

Attack success rate: 0.6218 (240/386 samples changed prediction)
  - 0→1 changes: 45/851 (0.0529)
  - 1→0 changes: 240/851 (0.2820)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7213
New best fitness: 0.7985
New best attack success rate: 0.6995
Best code snippet length: 2

=== Direct Attack Success Rate Calculation ===
Using loaded predictions for direct attack calculation...
Found 425 vulnerable samples for adversarial testing
Getting adversarial predictions...






=== Final Attack Success Results ===
Overall Attack Success Rate (any change): 0.6995
Direct Attack Success Rate (vulnerable samples only): 0.6376
Vulnerable to Benign Changes (1→0): 0.6235 (265/425)
Fitness Score: 0.7985
Length Penalty: 0.0010
Code Snippet Length: 2

=== GENERATING ADVERSARIAL PREDICTIONS ===
Generating adversarial predictions with best code...


Generating adversarial predictions: 100%|██████████| 851/851 [00:18<00:00, 46.06it/s]

Input directory is read-only, using current directory: /kaggle/working
Adversarial predictions exported to: /kaggle/working/prediction_adv_cwe119_2025-06-02_05-40-59.txt
Total adversarial predictions exported: 851

=== FINAL ADVERSARIAL ATTACK RESULTS ===
Total prediction changes: 329/851 (0.3866)
Vulnerable→Benign changes: 284
Benign→Vulnerable changes: 45
Adversarial predictions saved to: /kaggle/working/prediction_adv_cwe119_2025-06-02_05-40-59.txt
Results saved to adversarial_results_cwe119.json

=== Final Results ===
Best adversarial code fitness: 0.7985
Best adversarial code snippet length: 2
Best adversarial code:
--------------------------------------------------
#ifdef DEBUG
/* This code was previously flagged as vulnerable */
--------------------------------------------------





# CWE 189

In [14]:

# Run adversarial learning with improved parameters
adv_learning = AdversarialLearning(
    attack_pool_path='/kaggle/input/eatvul/cwe399_attack_pool.csv',
    model_path='/kaggle/input/eatvul/cwe189-model/model', # Default path, can be updated
    pop_size=20,             # Increased population size for better diversity
    clusters=5,              # Increased clusters for more diversity
    max_generations=1,      # More generations for better evolution
    decay_rate=0.8,          # Reduced decay rate for better selection pressure
    alpha=1.0,               # Reduced alpha for sharper clustering
    penalty=0.0005,          # Much smaller penalty to allow longer snippets
    verbose=2                # Increased verbosity for better debugging
)

# Run with the specified original data path
best_code, best_fitness = adv_learning.run(original_data_path='/kaggle/input/eatvul/cwe189_test.csv',prediction_file_path='/kaggle/input/eatvul/predict_codebert_cwe189.txt')

print("\n=== Final Results ===")
print(f"Best adversarial code fitness: {best_fitness:.4f}")
print(f"Best adversarial code snippet length: {len(best_code.splitlines())}")
print(f"Best adversarial code:")
print("-" * 50)
print(best_code)
print("-" * 50)


=== ATTACK POOL LOADING ===
Raw attack pool shape: (200, 3)
Available columns: ['original_code', 'adversarial_code', 'label']
Detected attack pool format with 'adversarial_code' column
Attack pool processed successfully:
  Initial size: 200
  After removing NaN: 200
  Final shape: (200, 1)
Sample adversarial codes:
  [1] nsSMILTimeContainer* container_var = nullptr;
double aoffsetseconds_var = aOffsetSeconds;
nsSMILTime...
  [2] void* sa_var;
struct task_struct* task_struct_var;
struct k_sigaction* ka_var;
if(task_struct_var &&...
  [3] const int const_var = 0;
nsPresContext* nsprescontext_var = nullptr;
nsIFrame* getparent_var = nullp...
Loading model from /kaggle/input/eatvul/cwe189-model/model
Loading best model checkpoint
Loaded tokenizer from saved model
Loaded training history. Best validation accuracy: 0.9213
Model loaded successfully and set to evaluation mode
Successfully loaded model from /kaggle/input/eatvul/cwe189-model/model
Model is in eval mode: True
Model test predicti

Initial fitness:   0%|          | 0/20 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:   5%|▌         | 1/20 [00:03<00:57,  3.04s/it]

Attack success rate: 0.1667 (10/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 10/135 (0.0741)
Adversarial snippet length: 5
Length penalty: 0.0025
Fitness score: 0.1642

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  10%|█         | 2/20 [00:06<00:54,  3.01s/it]

Attack success rate: 0.5333 (32/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 32/135 (0.2370)
Adversarial snippet length: 6
Length penalty: 0.0030
Fitness score: 0.6303

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  15%|█▌        | 3/20 [00:08<00:50,  2.98s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  20%|██        | 4/20 [00:11<00:47,  2.97s/it]

Attack success rate: 0.7000 (42/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 42/135 (0.3111)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  25%|██▌       | 5/20 [00:14<00:44,  2.97s/it]

Attack success rate: 0.7000 (42/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 42/135 (0.3111)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  30%|███       | 6/20 [00:17<00:41,  2.96s/it]

Attack success rate: 0.8000 (48/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 48/135 (0.3556)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  35%|███▌      | 7/20 [00:20<00:38,  2.96s/it]

Attack success rate: 0.7000 (42/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 42/135 (0.3111)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  40%|████      | 8/20 [00:23<00:35,  2.97s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  45%|████▌     | 9/20 [00:26<00:32,  2.97s/it]

Attack success rate: 0.8000 (48/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 48/135 (0.3556)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  50%|█████     | 10/20 [00:29<00:29,  2.97s/it]

Attack success rate: 0.7500 (45/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 45/135 (0.3333)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8495

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  55%|█████▌    | 11/20 [00:32<00:26,  2.98s/it]

Attack success rate: 0.7167 (43/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 43/135 (0.3185)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8162

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  60%|██████    | 12/20 [00:35<00:23,  2.98s/it]

Attack success rate: 0.7333 (44/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 44/135 (0.3259)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8328

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  65%|██████▌   | 13/20 [00:38<00:20,  2.97s/it]

Attack success rate: 0.7167 (43/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 43/135 (0.3185)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8162

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  70%|███████   | 14/20 [00:41<00:17,  2.96s/it]

Attack success rate: 0.7667 (46/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 46/135 (0.3407)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8662

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  75%|███████▌  | 15/20 [00:44<00:14,  2.95s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  80%|████████  | 16/20 [00:47<00:11,  2.95s/it]

Attack success rate: 0.8167 (49/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 49/135 (0.3630)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0162

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  85%|████████▌ | 17/20 [00:50<00:08,  2.95s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  90%|█████████ | 18/20 [00:53<00:05,  2.94s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness:  95%|█████████▌| 19/20 [00:56<00:02,  2.94s/it]

Attack success rate: 0.7167 (43/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 43/135 (0.3185)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8162

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Initial fitness: 100%|██████████| 20/20 [00:59<00:00,  2.96s/it]

Attack success rate: 0.6833 (41/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 41/135 (0.3037)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7828

=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===

=== ATTACK EFFECTIVENESS DIAGNOSIS ===
Adversarial code being tested:
--------------------------------------------------
strcpy(dst, src); // No bounds checking
--------------------------------------------------

Testing on 10 vulnerable samples:
Sample 0:
  Original: pred=1, conf=0.602
  Adversarial: pred=0, conf=0.573
  Attack success: True
  Confidence change: 0.602 -> 0.573
  Adversarial code snippet:
        GfxColorSpace *colorSpaceA) {
    >>> strcpy(dst, src); // No bounds checking
        GfxIndexedColorSpace *indexedCS;
        GfxSeparationColorSpace *sepCS;

Sample 1: Originally not detected as vulnerable (pred=0, conf=0.694)
Sample 2: Originally not detected as vulnerable (pred=0, conf=0.691)
Sample 3: Originally not detec




Sample 9:
  Original: pred=1, conf=0.591
  Adversarial: pred=1, conf=0.652
  Attack success: False
  Confidence change: 0.591 -> 0.652
  Adversarial code snippet:
        int stream_id, const media::AudioParameters& params,
    >>> strcpy(dst, src); // No bounds checking
        const std::string& device_id, bool automatic_gain_control) {
        VLOG(1) << "AudioInputRendererHost::OnCreateStream(stream_id="

Sample 12: Originally not detected as vulnerable (pred=0, conf=0.692)
Overall attack success rate: 0.1000 (1/10)

=== ADDITIONAL DIAGNOSTICS ===
Adversarial code in isolation:
  Prediction: 0 (0=benign, 1=vulnerable)
  Confidence: 0.694
  -> Adversarial code itself is not detected as vulnerable
  -> This might explain low attack success rates

=== Generation 1/1 ===
Best fitness so far: 1.0162
Best attack success rate: 0.8167
Updated centroids: [0.72098803 0.8388479  0.8182163  0.41437104 0.8410537 ]
Centroid change: 1.970908
Selected top clusters: [4 1] with centroids [0.8410537 

Offspring fitness:   0%|          | 0/10 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  10%|█         | 1/10 [00:02<00:26,  2.95s/it]

Attack success rate: 0.7500 (45/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 45/135 (0.3333)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8490

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  20%|██        | 2/10 [00:05<00:23,  2.95s/it]

Attack success rate: 0.7167 (43/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 43/135 (0.3185)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8162

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  30%|███       | 3/10 [00:08<00:20,  2.94s/it]

Attack success rate: 0.7500 (45/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 45/135 (0.3333)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8490

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  40%|████      | 4/10 [00:11<00:17,  2.94s/it]

Attack success rate: 0.8000 (48/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 48/135 (0.3556)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  50%|█████     | 5/10 [00:14<00:14,  2.94s/it]

Attack success rate: 0.7667 (46/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 46/135 (0.3407)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8657

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  60%|██████    | 6/10 [00:17<00:11,  2.94s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  70%|███████   | 7/10 [00:20<00:08,  2.94s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  80%|████████  | 8/10 [00:23<00:05,  2.93s/it]

Attack success rate: 0.7500 (45/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 45/135 (0.3333)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8490

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness:  90%|█████████ | 9/10 [00:26<00:02,  2.93s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828

=== DATASET COMPOSITION ===
Total samples: 135
Labeled as vulnerable: 67
Labeled as benign: 68
Applying adversarial code to 67 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 67 total
  → Predicted as vulnerable: 60
  → Predicted as benign: 7
Benign samples (label=0): 68 total
  → Predicted as vulnerable: 2
  → Predicted as benign: 66
Model accuracy: 0.9333
Vulnerable recall: 0.8955
Samples available for attack: 60
Using model to predict adversarial samples


Offspring fitness: 100%|██████████| 10/10 [00:29<00:00,  2.94s/it]

Attack success rate: 0.7833 (47/60 samples changed prediction)
  - 0→1 changes: 11/135 (0.0815)
  - 1→0 changes: 47/135 (0.3481)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8828
Found optimal adversarial code with fitness 1.0162

=== Direct Attack Success Rate Calculation ===
Using loaded predictions for direct attack calculation...
Found 67 vulnerable samples for adversarial testing
Getting adversarial predictions...






=== Final Attack Success Results ===
Overall Attack Success Rate (any change): 0.8167
Direct Attack Success Rate (vulnerable samples only): 0.7612
Vulnerable to Benign Changes (1→0): 0.7313 (49/67)
Fitness Score: 1.0162
Length Penalty: 0.0005
Code Snippet Length: 1

=== GENERATING ADVERSARIAL PREDICTIONS ===
Generating adversarial predictions with best code...


Generating adversarial predictions: 100%|██████████| 135/135 [00:02<00:00, 45.69it/s]

Input directory is read-only, using current directory: /kaggle/working
Adversarial predictions exported to: /kaggle/working/prediction_adv_cwe189_2025-06-02_05-42-37.txt
Total adversarial predictions exported: 135

=== FINAL ADVERSARIAL ATTACK RESULTS ===
Total prediction changes: 61/135 (0.4519)
Vulnerable→Benign changes: 50
Benign→Vulnerable changes: 11
Adversarial predictions saved to: /kaggle/working/prediction_adv_cwe189_2025-06-02_05-42-37.txt
Results saved to adversarial_results_cwe189.json

=== Final Results ===
Best adversarial code fitness: 1.0162
Best adversarial code snippet length: 1
Best adversarial code:
--------------------------------------------------
strcpy(dst, src); // No bounds checking
--------------------------------------------------





# CWE 416

In [15]:

# Run adversarial learning with improved parameters
adv_learning = AdversarialLearning(
    attack_pool_path='/kaggle/input/eatvul/cwe399_attack_pool.csv',
    model_path='/kaggle/input/eatvul/cwe416-model/model', # Default path, can be updated
    pop_size=20,             # Increased population size for better diversity
    clusters=5,              # Increased clusters for more diversity
    max_generations=1,      # More generations for better evolution
    decay_rate=0.8,          # Reduced decay rate for better selection pressure
    alpha=1.0,               # Reduced alpha for sharper clustering
    penalty=0.0005,          # Much smaller penalty to allow longer snippets
    verbose=2                # Increased verbosity for better debugging
)

# Run with the specified original data path
best_code, best_fitness = adv_learning.run(original_data_path='/kaggle/input/eatvul/cwe416_test.csv',prediction_file_path='/kaggle/input/eatvul/predict_codebert_cwe416.txt')

print("\n=== Final Results ===")
print(f"Best adversarial code fitness: {best_fitness:.4f}")
print(f"Best adversarial code snippet length: {len(best_code.splitlines())}")
print(f"Best adversarial code:")
print("-" * 50)
print(best_code)
print("-" * 50)


=== ATTACK POOL LOADING ===
Raw attack pool shape: (200, 3)
Available columns: ['original_code', 'adversarial_code', 'label']
Detected attack pool format with 'adversarial_code' column
Attack pool processed successfully:
  Initial size: 200
  After removing NaN: 200
  Final shape: (200, 1)
Sample adversarial codes:
  [1] nsSMILTimeContainer* container_var = nullptr;
double aoffsetseconds_var = aOffsetSeconds;
nsSMILTime...
  [2] void* sa_var;
struct task_struct* task_struct_var;
struct k_sigaction* ka_var;
if(task_struct_var &&...
  [3] const int const_var = 0;
nsPresContext* nsprescontext_var = nullptr;
nsIFrame* getparent_var = nullp...
Loading model from /kaggle/input/eatvul/cwe416-model/model
Loading best model checkpoint
Loaded tokenizer from saved model
Loaded training history. Best validation accuracy: 0.9213
Model loaded successfully and set to evaluation mode
Successfully loaded model from /kaggle/input/eatvul/cwe416-model/model
Model is in eval mode: True
Model test predicti

Initial fitness:   0%|          | 0/20 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:   5%|▌         | 1/20 [00:02<00:55,  2.89s/it]

Attack success rate: 0.6750 (27/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 27/132 (0.2045)
Adversarial snippet length: 5
Length penalty: 0.0025
Fitness score: 0.7725

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  10%|█         | 2/20 [00:05<00:51,  2.89s/it]

Attack success rate: 0.3750 (15/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 15/132 (0.1136)
Adversarial snippet length: 7
Length penalty: 0.0035
Fitness score: 0.3715

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  15%|█▌        | 3/20 [00:08<00:48,  2.87s/it]

Attack success rate: 0.7250 (29/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 29/132 (0.2197)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8245

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  20%|██        | 4/20 [00:11<00:45,  2.86s/it]

Attack success rate: 0.7750 (31/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 31/132 (0.2348)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8745

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  25%|██▌       | 5/20 [00:14<00:42,  2.85s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  30%|███       | 6/20 [00:17<00:39,  2.85s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  35%|███▌      | 7/20 [00:20<00:37,  2.85s/it]

Attack success rate: 0.7250 (29/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 29/132 (0.2197)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8245

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  40%|████      | 8/20 [00:22<00:34,  2.86s/it]

Attack success rate: 0.7250 (29/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 29/132 (0.2197)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8245

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  45%|████▌     | 9/20 [00:25<00:31,  2.85s/it]

Attack success rate: 0.6500 (26/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 26/132 (0.1970)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7495

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  50%|█████     | 10/20 [00:28<00:28,  2.86s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  55%|█████▌    | 11/20 [00:31<00:25,  2.87s/it]

Attack success rate: 0.7000 (28/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 28/132 (0.2121)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  60%|██████    | 12/20 [00:34<00:22,  2.87s/it]

Attack success rate: 0.7250 (29/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 29/132 (0.2197)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8245

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  65%|██████▌   | 13/20 [00:37<00:19,  2.85s/it]

Attack success rate: 0.6750 (27/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 27/132 (0.2045)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7745

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  70%|███████   | 14/20 [00:40<00:17,  2.86s/it]

Attack success rate: 0.7000 (28/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 28/132 (0.2121)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  75%|███████▌  | 15/20 [00:42<00:14,  2.85s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8495

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  80%|████████  | 16/20 [00:45<00:11,  2.84s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8495

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  85%|████████▌ | 17/20 [00:48<00:08,  2.84s/it]

Attack success rate: 0.7750 (31/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 31/132 (0.2348)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8745

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  90%|█████████ | 18/20 [00:51<00:05,  2.84s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness:  95%|█████████▌| 19/20 [00:54<00:02,  2.83s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Initial fitness: 100%|██████████| 20/20 [00:57<00:00,  2.85s/it]

Attack success rate: 0.6750 (27/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 27/132 (0.2045)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.7745

=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===

=== ATTACK EFFECTIVENESS DIAGNOSIS ===
Adversarial code being tested:
--------------------------------------------------
// Code reviewed by security team
--------------------------------------------------

Testing on 10 vulnerable samples:
Sample 3: Originally not detected as vulnerable (pred=0, conf=0.692)
Sample 7: Originally not detected as vulnerable (pred=0, conf=0.692)
Sample 12: Originally not detected as vulnerable (pred=0, conf=0.631)
Sample 14: Originally not detected as vulnerable (pred=0, conf=0.690)
Sample 15:
  Original: pred=1, conf=0.563
  Adversarial: pred=0, conf=0.566
  Attack success: True
  Confidence change: 0.563 -> 0.566
  Adversarial code snippet:
               const int64* original_values, size_t original_co




Sample 21:
  Original: pred=1, conf=0.935
  Adversarial: pred=1, conf=0.860
  Attack success: False
  Confidence change: 0.935 -> 0.860
  Adversarial code snippet:
        struct load_command lc = {0, 0};
    >>> // Code reviewed by security team
        ut8 loadc[sizeof (struct load_command)] = {0};
        bool is_first_thread = true;

Sample 22: Originally not detected as vulnerable (pred=0, conf=0.691)
Overall attack success rate: 0.1000 (1/10)

=== ADDITIONAL DIAGNOSTICS ===
Adversarial code in isolation:
  Prediction: 0 (0=benign, 1=vulnerable)
  Confidence: 0.695
  -> Adversarial code itself is not detected as vulnerable
  -> This might explain low attack success rates

=== Generation 1/1 ===
Best fitness so far: 0.8995
Best attack success rate: 0.8000
Updated centroids: [0.78063157 0.58879869 0.82534275 0.83934815 0.83230299]
Centroid change: 2.346233
Selected top clusters: [3 4] with centroids [0.83934815 0.83230299]
Created 10 offspring through enhanced crossover


Offspring fitness:   0%|          | 0/10 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  10%|█         | 1/10 [00:02<00:25,  2.85s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  20%|██        | 2/10 [00:05<00:22,  2.84s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  30%|███       | 3/10 [00:08<00:19,  2.83s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8490

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  40%|████      | 4/10 [00:11<00:16,  2.83s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 0.8490

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  50%|█████     | 5/10 [00:14<00:14,  2.84s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  60%|██████    | 6/10 [00:17<00:11,  2.85s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  70%|███████   | 7/10 [00:19<00:08,  2.85s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8495

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  80%|████████  | 8/10 [00:22<00:05,  2.84s/it]

Attack success rate: 0.7500 (30/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 30/132 (0.2273)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8495

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness:  90%|█████████ | 9/10 [00:25<00:02,  2.84s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== DATASET COMPOSITION ===
Total samples: 132
Labeled as vulnerable: 66
Labeled as benign: 66
Applying adversarial code to 66 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 66 total
  → Predicted as vulnerable: 40
  → Predicted as benign: 26
Benign samples (label=0): 66 total
  → Predicted as vulnerable: 5
  → Predicted as benign: 61
Model accuracy: 0.7652
Vulnerable recall: 0.6061
Samples available for attack: 40
Using model to predict adversarial samples


Offspring fitness: 100%|██████████| 10/10 [00:28<00:00,  2.84s/it]

Attack success rate: 0.8000 (32/40 samples changed prediction)
  - 0→1 changes: 12/132 (0.0909)
  - 1→0 changes: 32/132 (0.2424)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 0.8995

=== Direct Attack Success Rate Calculation ===
Using loaded predictions for direct attack calculation...
Found 66 vulnerable samples for adversarial testing
Getting adversarial predictions...






=== Final Attack Success Results ===
Overall Attack Success Rate (any change): 0.8000
Direct Attack Success Rate (vulnerable samples only): 0.5303
Vulnerable to Benign Changes (1→0): 0.4394 (29/66)
Fitness Score: 0.8995
Length Penalty: 0.0005
Code Snippet Length: 1

=== GENERATING ADVERSARIAL PREDICTIONS ===
Generating adversarial predictions with best code...


Generating adversarial predictions: 100%|██████████| 132/132 [00:02<00:00, 46.07it/s]

Input directory is read-only, using current directory: /kaggle/working
Adversarial predictions exported to: /kaggle/working/prediction_adv_cwe416_2025-06-02_05-44-13.txt
Total adversarial predictions exported: 132

=== FINAL ADVERSARIAL ATTACK RESULTS ===
Total prediction changes: 46/132 (0.3485)
Vulnerable→Benign changes: 33
Benign→Vulnerable changes: 13
Adversarial predictions saved to: /kaggle/working/prediction_adv_cwe416_2025-06-02_05-44-13.txt
Results saved to adversarial_results_cwe416.json

=== Final Results ===
Best adversarial code fitness: 0.8995
Best adversarial code snippet length: 1
Best adversarial code:
--------------------------------------------------
// Code reviewed by security team
--------------------------------------------------





# CWE 20

In [16]:

# Run adversarial learning with improved parameters
adv_learning = AdversarialLearning(
    attack_pool_path='/kaggle/input/eatvul/cwe399_attack_pool.csv',
    model_path='/kaggle/input/eatvul/cwe20-model/model', # Default path, can be updated
    pop_size=20,             # Increased population size for better diversity
    clusters=5,              # Increased clusters for more diversity
    max_generations=1,      # More generations for better evolution
    decay_rate=0.8,          # Reduced decay rate for better selection pressure
    alpha=1.0,               # Reduced alpha for sharper clustering
    penalty=0.0005,          # Much smaller penalty to allow longer snippets
    verbose=2                # Increased verbosity for better debugging
)

# Run with the specified original data path
best_code, best_fitness = adv_learning.run(original_data_path='/kaggle/input/eatvul/cwe20_test.csv',prediction_file_path='/kaggle/input/eatvul/predict_codebert_cwe20.txt')

print("\n=== Final Results ===")
print(f"Best adversarial code fitness: {best_fitness:.4f}")
print(f"Best adversarial code snippet length: {len(best_code.splitlines())}")
print(f"Best adversarial code:")
print("-" * 50)
print(best_code)
print("-" * 50)


=== ATTACK POOL LOADING ===
Raw attack pool shape: (200, 3)
Available columns: ['original_code', 'adversarial_code', 'label']
Detected attack pool format with 'adversarial_code' column
Attack pool processed successfully:
  Initial size: 200
  After removing NaN: 200
  Final shape: (200, 1)
Sample adversarial codes:
  [1] nsSMILTimeContainer* container_var = nullptr;
double aoffsetseconds_var = aOffsetSeconds;
nsSMILTime...
  [2] void* sa_var;
struct task_struct* task_struct_var;
struct k_sigaction* ka_var;
if(task_struct_var &&...
  [3] const int const_var = 0;
nsPresContext* nsprescontext_var = nullptr;
nsIFrame* getparent_var = nullp...
Loading model from /kaggle/input/eatvul/cwe20-model/model
Loading best model checkpoint
Loaded tokenizer from saved model
Loaded training history. Best validation accuracy: 0.9344
Model loaded successfully and set to evaluation mode
Successfully loaded model from /kaggle/input/eatvul/cwe20-model/model
Model is in eval mode: True
Model test prediction

Initial fitness:   0%|          | 0/20 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:   5%|▌         | 1/20 [00:09<03:08,  9.91s/it]

Attack success rate: 0.8757 (148/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 148/457 (0.3239)
Adversarial snippet length: 6
Length penalty: 0.0030
Fitness score: 1.0727

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  10%|█         | 2/20 [00:19<02:57,  9.88s/it]

Attack success rate: 0.8935 (151/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 151/457 (0.3304)
Adversarial snippet length: 8
Length penalty: 0.0040
Fitness score: 1.0895

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  15%|█▌        | 3/20 [00:29<02:46,  9.81s/it]

Attack success rate: 0.8639 (146/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 146/457 (0.3195)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0634

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  20%|██        | 4/20 [00:39<02:36,  9.76s/it]

Attack success rate: 0.8580 (145/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 145/457 (0.3173)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0575

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  25%|██▌       | 5/20 [00:48<02:25,  9.72s/it]

Attack success rate: 0.8402 (142/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 142/457 (0.3107)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0397

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  30%|███       | 6/20 [00:58<02:16,  9.73s/it]

Attack success rate: 0.8639 (146/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 146/457 (0.3195)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0634

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  35%|███▌      | 7/20 [01:08<02:06,  9.72s/it]

Attack success rate: 0.8876 (150/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 150/457 (0.3282)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0871

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  40%|████      | 8/20 [01:17<01:56,  9.71s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  45%|████▌     | 9/20 [01:27<01:46,  9.72s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  50%|█████     | 10/20 [01:37<01:37,  9.72s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  55%|█████▌    | 11/20 [01:47<01:27,  9.71s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  60%|██████    | 12/20 [01:56<01:17,  9.72s/it]

Attack success rate: 0.8935 (151/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 151/457 (0.3304)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0930

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  65%|██████▌   | 13/20 [02:06<01:08,  9.76s/it]

Attack success rate: 0.8935 (151/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 151/457 (0.3304)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0930

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  70%|███████   | 14/20 [02:16<00:58,  9.77s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  75%|███████▌  | 15/20 [02:26<00:48,  9.76s/it]

Attack success rate: 0.8698 (147/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 147/457 (0.3217)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0693

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  80%|████████  | 16/20 [02:35<00:39,  9.76s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  85%|████████▌ | 17/20 [02:45<00:29,  9.75s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  90%|█████████ | 18/20 [02:55<00:19,  9.75s/it]

Attack success rate: 0.8935 (151/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 151/457 (0.3304)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0930

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness:  95%|█████████▌| 19/20 [03:05<00:09,  9.77s/it]

Attack success rate: 0.9053 (153/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 153/457 (0.3348)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.1048

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Initial fitness: 100%|██████████| 20/20 [03:15<00:00,  9.75s/it]

Attack success rate: 0.8994 (152/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 152/457 (0.3326)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0989

=== DIAGNOSING BEST INITIAL ADVERSARIAL CODE ===

=== ATTACK EFFECTIVENESS DIAGNOSIS ===
Adversarial code being tested:
--------------------------------------------------
sprintf(query, "SELECT * FROM users WHERE name='%s'", user_input); // SQL injection
--------------------------------------------------

Testing on 10 vulnerable samples:
Sample 1: Originally not detected as vulnerable (pred=0, conf=0.998)
Sample 4: Originally not detected as vulnerable (pred=0, conf=0.998)
Sample 5: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 8: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 11:
  Original: pred=1, conf=0.545
  Adversarial: pred=1, conf=0.516
  Attack success: False
  Confidence change: 0.545 -> 0.516
  Adversarial code snippet:
        chr




Sample 18: Originally not detected as vulnerable (pred=0, conf=0.999)
Sample 20: Originally not detected as vulnerable (pred=0, conf=0.999)
Overall attack success rate: 0.0000 (0/10)

=== ADDITIONAL DIAGNOSTICS ===
Adversarial code in isolation:
  Prediction: 0 (0=benign, 1=vulnerable)
  Confidence: 0.999
  -> Adversarial code itself is not detected as vulnerable
  -> This might explain low attack success rates

=== Generation 1/1 ===
Best fitness so far: 1.1048
Best attack success rate: 0.9053
Updated centroids: [1.07962606 1.08236998 1.07940206 1.08005754 1.08062478]
Centroid change: 2.060297
Selected top clusters: [1 4] with centroids [1.08236998 1.08062478]
Created 10 offspring through enhanced crossover


Offspring fitness:   0%|          | 0/10 [00:00<?, ?it/s]


=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  10%|█         | 1/10 [00:09<01:27,  9.70s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  20%|██        | 2/10 [00:19<01:17,  9.71s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  30%|███       | 3/10 [00:29<01:08,  9.72s/it]

Attack success rate: 0.8402 (142/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 142/457 (0.3107)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0397

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  40%|████      | 4/10 [00:38<00:58,  9.77s/it]

Attack success rate: 0.8402 (142/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 142/457 (0.3107)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0397

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  50%|█████     | 5/10 [00:48<00:48,  9.78s/it]

Attack success rate: 0.8698 (147/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 147/457 (0.3217)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0693

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  60%|██████    | 6/10 [00:58<00:39,  9.77s/it]

Attack success rate: 0.8698 (147/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 147/457 (0.3217)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0693

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  70%|███████   | 7/10 [01:08<00:29,  9.77s/it]

Attack success rate: 0.8639 (146/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 146/457 (0.3195)
Adversarial snippet length: 2
Length penalty: 0.0010
Fitness score: 1.0629

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  80%|████████  | 8/10 [01:18<00:19,  9.77s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness:  90%|█████████ | 9/10 [01:27<00:09,  9.78s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812

=== DATASET COMPOSITION ===
Total samples: 457
Labeled as vulnerable: 228
Labeled as benign: 229
Applying adversarial code to 228 vulnerable samples
Using loaded predictions from txt file

=== MODEL PERFORMANCE BREAKDOWN ===
Vulnerable samples (label=1): 228 total
  → Predicted as vulnerable: 169
  → Predicted as benign: 59
Benign samples (label=0): 229 total
  → Predicted as vulnerable: 26
  → Predicted as benign: 203
Model accuracy: 0.8140
Vulnerable recall: 0.7412
Samples available for attack: 169
Using model to predict adversarial samples


Offspring fitness: 100%|██████████| 10/10 [01:37<00:00,  9.76s/it]

Attack success rate: 0.8817 (149/169 samples changed prediction)
  - 0→1 changes: 23/457 (0.0503)
  - 1→0 changes: 149/457 (0.3260)
Adversarial snippet length: 1
Length penalty: 0.0005
Fitness score: 1.0812
Found optimal adversarial code with fitness 1.1048

=== Direct Attack Success Rate Calculation ===
Using loaded predictions for direct attack calculation...
Found 228 vulnerable samples for adversarial testing
Getting adversarial predictions...






=== Final Attack Success Results ===
Overall Attack Success Rate (any change): 0.9053
Direct Attack Success Rate (vulnerable samples only): 0.7237
Vulnerable to Benign Changes (1→0): 0.6579 (150/228)
Fitness Score: 1.1048
Length Penalty: 0.0005
Code Snippet Length: 1

=== GENERATING ADVERSARIAL PREDICTIONS ===
Generating adversarial predictions with best code...


Generating adversarial predictions: 100%|██████████| 457/457 [00:09<00:00, 46.35it/s]

Input directory is read-only, using current directory: /kaggle/working
Adversarial predictions exported to: /kaggle/working/prediction_adv_cwe20_2025-06-02_05-49-30.txt
Total adversarial predictions exported: 457

=== FINAL ADVERSARIAL ATTACK RESULTS ===
Total prediction changes: 196/457 (0.4289)
Vulnerable→Benign changes: 176
Benign→Vulnerable changes: 20
Adversarial predictions saved to: /kaggle/working/prediction_adv_cwe20_2025-06-02_05-49-30.txt
Results saved to adversarial_results_cwe20.json

=== Final Results ===
Best adversarial code fitness: 1.1048
Best adversarial code snippet length: 1
Best adversarial code:
--------------------------------------------------
sprintf(query, "SELECT * FROM users WHERE name='%s'", user_input); // SQL injection
--------------------------------------------------





# Other