# In [None]:
import logging
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix
)
from transformers import (
    RobertaTokenizer, RobertaForSequenceClassification,
    Trainer, TrainingArguments, EarlyStoppingCallback, 
    TrainerCallback
)
from torch.utils.data import Dataset

# Run configuration: file locations, tokenization limit, split parameters
# and the label encoding.
CONFIG = {
    # Dataset file and pretrained checkpoint directory.
    "DATA_PATH": "/root/workspace/npe_project/Dataset/NPEPatches.json",
    "MODEL_PATH": "/root/workspace/npe_project/Salesforce_CodeT5_base",
    # Output and logging directories handed to TrainingArguments.
    "OUTPUT_DIR": "./results",
    "LOGS_DIR": "./logs",
    # Default maximum token length used by NPECommitDataset.
    "MAX_LENGTH": 512,
    # Hold-out fraction and seed for train_test_split.
    "TEST_SIZE": 0.2,
    "RANDOM_STATE": 42,
    # Category string -> integer class id.
    "LABEL_MAPPING": {
        "NPE": 1,
        "Non-NPE": 0,
    },
}

# Training Arguments
# Keyword arguments that are unpacked straight into TrainingArguments.
TRAINING_CONFIG = {
    "num_train_epochs": 5,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "learning_rate": 3e-4,
    "weight_decay": 0.01,
    # Mixed-precision training needs a CUDA device; requesting it on a
    # CPU-only host makes TrainingArguments raise, so only enable it when
    # a GPU is actually available.
    "fp16": torch.cuda.is_available(),
    "save_total_limit": 1,
}

# Logging: INFO-and-above records go both to npe_training.log and to the
# default stream of StreamHandler, each prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.FileHandler('npe_training.log'), logging.StreamHandler()],
)
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)

class NPECommitDataset(Dataset):
    """Torch dataset pairing patch texts with their integer class labels.

    Each item is tokenized on access (not up front) and returned as a dict
    with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
    """

    def __init__(self, texts: list, labels: list, tokenizer, max_len: int = CONFIG["MAX_LENGTH"]):
        """Store the raw samples, the tokenizer and the padding/truncation length."""
        self.texts, self.labels = texts, labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self) -> int:
        """Return the number of samples (one per text)."""
        return len(self.texts)

    def __getitem__(self, idx: int) -> dict:
        """Tokenize sample *idx* and return its model inputs plus label."""
        sample_text = self.texts[idx]
        sample_label = self.labels[idx]

        # Pad/truncate every sample to exactly ``max_len`` tokens.
        encoded = self.tokenizer(
            sample_text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # return_tensors="pt" adds a leading batch axis of size 1; drop it so
        # each item is a single unbatched sample.
        item = {
            field: encoded[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        item["labels"] = torch.tensor(sample_label, dtype=torch.long)
        return item

class MetricsLoggerCallback(TrainerCallback):
    """Trainer callback that keeps a per-evaluation history of metrics.

    ``metrics_history`` maps each tracked metric name to the list of values
    seen so far, one entry per ``on_evaluate`` call that received metrics.
    """

    def __init__(self):
        super().__init__()
        tracked = ("accuracy", "precision", "recall", "f1_score", "fpr", "fnr")
        self.metrics_history = {name: [] for name in tracked}

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Append the ``eval_<name>`` value of every tracked metric.

        A metric missing from *metrics* is recorded as 0 so that all
        histories keep the same length.
        """
        if metrics is None:
            return
        for name, history in self.metrics_history.items():
            history.append(metrics.get(f"eval_{name}", 0))

def compute_metrics(eval_pred) -> dict:
    """Compute binary classification metrics for a Trainer evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            row is the argmax of its logits along axis 1.

    Returns:
        dict: ``accuracy``, ``precision``, ``recall``, ``f1_score``, ``fpr``
        (false-positive rate) and ``fnr`` (false-negative rate) as floats.
        A rate whose denominator is zero (the relevant class is absent from
        the labels) is reported as 0.0.
    """
    logits, labels = eval_pred
    predictions = torch.argmax(torch.as_tensor(logits), dim=1).numpy()

    # Pin the label set so the matrix is always 2x2; without it a batch that
    # contains a single class yields a 1x1 matrix and the unpacking fails.
    tn, fp, fn, tp = confusion_matrix(labels, predictions, labels=[0, 1]).ravel()

    negatives = fp + tn
    positives = fn + tp

    return {
        "accuracy": float(accuracy_score(labels, predictions)),
        "precision": float(
            precision_score(labels, predictions, average="binary", zero_division=0)
        ),
        "recall": float(
            recall_score(labels, predictions, average="binary", zero_division=0)
        ),
        "f1_score": float(
            f1_score(labels, predictions, average="binary", zero_division=0)
        ),
        # Guard the rates against an absent class instead of dividing by zero.
        "fpr": float(fp / negatives) if negatives else 0.0,
        "fnr": float(fn / positives) if positives else 0.0,
    }

def _load_patch_table(path: str) -> pd.DataFrame:
    """Read the dataset file at *path* into a DataFrame.

    Files ending in ``.json``/``.jsonl`` are first parsed as JSON (a single
    document, then JSON Lines). If neither parse succeeds, or for any other
    extension, the file is read as CSV.
    """
    if path.lower().endswith((".json", ".jsonl")):
        for read_kwargs in ({}, {"lines": True}):
            try:
                return pd.read_json(path, **read_kwargs)
            except ValueError:
                # Not valid JSON in this layout; fall through to the next reader.
                continue
    return pd.read_csv(path)


def prepare_data():
    """Load the patch dataset, clean it and split it into train/test sets.

    Cleaning removes duplicate patches, rows with a missing ``Patch`` or
    ``Category``, and rows whose ``Category`` is not a key of
    ``CONFIG["LABEL_MAPPING"]`` (these would otherwise become NaN labels).

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` as pandas Series, where
        the ``X`` series hold patch texts and the ``y`` series hold integer
        class ids.

    Raises:
        KeyError: If the dataset lacks the ``Patch`` or ``Category`` column.
    """
    logger.info("Loading and preparing data...")

    # Load and clean data
    data = _load_patch_table(CONFIG["DATA_PATH"])

    missing_columns = [col for col in ("Patch", "Category") if col not in data.columns]
    if missing_columns:
        raise KeyError(
            f"Dataset {CONFIG['DATA_PATH']} is missing required column(s): "
            f"{missing_columns}"
        )

    data = data.drop_duplicates(subset=["Patch"])
    data = data.dropna(subset=["Patch", "Category"])

    # Map labels; anything outside the mapping comes back as NaN.
    labels = data["Category"].map(CONFIG["LABEL_MAPPING"])
    unmapped = labels.isna()
    if unmapped.any():
        logger.warning(
            "Dropping %d row(s) with unrecognised Category values: %s",
            int(unmapped.sum()),
            data.loc[unmapped, "Category"].unique().tolist(),
        )
        data = data.loc[~unmapped]
        labels = labels.loc[~unmapped]
    # NaN forces a float column; restore integer class ids for the model.
    labels = labels.astype(int)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        data["Patch"], labels,
        test_size=CONFIG["TEST_SIZE"],
        random_state=CONFIG["RANDOM_STATE"]
    )

    return X_train, X_test, y_train, y_test

def train_model():
    """Fine-tune the sequence classifier and report evaluation metrics.

    Loads the train/test split via ``prepare_data``, builds the tokenizer,
    model and datasets, trains with per-epoch evaluation and early stopping
    (patience 2), then runs one final evaluation and prints the averaged
    metrics.

    Returns:
        tuple: ``(eval_results, avg_metrics)``. ``eval_results`` is the dict
        returned by the final ``trainer.evaluate()`` call. ``avg_metrics``
        maps each tracked metric to its mean over every evaluation recorded
        by the metrics callback; metrics with no recorded value are omitted.

    Raises:
        Exception: Any error raised during data preparation or training is
            logged with its traceback and re-raised unchanged.
    """
    try:
        # Prepare data
        X_train, X_test, y_train, y_test = prepare_data()

        # Initialize tokenizer and model
        # NOTE(review): MODEL_PATH is named after CodeT5 but is loaded with the
        # RoBERTa classes - confirm the checkpoint is RoBERTa-compatible.
        tokenizer = RobertaTokenizer.from_pretrained(CONFIG["MODEL_PATH"])
        model = RobertaForSequenceClassification.from_pretrained(
            CONFIG["MODEL_PATH"],
            num_labels=2
        )

        # Create datasets
        train_dataset = NPECommitDataset(
            X_train.tolist(),
            y_train.tolist(),
            tokenizer
        )
        test_dataset = NPECommitDataset(
            X_test.tolist(),
            y_test.tolist(),
            tokenizer
        )

        # Setup training arguments
        training_args = TrainingArguments(
            output_dir=CONFIG["OUTPUT_DIR"],
            logging_dir=CONFIG["LOGS_DIR"],
            **TRAINING_CONFIG,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            logging_steps=100,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy"
        )

        # Initialize callbacks
        metrics_logger = MetricsLoggerCallback()
        early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

        # Setup trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            callbacks=[early_stopping, metrics_logger]
        )

        # Train and evaluate
        logger.info("Starting training...")
        trainer.train()

        eval_results = trainer.evaluate()
        logger.info("Training completed. Computing final metrics...")

        # Calculate and display average metrics. Skip metrics with an empty
        # history so a run without recorded evaluations cannot divide by zero.
        avg_metrics = {
            metric: sum(values) / len(values)
            for metric, values in metrics_logger.metrics_history.items()
            if values
        }

        print("\nAverage Metrics:")
        print("-" * 50)
        for metric, value in avg_metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

        return eval_results, avg_metrics

    except Exception as e:
        # logger.exception keeps the same message and also records the
        # traceback, which a plain error line would lose.
        logger.exception("Error during training: %s", e)
        raise

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train_model()