In [None]:
pip install python-dotenv

In [None]:
pip install anthropic

In [None]:

# Mixtral-8x7B is a large language model (LLM) created by Mistral AI; "mixtral-8x7b-32768" is the model ID under which Groq serves it (32768-token context window)
import os
import json
from datetime import datetime
from dotenv import load_dotenv
from groq import Groq
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import logging
import time
from json import JSONDecodeError

# Setup logging: timestamped INFO-level messages via the root logger
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load environment variables (python-dotenv also reads a local .env file, if present)
load_dotenv()
# Read the key from the variable that the error message below names, so that
# setting API_KEY as instructed is actually picked up.
api_key = os.getenv('API_KEY')
if not api_key:
    raise ValueError("API_KEY environment variable is required")

# Configuration
DATA_PATH = "/root/workspace/npe_project/llm/Best NPE Examples.json"
OUTPUT_DIR = "results"
# Create the results directory up front so later writes cannot fail on a missing path
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Rate limiting configuration
RATE_LIMIT_DELAY = 2  # seconds between API calls

def load_data(filepath):
    """Read the JSON document stored at ``filepath`` and return the parsed object.

    The number of top-level records is logged on success. A missing file or
    malformed JSON is logged as an error and the original exception is
    re-raised to the caller.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            records = json.load(handle)
    except FileNotFoundError:
        logger.error(f"Data file not found: {filepath}")
        raise
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON format in file: {filepath}")
        raise

    logger.info(f"Successfully loaded {len(records)} records from {filepath}")
    return records

class NPEAgent:
    """One LLM-backed stage (detector, classifier or evaluator) of the NPE pipeline.

    An agent pairs a chat-completions client with a fixed system prompt and
    turns each reply into a parsed JSON value. Any failure (API error,
    unparsable reply) is logged and replaced by a conservative default
    response, so a single bad call never aborts the run.
    """

    # Groq model ID matching the Mixtral model named in this cell's header.
    # The previous hard-coded "claude-3-sonnet" is an Anthropic model name,
    # which the Groq client used here cannot serve.
    # NOTE(review): confirm this ID is still listed among Groq's supported models.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    def __init__(self, client, role, prompt, model=DEFAULT_MODEL):
        """Store the client, the agent's role name, its system prompt and model ID.

        Args:
            client: Object exposing ``chat.completions.create`` (a Groq client).
            role: Agent role; ``"evaluator"`` selects a different default response.
            prompt: System prompt sent with every request.
            model: Model identifier passed to the API; defaults to ``DEFAULT_MODEL``.
        """
        self.client = client
        self.role = role
        self.prompt = prompt
        self.model = model

    def process(self, content, previous_results=None):
        """Send ``content`` (plus earlier stage results, if any) to the model.

        Args:
            content: Text of the user message (callers pass a JSON string).
            previous_results: Optional JSON-serialisable results of earlier
                stages; when truthy they are prepended to the user message.

        Returns:
            The parsed JSON reply, or the role's default response when the
            request fails or the reply is not valid JSON.
        """
        try:
            # Rate limiting: fixed pause before every API call
            time.sleep(RATE_LIMIT_DELAY)

            messages = [{"role": "system", "content": self.prompt}]

            # Format the user message
            user_message = content if not previous_results else \
                f"Previous analysis: {json.dumps(previous_results)}\n\nNew content: {content}"
            messages.append({"role": "user", "content": user_message})

            completion = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.1,
                max_tokens=500,
                response_format={"type": "json_object"}
            )

            response_text = completion.choices[0].message.content
            try:
                return json.loads(response_text)
            except JSONDecodeError:
                logger.error(f"Invalid JSON response from {self.role}: {response_text}")
                return self._get_default_response("Error parsing response")

        except Exception as e:
            # Best-effort by design: degrade to the default response instead of crashing
            logger.error(f"Error in {self.role} processing: {str(e)}")
            return self._get_default_response(f"Error: {str(e)}")

    def _get_default_response(self, reason):
        """Return the zero-confidence, negative fallback result for this role.

        The evaluator gets a ``final_decision`` of ``"Not-NPE"``; every other
        role gets ``npe_found: False``. ``reason`` is carried along as the
        explanation text.
        """
        if self.role == "evaluator":
            return {
                "final_decision": "Not-NPE",
                "confidence": 0.0,
                "feedback": reason
            }
        return {
            "npe_found": False,
            "confidence": 0.0,
            "reasoning": reason
        }

def multi_agent_classify(client, commit_message, patch, added_lines):
    """Classify one commit as ``"NPE-Fixes"`` or ``"Not-NPE"`` with three chained agents.

    The detector analyses the commit, the classifier sees the commit plus the
    detector's result, and the evaluator sees the commit plus both earlier
    results and issues the final decision.

    Args:
        client: Chat-completions client handed to every ``NPEAgent``.
        commit_message: Commit message text.
        patch: Patch (diff) text of the commit.
        added_lines: Lines added by the commit.

    Returns:
        ``"NPE-Fixes"`` or ``"Not-NPE"``. Any error, malformed stage result or
        unrecognised decision label yields ``"Not-NPE"``.
    """
    # The only labels downstream metric code can handle
    valid_decisions = ("NPE-Fixes", "Not-NPE")

    try:
        detector = NPEAgent(client, "detector", """
            You are a specialized NullPointerException (NPE) detector. 
            Analyze the given code changes and respond with a JSON object containing:
            {
                "npe_found": boolean,
                "confidence": float between 0-1,
                "reasoning": string explanation
            }
        """)

        # The schema below needs its opening brace, like the other two prompts;
        # without it the model is shown a malformed JSON template.
        classifier = NPEAgent(client, "classifier", """
            You are a code pattern classifier specializing in NPE fixes.
            Analyze the code and respond with a JSON object containing:
            {
                "is_npe_fix": boolean,
                "pattern_match": float between 0-1,
                "identified_patterns": array of strings
            }
        """)

        evaluator = NPEAgent(client, "evaluator", """
            You are a senior code reviewer evaluating NPE fix classifications.
            Review the analysis and respond with a JSON object containing:
            {
                "final_decision": string ("NPE-Fixes" or "Not-NPE"),
                "confidence": float between 0-1,
                "feedback": string explanation
            }
        """)

        content = {
            "commit_message": commit_message,
            "patch": patch,
            "added_lines": added_lines
        }
        # Serialise once; every stage receives the same commit payload
        payload = json.dumps(content)

        detection_result = detector.process(payload)
        if not isinstance(detection_result, dict):
            logger.warning(f"Invalid detection result format: {detection_result}")
            return "Not-NPE"

        classification_result = classifier.process(payload, detection_result)
        if not isinstance(classification_result, dict):
            logger.warning(f"Invalid classification result format: {classification_result}")
            return "Not-NPE"

        final_result = evaluator.process(payload, {
            "detection": detection_result,
            "classification": classification_result
        })

        if isinstance(final_result, dict) and "final_decision" in final_result:
            decision = final_result["final_decision"]
            if decision in valid_decisions:
                return decision
            # A free-form label would add a third class to the predictions and
            # break the binary metrics, so treat it as a negative.
            logger.warning(f"Unexpected final decision label: {decision!r}")
        return "Not-NPE"

    except Exception as e:
        logger.error(f"Error in multi-agent classification: {str(e)}")
        return "Not-NPE"

def calculate_metrics(y_true, y_pred):
    """Compute binary classification metrics with ``"NPE-Fixes"`` as the positive class.

    Args:
        y_true: Sequence of ground-truth labels.
        y_pred: Sequence of predicted labels, aligned with ``y_true``.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1``, a
        ``confusion_matrix`` sub-dict of integer counts, and the false
        positive / false negative rates ``fpr`` and ``fnr`` (0.0 when the
        corresponding denominator is zero).

    Raises:
        Exception: Any error raised by scikit-learn is logged and re-raised.
    """
    positive_label = "NPE-Fixes"
    negative_label = "Not-NPE"
    try:
        # Pin the label order to [negative, positive]. Left to itself sklearn
        # sorts the labels, and "NPE-Fixes" sorts before "Not-NPE", so ravel()
        # would yield tp, fn, fp, tn and every count below would be mislabeled.
        # Explicit labels also guarantee a 2x2 matrix when only one class occurs.
        tn, fp, fn, tp = confusion_matrix(
            y_true, y_pred, labels=[negative_label, positive_label]
        ).ravel()
        return {
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, pos_label=positive_label),
            "recall": recall_score(y_true, y_pred, pos_label=positive_label),
            "f1": f1_score(y_true, y_pred, pos_label=positive_label),
            "confusion_matrix": {
                # Cast to plain int so the counts are JSON-serialisable
                "true_negatives": int(tn),
                "false_positives": int(fp),
                "false_negatives": int(fn),
                "true_positives": int(tp)
            },
            "fpr": float(fp / (fp + tn)) if (fp + tn) > 0 else 0.0,
            "fnr": float(fn / (fn + tp)) if (fn + tp) > 0 else 0.0
        }
    except Exception as e:
        logger.error(f"Error calculating metrics: {str(e)}")
        raise

def print_metrics(metrics):
    """Write a human-readable summary of ``metrics`` to standard output.

    ``metrics`` must provide the score keys ``accuracy``, ``precision``,
    ``recall``, ``f1``, ``fpr`` and ``fnr`` (printed with four decimals) and
    a ``confusion_matrix`` dict holding the four outcome counts.
    """
    score_rows = (
        ("Accuracy", "accuracy"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        ("F1 Score", "f1"),
        ("False Positive Rate", "fpr"),
        ("False Negative Rate", "fnr"),
    )
    count_rows = (
        ("True Negatives", "true_negatives"),
        ("False Positives", "false_positives"),
        ("False Negatives", "false_negatives"),
        ("True Positives", "true_positives"),
    )

    print("\nClassification Results:")
    print("-" * 50)
    for position, (title, key) in enumerate(score_rows):
        # Only the first score line is preceded by a blank line
        lead = "\n" if position == 0 else ""
        print(f"{lead}{title}: {metrics[key]:.4f}")

    print("\nConfusion Matrix:")
    counts = metrics['confusion_matrix']
    for title, key in count_rows:
        print(f"{title}: {counts[key]}")

def save_results(results, metrics, output_dir):
    """Write the metrics and misclassified commits to a timestamped JSON file.

    Args:
        results: Dict with at least ``"y_true"`` (list of labels) and
            ``"misclassified"`` (list of JSON-serialisable entries).
        metrics: JSON-serialisable metrics dict.
        output_dir: Directory for the report; created if it does not exist.

    Returns:
        Path of the file written,
        ``<output_dir>/classification_results_<YYYYmmdd_HHMMSS>.json``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Do not rely on the caller having created the directory beforehand
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"classification_results_{timestamp}.json")

    misclassified = results["misclassified"]
    full_results = {
        "metrics": metrics,
        "misclassified_commits": misclassified,
        "run_timestamp": timestamp,
        "total_commits_processed": len(results["y_true"]),
        "total_misclassified": len(misclassified)
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(full_results, f, indent=2)
    logger.info(f"Results saved to {output_file}")
    return output_file

def main():
    """Run the Groq-based pipeline over every record in ``DATA_PATH``.

    Each commit is classified with ``multi_agent_classify``; true and
    predicted labels are collected, mismatches are recorded, and metrics are
    printed and saved once all commits have been attempted. A failure on a
    single commit is logged and skipped; any other failure is logged and
    re-raised.
    """
    try:
        groq_client = Groq(api_key=api_key)
        outcome = {"y_true": [], "y_pred": [], "misclassified": []}

        commits = load_data(DATA_PATH)
        n_commits = len(commits)
        logger.info(f"Processing {n_commits} commits...")

        for position, commit in enumerate(commits, start=1):
            try:
                logger.info(f"Processing commit {position}/{n_commits}")
                # Extra one-second pause between commits (not before the first)
                if position != 1:
                    time.sleep(1)

                expected = commit["Category"]
                predicted = multi_agent_classify(
                    groq_client,
                    commit.get("Commit Message", ""),
                    commit.get("Patch", ""),
                    commit.get("Added Lines", "")
                )

                outcome["y_true"].append(expected)
                outcome["y_pred"].append(predicted)

                if predicted == expected:
                    continue
                outcome["misclassified"].append({
                    "Commit SHA": commit.get("Commit SHA", "Unknown"),
                    "True Label": expected,
                    "Predicted": predicted,
                    "Patch": commit.get("Patch", "")
                })

            except Exception as err:
                # One bad record must not abort the whole run
                logger.error(f"Error processing commit {position}: {err}")
                continue

        if not (outcome["y_true"] and outcome["y_pred"]):
            logger.error("No valid predictions were made")
        else:
            metrics = calculate_metrics(outcome["y_true"], outcome["y_pred"])
            print_metrics(metrics)
            save_results(outcome, metrics, OUTPUT_DIR)
            logger.info("Classification completed successfully")

    except Exception as err:
        logger.error(f"Fatal error in main execution: {err}")
        raise
    finally:
        logger.info("Cleaning up resources...")


# Run the pipeline only when executed as a script/cell, not when imported
if __name__ == "__main__":
    main()

In [None]:
import os
import json
import time
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass

from anthropic import Anthropic
from dotenv import load_dotenv
from sklearn.metrics import (
    accuracy_score, precision_score,
    recall_score, f1_score, confusion_matrix
)

# Configuration
CONFIG = {
    "DATA_PATH": "/root/workspace/npe_project/llm/Best NPE Examples.json",
    "OUTPUT_DIR": "results",
    # Full, dated model ID for Claude 3 Sonnet; the bare name "claude-3-sonnet"
    # is not a complete Anthropic model identifier.
    # NOTE(review): confirm this snapshot is still available on the account in use.
    "MODEL": "claude-3-sonnet-20240229",
    "RATE_LIMIT_DELAY": 2,   # seconds to wait before each API call
    "MAX_TOKENS": 500,       # upper bound on tokens generated per reply
    "TEMPERATURE": 0.1       # sampling temperature passed to the API
}

# Setup logging: every record goes both to npe_classification.log and to the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('npe_classification.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

@dataclass
class CommitData:
    """Data structure for commit information.

    One instance is built per JSON record by ``load_data``; the comments
    below name the record key each field is read from.
    """
    # Commit identifier, from "Commit SHA" ("Unknown" when the key is absent)
    sha: str
    # Commit message text, from "Commit Message" ("" when absent)
    message: str
    # Patch text, from "Patch" ("" when absent)
    patch: str
    # Added lines, from "Added Lines" ("" when absent)
    added_lines: str
    # Ground-truth label, from the required "Category" key
    category: str

class NPEAgent:
    """Base agent class for NPE analysis.

    An agent pairs an Anthropic client with a fixed system prompt, sends one
    user message per call and returns the reply parsed as a JSON object.
    Every failure is logged and replaced by a conservative default response.
    """

    # Appended to every system prompt. JSON output has to be requested in the
    # prompt because ``messages.create`` does not accept the OpenAI-style
    # ``response_format`` argument.
    JSON_ONLY_INSTRUCTION = (
        "\n\nRespond with a single valid JSON object and nothing else."
    )

    def __init__(self, client: "Anthropic", role: str, prompt: str):
        """Store the client, the agent's role name and its system prompt."""
        self.client = client
        self.role = role
        self.prompt = prompt

    def process(self,
               content: Dict[str, Any],
               previous_results: Optional[Dict] = None) -> Dict[str, Any]:
        """Process commit data and return analysis results.

        Args:
            content: JSON-serialisable commit payload.
            previous_results: Optional results of earlier stages; when truthy
                they are prepended to the user message.

        Returns:
            The JSON object parsed from the model's reply, or the role's
            default response if the request or the parsing fails.
        """
        try:
            # Rate limiting: fixed pause before every API call
            time.sleep(CONFIG["RATE_LIMIT_DELAY"])

            # Prepare message
            user_message = (
                f"Previous analysis: {json.dumps(previous_results)}\n\n"
                f"New content: {json.dumps(content)}"
            ) if previous_results else json.dumps(content)

            # Get completion from Claude. No ``response_format`` here: passing
            # it made every call raise and fall through to the default response.
            completion = self.client.messages.create(
                model=CONFIG["MODEL"],
                system=self.prompt + self.JSON_ONLY_INSTRUCTION,
                messages=[{
                    "role": "user",
                    "content": user_message
                }],
                temperature=CONFIG["TEMPERATURE"],
                max_tokens=CONFIG["MAX_TOKENS"]
            )

            return self._parse_json(completion.content[0].text)

        except Exception as e:
            # Best-effort by design: degrade to the default response instead of crashing
            logger.error(f"Error in {self.role} processing: {str(e)}")
            return self._get_default_response(str(e))

    @staticmethod
    def _parse_json(text: str) -> Dict[str, Any]:
        """Return the JSON object contained in ``text``.

        The whole reply is parsed first; if that fails, the span from the
        first ``{`` to the last ``}`` is tried, which tolerates explanatory
        text around the object.

        Raises:
            ValueError: If no JSON object can be parsed from ``text``
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError("Model reply is not a JSON object")
        return parsed

    def _get_default_response(self, reason: str) -> Dict[str, Any]:
        """Return default response in case of errors.

        The evaluator gets a ``final_decision`` of ``"Not-NPE"``; every other
        role gets ``npe_found: False``. ``reason`` is carried along as the
        explanation text.
        """
        if self.role == "evaluator":
            return {
                "final_decision": "Not-NPE",
                "confidence": 0.0,
                "feedback": reason
            }
        return {
            "npe_found": False,
            "confidence": 0.0,
            "reasoning": reason
        }

class NPEClassificationSystem:
    """Main classification system coordinating multiple agents.

    Three agents run in sequence for each commit: a detector, a classifier
    that also sees the detector's result, and an evaluator that sees both
    earlier results and issues the final label.
    """

    # The only labels accepted as a final decision
    VALID_DECISIONS = ("NPE-Fixes", "Not-NPE")

    def __init__(self, api_key: str):
        """Create the Anthropic client and the three agents that share it."""
        self.client = Anthropic(api_key=api_key)
        self.agents = self._initialize_agents()

    def _initialize_agents(self) -> Dict[str, NPEAgent]:
        """Initialize the multi-agent system.

        Each prompt spells out the JSON fields expected in the reply;
        ``classify_commit`` depends on the evaluator's ``final_decision``
        field, so the prompts must ask for it explicitly.
        """
        return {
            "detector": NPEAgent(
                self.client,
                "detector",
                """You are a specialized NullPointerException (NPE) detector. 
                Analyze the given code changes and identify potential NPE-related issues.
                Respond with a JSON object containing:
                {
                    "npe_found": boolean,
                    "confidence": float between 0-1,
                    "reasoning": string explanation
                }"""
            ),
            "classifier": NPEAgent(
                self.client,
                "classifier",
                """You are a code pattern classifier specializing in NPE fixes.
                Analyze the code and identify specific NPE fix patterns.
                Respond with a JSON object containing:
                {
                    "is_npe_fix": boolean,
                    "pattern_match": float between 0-1,
                    "identified_patterns": array of strings
                }"""
            ),
            "evaluator": NPEAgent(
                self.client,
                "evaluator",
                """You are a senior code reviewer evaluating NPE fix classifications.
                Review the previous analyses and make a final determination.
                Respond with a JSON object containing:
                {
                    "final_decision": string ("NPE-Fixes" or "Not-NPE"),
                    "confidence": float between 0-1,
                    "feedback": string explanation
                }"""
            )
        }

    def classify_commit(self, commit: CommitData) -> str:
        """Classify a single commit using the multi-agent system.

        Returns:
            ``"NPE-Fixes"`` or ``"Not-NPE"``. Errors and unrecognised
            decision labels both yield ``"Not-NPE"``.
        """
        try:
            content = {
                "commit_message": commit.message,
                "patch": commit.patch,
                "added_lines": commit.added_lines
            }

            # Multi-stage analysis
            detection = self.agents["detector"].process(content)
            classification = self.agents["classifier"].process(content, detection)
            final_result = self.agents["evaluator"].process(
                content,
                {"detection": detection, "classification": classification}
            )

            decision = final_result.get("final_decision", "Not-NPE")
            if decision not in self.VALID_DECISIONS:
                # A free-form label would add a third class to the predictions
                # and break the binary metrics, so treat it as a negative.
                logger.warning(
                    f"Unexpected final decision for commit {commit.sha}: {decision!r}"
                )
                return "Not-NPE"
            return decision

        except Exception as e:
            logger.error(f"Error classifying commit {commit.sha}: {str(e)}")
            return "Not-NPE"

def load_data(filepath: str) -> List[CommitData]:
    """Read the JSON file at ``filepath`` and convert each record to ``CommitData``.

    Optional record keys fall back to defaults ("Unknown" for the SHA, empty
    strings for the text fields); "Category" is mandatory. Any failure is
    logged and re-raised.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            raw_records = json.load(source)

        commits = []
        for record in raw_records:
            commits.append(
                CommitData(
                    sha=record.get("Commit SHA", "Unknown"),
                    message=record.get("Commit Message", ""),
                    patch=record.get("Patch", ""),
                    added_lines=record.get("Added Lines", ""),
                    category=record["Category"],
                )
            )

        logger.info(f"Loaded {len(commits)} commits from {filepath}")
        return commits

    except Exception as err:
        logger.error(f"Error loading data: {err}")
        raise

def main():
    """Main execution function.

    Loads the API key and the commit data, classifies every commit with
    ``NPEClassificationSystem``, then computes the metrics and saves them
    together with the misclassified commits. Any failure is logged and
    re-raised.

    Raises:
        ValueError: If ``ANTHROPIC_API_KEY`` is not set.
    """
    try:
        # Initialize
        load_dotenv()
        api_key = os.getenv('ANTHROPIC_API_KEY')
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY environment variable is required")

        classifier = NPEClassificationSystem(api_key)

        # Process commits
        commits = load_data(CONFIG["DATA_PATH"])
        total = len(commits)
        results = {
            "y_true": [],
            "y_pred": [],
            "misclassified": []
        }

        for idx, commit in enumerate(commits, 1):
            logger.info(f"Processing commit {idx}/{total}")
            prediction = classifier.classify_commit(commit)

            results["y_true"].append(commit.category)
            results["y_pred"].append(prediction)

            if prediction != commit.category:
                results["misclassified"].append({
                    "sha": commit.sha,
                    "true_label": commit.category,
                    "predicted": prediction
                })

        # Metrics are undefined for an empty dataset; stop instead of failing inside sklearn
        if not results["y_true"]:
            logger.error("No commits were processed; skipping metrics")
            return

        # Calculate and save metrics (helpers defined earlier in this file)
        metrics = calculate_metrics(results["y_true"], results["y_pred"])
        # save_results requires the target directory as its third argument;
        # omitting it raised TypeError only after all API calls had been made.
        os.makedirs(CONFIG["OUTPUT_DIR"], exist_ok=True)
        save_results(results, metrics, CONFIG["OUTPUT_DIR"])

    except Exception as e:
        logger.error(f"Fatal error: {str(e)}")
        raise


# Run the pipeline only when executed as a script/cell, not when imported
if __name__ == "__main__":
    main()