In [None]:
import os
import json
import time
import datetime
from dotenv import load_dotenv
import google.generativeai as genai
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configuration
# Retry and throttling settings read by NPEAgent. All delays are in seconds
# because they end up as arguments to time.sleep.
# Maximum number of attempts NPEAgent.process makes before it gives up and
# returns the role's default response.
MAX_RETRIES = 5
# Base of the retry back-off: a retry sleeps BASE_DELAY * 2**retries seconds...
BASE_DELAY = 5
# ...but never longer than MAX_DELAY.
MAX_DELAY = 120
# Minimum gap NPEAgent.wait_for_rate_limit enforces between requests.
REQUEST_DELAY = 2

# Load environment
# NOTE(review): load_dotenv() presumably copies entries from a local .env file
# into the process environment before the lookup below - confirm against the
# python-dotenv documentation.
load_dotenv()
api_key = os.getenv('GEMINI_API_KEY')
# Fail immediately, before any API call is attempted, when the key is missing
# or empty.
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable is required")

# Configure Gemini
# One module-level model handle; main() passes it to multi_agent_classify.
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-2.0-flash')

def load_input_data(filename):
    """Read *filename* as UTF-8 JSON and return the decoded value.

    Nothing is raised for ordinary failures: a missing file or any other
    error while opening or decoding is reported on stdout and ``None`` is
    returned instead.
    """
    parsed = None
    try:
        with open(filename, mode='r', encoding='utf-8') as source:
            parsed = json.loads(source.read())
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found")
    except Exception as exc:
        # Covers malformed JSON, permission problems, decoding errors, ...
        print(f"Error loading data: {exc}")
    return parsed

# Agent prompts
# Role-specific instruction text. NPEAgent.process places the prompt at the
# start of every request, followed by "Respond only in JSON format" and the
# commit content. Each prompt spells out the JSON keys the reply should carry;
# NPEAgent.default_response uses the same keys for its fallback values.

# First stage: asks whether the commit contains NPE-related changes.
DETECTOR_PROMPT = """You are a specialized NPE detector. Analyze commits for NPE-related changes.
Output format: {"npe_found": boolean, "confidence": float, "reasoning": "string"}"""

# Second stage: asks whether the change matches NPE-fix code patterns.
CLASSIFIER_PROMPT = """You are a code pattern classifier for NPE fixes.
Output format: {"is_npe_fix": boolean, "pattern_match": float, "patterns": []}"""

# Final stage: its "final_decision" value is what multi_agent_classify returns.
EVALUATOR_PROMPT = """You are a senior reviewer making final NPE classifications.
Output format: {"final_decision": "NPE-Fixes"/"Not-NPE", "confidence": float, "feedback": "string"}"""

class NPEAgent:
    """One role (detector, classifier or evaluator) of the NPE review pipeline.

    An agent bundles a generative model with a role name and that role's
    instruction prompt. ``process`` sends one request, retries with
    exponential back-off when the call fails, and always returns a dict:
    the JSON object found in the reply, or the role's default "negative"
    response when no usable reply could be obtained.
    """

    # Time (time.time()) of the most recent request issued by *any* agent.
    # It lives on the class because all agents talk to the same API with the
    # same key; a per-instance timestamp would let freshly created agents
    # bypass REQUEST_DELAY entirely.
    last_request_time = 0

    def __init__(self, model, role, prompt):
        """Store the model handle, the role name and the role's prompt text.

        Args:
            model: Object exposing ``generate_content(prompt,
                generation_config=...)`` whose result has a ``text`` attribute.
            role: Key used to pick the fallback in ``default_response``
                ("detector", "classifier" or "evaluator").
            prompt: Instruction text placed at the start of every request.
        """
        self.model = model
        self.role = role
        self.prompt = prompt

    def wait_for_rate_limit(self):
        """Sleep until REQUEST_DELAY seconds have passed since the last request.

        The timestamp is shared by every NPEAgent instance, so the gap is
        kept between requests of different agents as well.
        """
        elapsed = time.time() - NPEAgent.last_request_time
        if elapsed < REQUEST_DELAY:
            time.sleep(REQUEST_DELAY - elapsed)
        NPEAgent.last_request_time = time.time()

    def validate_json(self, text):
        """Extract the JSON object embedded in *text*.

        The span from the first ``{`` to the last ``}`` is decoded, which
        tolerates prose or code fences around the object.

        Returns:
            The decoded dict, or ``default_response()`` when *text* holds no
            decodable object (or is not a string at all).
        """
        try:
            start = text.find('{')
            end = text.rfind('}') + 1
            if start >= 0 and end > start:
                return json.loads(text[start:end])
        except (ValueError, TypeError, AttributeError):
            # Malformed JSON (JSONDecodeError is a ValueError) or a non-string
            # reply: fall through to the default. Anything else, notably
            # KeyboardInterrupt/SystemExit, is deliberately not swallowed.
            pass
        return self.default_response()

    def default_response(self):
        """Return the fallback dict for this agent's role.

        Unknown roles get the evaluator fallback. A fresh dict is built on
        every call, so callers may mutate the result.
        """
        defaults = {
            "detector": {"npe_found": False, "confidence": 0.0, "reasoning": "Error"},
            "classifier": {"is_npe_fix": False, "pattern_match": 0.0, "patterns": []},
            "evaluator": {"final_decision": "Not-NPE", "confidence": 0.0, "feedback": "Error"}
        }
        return defaults.get(self.role, defaults["evaluator"])

    def process(self, content, previous_results=None):
        """Send *content* to the model and return the parsed JSON reply.

        Args:
            content: Text describing the commit; it is concatenated onto the
                prompt, so it is expected to be a string.
            previous_results: Optional JSON-serialisable output of earlier
                agents; when truthy it is embedded ahead of *content*.

        Returns:
            The dict decoded from the reply, or ``default_response()`` when
            the reply is not decodable or all MAX_RETRIES attempts failed.
        """
        for attempt in range(MAX_RETRIES):
            try:
                if attempt > 0:
                    # Exponential back-off before a retry, capped at MAX_DELAY.
                    delay = min(BASE_DELAY * (2 ** attempt), MAX_DELAY)
                    print(f"Retrying in {delay} seconds... ({attempt}/{MAX_RETRIES})")
                    time.sleep(delay)

                prompt = f"{self.prompt}\n\nRespond only in JSON format\n\n"
                if previous_results:
                    prompt += f"Previous: {json.dumps(previous_results)}\nAnalyze: {content}"
                else:
                    prompt += content

                generation_config = genai.GenerationConfig(
                    temperature=0.1,
                    max_output_tokens=500,
                )

                # Throttle right before the call so the shared timestamp
                # reflects when the request was actually sent.
                self.wait_for_rate_limit()
                response = self.model.generate_content(
                    prompt,
                    generation_config=generation_config
                )
                return self.validate_json(response.text)

            except Exception as e:
                # Deliberately broad: any failure while building or sending
                # the request counts as one failed attempt.
                print(f"Error in {self.role} (attempt {attempt + 1}/{MAX_RETRIES}): {e}")

        # Every attempt failed (or MAX_RETRIES allows none): never return None.
        return self.default_response()

# The only labels this module may hand to the metric code in main().
_DECISION_LABELS = ("NPE-Fixes", "Not-NPE")

def _normalize_decision(raw_decision):
    """Map a model-supplied decision onto one of the two known labels."""
    if isinstance(raw_decision, str):
        cleaned = raw_decision.strip().casefold()
        for label in _DECISION_LABELS:
            if cleaned == label.casefold():
                return label
    # Missing, non-string or unrecognised values count as the negative class.
    return "Not-NPE"

def multi_agent_classify(model, commit_message, patch, added_lines):
    """Classify one commit as "NPE-Fixes" or "Not-NPE" using three agents.

    The detector, classifier and evaluator run once in sequence, each seeing
    the earlier outputs; then all three run a second, refined pass that is
    fed the first evaluation. The evaluator's second "final_decision" is the
    result.

    Args:
        model: Model handle passed to every NPEAgent.
        commit_message: Commit message text.
        patch: Patch text of the commit.
        added_lines: Lines added by the commit.

    Returns:
        Exactly "NPE-Fixes" or "Not-NPE". The model's answer is normalised
        (case and surrounding whitespace ignored); anything unrecognised, and
        any exception raised along the way, yields "Not-NPE". Keeping the
        result to two labels matters because the caller scores it with binary
        metrics.
    """
    try:
        detector = NPEAgent(model, "detector", DETECTOR_PROMPT)
        classifier = NPEAgent(model, "classifier", CLASSIFIER_PROMPT)
        evaluator = NPEAgent(model, "evaluator", EVALUATOR_PROMPT)
        
        content = f"""
        Commit Message: {commit_message}
        Patch: {patch}
        Added Lines: {added_lines}
        """
        
        # First pass: each stage sees the output of the stages before it.
        print("Running detector...")
        detection_result = detector.process(content)
        
        print("Running classifier...")
        classification_result = classifier.process(content, detection_result)
        
        print("Running evaluator...")
        evaluation_result = evaluator.process(content, {
            "detection": detection_result,
            "classification": classification_result
        })
        
        # Second pass: re-run every stage with the first evaluation as feedback.
        print("Running refined analysis...")
        refined_detection = detector.process(content, evaluation_result)
        refined_classification = classifier.process(content, {
            "previous_detection": detection_result,
            "previous_evaluation": evaluation_result,
            "new_detection": refined_detection
        })
        
        final_result = evaluator.process(content, {
            "first_iteration": evaluation_result,
            "refined_detection": refined_detection,
            "refined_classification": refined_classification
        })
        
        return _normalize_decision(final_result.get("final_decision", "Not-NPE"))
    except Exception as e:
        # Boundary handler: one bad commit must not abort the whole run.
        print(f"Classification error: {e}")
        return "Not-NPE"

def main():
    """Classify every commit in NPEPatches.json and report the scores.

    Each item's "Category" is compared with the label returned by
    multi_agent_classify. Accuracy, precision, recall and F1 (positive class
    "NPE-Fixes") are computed, written together with the misclassified items
    and the elapsed time to ``results_<timestamp>.json``, and printed.
    Returns early, without output, when the input cannot be loaded or is
    empty.
    """
    started_at = datetime.datetime.now()
    print(f"Starting analysis at {started_at}")

    dataset = load_input_data("NPEPatches.json")
    if not dataset:
        return

    y_true, y_pred, misclassified = [], [], []
    item_count = len(dataset)

    for position, record in enumerate(dataset, start=1):
        print(f"\nProcessing item {position}/{item_count}...")
        print(f"Progress: {(position/item_count)*100:.1f}%")

        expected = record["Category"]
        predicted = multi_agent_classify(
            model,
            record["Commit Message"],
            record["Patch"],
            record["Added Lines"],
        )

        y_true.append(expected)
        y_pred.append(predicted)

        if predicted != expected:
            # Keep enough context to inspect the miss afterwards.
            misclassified.append({
                "SHA": record.get("Commit SHA", "N/A"),
                "True": expected,
                "Predicted": predicted,
                "Patch": record["Patch"],
            })

    finished_at = datetime.datetime.now()
    elapsed = finished_at - started_at

    # Accuracy takes no positive label; the other three share the same options.
    metrics = {"accuracy": accuracy_score(y_true, y_pred)}
    binary_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for metric_name, scorer in binary_scorers:
        metrics[metric_name] = scorer(
            y_true, y_pred, pos_label="NPE-Fixes", zero_division=0
        )

    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = f"results_{stamp}.json"
    report = {
        "metrics": metrics,
        "misclassified": misclassified,
        "execution_time": str(elapsed),
    }
    with open(results_file, "w") as out:
        json.dump(report, out, indent=2)

    print("\nResults:")
    for metric_name, score in metrics.items():
        print(f"{metric_name.capitalize()}: {score:.4f}")
    print(f"Misclassified: {len(misclassified)}")
    print(f"Total execution time: {elapsed}")
    print(f"Results saved to: {results_file}")

# Run the evaluation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()