In [1]:
#Tagline: Transforming Critical Feedback into Constructive Growth
#Hackathon Mission 1: The Empathetic Code Reviewer

In [2]:
!pip install google-generativeai transformers torch sentence-transformers textstat

import json
import re
import os
from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from datetime import datetime

# For traditional ML approach
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textstat import flesch_reading_ease, flesch_kincaid_grade
import nltk
# Check each NLTK resource on its own. Previously the VADER lexicon was only
# downloaded when punkt was missing, so a machine that already had punkt never
# received the lexicon that SentimentIntensityAnalyzer loads.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

from nltk.sentiment import SentimentIntensityAnalyzer

# For advanced AI approach (optional)
try:
    import google.generativeai as genai
    from transformers import pipeline
    HAS_AI_LIBS = True
except ImportError:
    HAS_AI_LIBS = False
    print("AI libraries not available. Using traditional ML approach.")

print("Setup complete!")



2025-08-28 12:36:47.125554: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756384607.152914     269 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756384607.161035     269 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Setup complete!


In [3]:
# Sample request in the shape the mission specification requires:
# one code snippet plus the blunt review comments written about it.
sample_input = {
    "code_snippet": (
        "def get_active_users(users):\n"
        "    results = []\n"
        "    for u in users:\n"
        "        if u.is_active == True and u.profile_complete == True:\n"
        "            results.append(u)\n"
        "    return results"
    ),
    "review_comments": [
        "This is inefficient. Don't loop twice conceptually.",
        "Variable 'u' is a bad name.",
        "Boolean comparison '== True' is redundant.",
    ],
}

# Echo the payload as pretty-printed JSON so the reader can verify it
print("Sample input loaded:")
print(json.dumps(sample_input, indent=2))

Sample input loaded:
{
  "code_snippet": "def get_active_users(users):\n    results = []\n    for u in users:\n        if u.is_active == True and u.profile_complete == True:\n            results.append(u)\n    return results",
  "review_comments": [
    "This is inefficient. Don't loop twice conceptually.",
    "Variable 'u' is a bad name.",
    "Boolean comparison '== True' is redundant."
  ]
}


In [4]:
#Traditional ML-based Empathetic Reviewer Class
class EmpatheticCodeReviewer:
    def __init__(self, use_ai=False, api_key=None):
        self.use_ai = use_ai
        self.sentiment_analyzer = SentimentIntensityAnalyzer()
        
        # Initialize AI if requested and available
        if use_ai and HAS_AI_LIBS and api_key:
            try:
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel('gemini-pro')
                self.ai_available = True
                print("AI model initialized successfully!")
            except Exception as e:
                print(f"AI initialization failed: {e}")
                self.ai_available = False
        else:
            self.ai_available = False
            
        # Empathetic language patterns
        self.positive_starters = [
            "Great start on the logic here!",
            "Nice approach to this problem!",
            "Good foundation in place!",
            "I can see the effort put into this!",
            "The core functionality looks solid!"
        ]
        
        self.constructive_connectors = [
            "Here's a way to make it even better:",
            "Consider this enhancement:",
            "For improved maintainability:",
            "To boost performance:",
            "For better readability:"
        ]
        
        self.improvement_templates = {
            'performance': "This approach works, but we can optimize it by {suggestion}. This will make it faster and more efficient.",
            'readability': "The logic is sound! For better code clarity, consider {suggestion}. This helps other developers understand your intent.",
            'best_practices': "Good start! Following Python conventions, we could {suggestion}. This aligns with industry standards.",
            'maintainability': "Nice work! To make future changes easier, let's {suggestion}. This will help the codebase evolve smoothly."
        }

    def analyze_sentiment(self, text: str) -> Dict:
        """Analyze sentiment of feedback to determine empathy level needed"""
        scores = self.sentiment_analyzer.polarity_scores(text)
        
        # Classify feedback tone
        if scores['compound'] <= -0.5:
            tone = 'harsh'
        elif scores['compound'] <= -0.1:
            tone = 'critical'
        elif scores['compound'] >= 0.1:
            tone = 'positive'
        else:
            tone = 'neutral'
            
        return {
            'tone': tone,
            'scores': scores,
            'needs_empathy': scores['compound'] < -0.1
        }

    def categorize_feedback(self, comment: str) -> str:
        """Categorize the type of feedback to apply appropriate templates"""
        comment_lower = comment.lower()
        
        if any(word in comment_lower for word in ['inefficient', 'slow', 'performance', 'loop', 'optimize']):
            return 'performance'
        elif any(word in comment_lower for word in ['name', 'variable', 'readable', 'clear', 'understand']):
            return 'readability'
        elif any(word in comment_lower for word in ['redundant', 'convention', 'style', 'best practice']):
            return 'best_practices'
        else:
            return 'maintainability'

    def extract_suggestion(self, comment: str, code: str) -> str:
        """Extract and enhance the core suggestion from critical feedback"""
        comment_lower = comment.lower()
        
        # Pattern matching for common issues
        if 'loop twice' in comment_lower or 'inefficient' in comment_lower:
            return "using list comprehension or filter() for cleaner, more Pythonic code"
        
        elif 'bad name' in comment_lower or 'variable' in comment_lower:
            # Extract variable name if possible
            var_match = re.search(r"Variable '(\w+)'", comment)
            if var_match:
                var_name = var_match.group(1)
                return f"using a more descriptive name like 'user' instead of '{var_name}' to improve code clarity"
            return "using more descriptive variable names that clearly indicate their purpose"
        
        elif 'redundant' in comment_lower and 'true' in comment_lower:
            return "writing 'if u.is_active:' instead of 'if u.is_active == True:' - Python treats boolean values naturally"
        
        elif 'redundant' in comment_lower:
            return "simplifying the boolean logic for cleaner, more readable code"
        
        else:
            return "refactoring this section for improved maintainability and clarity"

    def create_code_example(self, original_code: str, comment: str) -> str:
        """Generate improved code example based on the feedback"""
        comment_lower = comment.lower()
        
        # For the specific example provided
        if 'loop twice' in comment_lower or 'inefficient' in comment_lower:
            return '''**Suggested Improvement:**
```python
def get_active_users(users):
    return [user for user in users 
            if user.is_active and user.profile_complete]
```
*Using list comprehension makes the code more concise and Pythonic.*'''

        elif 'bad name' in comment_lower:
            improved_code = original_code.replace(' u ', ' user ').replace('u.', 'user.')
            return f'''**Suggested Improvement:**
```python
{improved_code}
```
*More descriptive variable names improve code readability.*'''

        elif 'redundant' in comment_lower and 'true' in comment_lower:
            improved_code = original_code.replace('== True', '')
            return f'''**Suggested Improvement:**
```python
{improved_code}
```
*Python treats boolean values naturally - no need for explicit comparison.*'''

        return "```python\n# Refactored version would go here\n```"

    def generate_empathetic_response_traditional(self, comment: str, code: str) -> Dict:
        """Generate empathetic response using traditional NLP techniques"""
        
        # Analyze the sentiment and categorize
        sentiment = self.analyze_sentiment(comment)
        category = self.categorize_feedback(comment)
        suggestion = self.extract_suggestion(comment, code)
        
        # Select appropriate empathetic opening
        positive_start = np.random.choice(self.positive_starters)
        connector = np.random.choice(self.constructive_connectors)
        
        # Build the empathetic response
        if sentiment['needs_empathy']:
            # High empathy needed for harsh/critical feedback
            template = self.improvement_templates[category]
            main_feedback = template.format(suggestion=suggestion)
            
            response = f"{positive_start} {connector} {main_feedback}"
        else:
            # Lower empathy needed for neutral/positive feedback
            response = f"{connector.capitalize()} {suggestion}."
        
        # Add code example
        code_example = self.create_code_example(code, comment)
        
        # Calculate readability score
        readability = flesch_reading_ease(response)
        
        return {
            'positive_rephrasing': response,
            'the_why': f"This change improves code {category.replace('_', ' ')} and follows Python best practices.",
            'suggested_improvement': code_example,
            'sentiment_analysis': sentiment,
            'category': category,
            'readability_score': readability
        }

    def generate_empathetic_response_ai(self, comment: str, code: str) -> Dict:
        """Generate empathetic response using Gemini AI"""
        
        prompt = f"""
        You are an empathetic senior developer providing constructive code review feedback. 
        
        Original harsh comment: "{comment}"
        Code snippet: 
        ```python
        {code}
        ```
        
        Please transform this into empathetic, constructive feedback that includes:
        
        1. **Positive Rephrasing**: Start with something encouraging, then provide the same technical advice but in a supportive way
        2. **The Why**: Explain the reasoning behind the suggestion (performance, readability, maintainability, etc.)
        3. **Suggested Improvement**: Provide a concrete code example showing the improvement
        
        Format your response as a JSON with keys: "positive_rephrasing", "the_why", "suggested_improvement"
        
        Remember: Be encouraging, educational, and focus on growth rather than criticism.
        """
        
        try:
            response = self.model.generate_content(prompt)
            
            # Try to extract JSON from response
            response_text = response.text
            
            # Look for JSON in the response
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group())
            else:
                # Fallback to structured parsing
                lines = response_text.split('\n')
                result = {
                    'positive_rephrasing': response_text[:200] + "...",
                    'the_why': "AI-generated explanation for code improvement",
                    'suggested_improvement': "```python\n# AI-suggested improvement\n```"
                }
            
            # Add sentiment analysis
            sentiment = self.analyze_sentiment(comment)
            result['sentiment_analysis'] = sentiment
            result['ai_generated'] = True
            
            return result
            
        except Exception as e:
            print(f"AI generation failed: {e}")
            # Fallback to traditional method
            return self.generate_empathetic_response_traditional(comment, code)

    def process_review(self, input_data: Dict) -> str:
        """Main processing function that generates the final Markdown report"""
        
        code_snippet = input_data['code_snippet']
        review_comments = input_data['review_comments']
        
        # Process each comment
        processed_comments = []
        
        for i, comment in enumerate(review_comments, 1):
            if self.ai_available:
                result = self.generate_empathetic_response_ai(comment, code_snippet)
            else:
                result = self.generate_empathetic_response_traditional(comment, code_snippet)
            
            result['original_comment'] = comment
            result['comment_number'] = i
            processed_comments.append(result)
        
        # Generate final markdown report
        return self.generate_markdown_report(code_snippet, processed_comments)

    def generate_markdown_report(self, code_snippet: str, processed_comments: List[Dict]) -> str:
        """Generate the final Markdown report"""
        
        report = f"""# Empathetic Code Review Report
*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*

## Original Code
```python
{code_snippet}
```

## Constructive Feedback

"""
        
        for comment_data in processed_comments:
            report += f"""### Comment {comment_data['comment_number']}

**Original Feedback:** "{comment_data['original_comment']}"

**Positive Rephrasing:** {comment_data['positive_rephrasing']}

**The Why:** {comment_data['the_why']}

**Suggested Improvement:**
{comment_data['suggested_improvement']}

---

"""

        # Add summary section
        total_comments = len(processed_comments)
        harsh_comments = sum(1 for c in processed_comments 
                           if c['sentiment_analysis']['needs_empathy'])
        
        report += f"""## Summary

- **Total Comments Processed:** {total_comments}
- **Comments Needing Empathy:** {harsh_comments}
- **AI-Enhanced:** {'Yes' if self.ai_available else 'No (Traditional ML)'}

## Key Improvements Identified
"""
        
        categories = set(c.get('category', 'general') for c in processed_comments)
        for category in categories:
            report += f"- **{category.replace('_', ' ').title()}** improvements suggested\n"
        
        report += """
*Remember: Every piece of feedback is an opportunity to grow and learn. Keep coding with confidence!*
"""
        
        return report

print("EmpatheticCodeReviewer class defined successfully!")


EmpatheticCodeReviewer class defined successfully!


In [5]:
# Smoke-test the rule-based (non-AI) reviewer on the sample input
print("=== Testing Traditional ML Approach ===")
reviewer_traditional = EmpatheticCodeReviewer(use_ai=False)

# Build the Markdown report, then show it under a separator line
result_traditional = reviewer_traditional.process_review(sample_input)
print("Traditional ML processing complete!")
print(f"\n{'=' * 50}\n")
print(result_traditional)


=== Testing Traditional ML Approach ===
Traditional ML processing complete!


# Empathetic Code Review Report
*Generated on 2025-08-28 12:36:53*

## Original Code
```python
def get_active_users(users):
    results = []
    for u in users:
        if u.is_active == True and u.profile_complete == True:
            results.append(u)
    return results
```

## Constructive Feedback

### Comment 1

**Original Feedback:** "This is inefficient. Don't loop twice conceptually."

**Positive Rephrasing:** Consider this enhancement: using list comprehension or filter() for cleaner, more Pythonic code.

**The Why:** This change improves code performance and follows Python best practices.

**Suggested Improvement:**
**Suggested Improvement:**
```python
def get_active_users(users):
    return [user for user in users 
            if user.is_active and user.profile_complete]
```
*Using list comprehension makes the code more concise and Pythonic.*

---

### Comment 2

**Original Feedback:** "Variable 'u

In [6]:

print(" AI-Enhanced Approach ")

# SECURITY: the API key is read from the environment instead of being written
# into the notebook. The key that was previously hard-coded here has been
# exposed in source and should be revoked and replaced.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    print("GEMINI_API_KEY is not set; the reviewer will use the traditional approach.")

# Without a key the reviewer leaves the AI path disabled and process_review()
# uses the rule-based generator.
reviewer_ai = EmpatheticCodeReviewer(use_ai=True, api_key=GEMINI_API_KEY)
result_ai = reviewer_ai.process_review(sample_input)
print("AI processing complete!")
print("\n" + "="*50 + "\n")
print(result_ai)



 AI-Enhanced Approach 
AI model initialized successfully!
AI generation failed: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
AI generation failed: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
AI generation failed: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
AI processing complete!


# Empathetic Code Review Report
*Generated on 2025-08-28 12:36:53*

## Original Code
```python
def get_active_users(users):
    results = []
    for u in users:
        if u.is_active == True and u.profile_complete == True:
            results.append(u)
    return results
```

## Constructive Feedback

###

In [7]:

def interactive_test():
    """Prompt for a snippet and comments on stdin, then print and return the review.

    An empty snippet or an empty comment list falls back to the matching part
    of sample_input. The review always uses the rule-based reviewer.
    """
    print("=== Interactive Empathetic Code Reviewer ===")

    # Snippet: a single line of input, or the sample when left blank
    snippet = input("\nEnter your code snippet (or press Enter for sample): ").strip()
    snippet = snippet or sample_input['code_snippet']

    # Comments: keep reading stripped lines until the first empty one
    print("\nEnter review comments (one per line, empty line to finish):")
    remarks = list(iter(lambda: input().strip(), ""))
    if not remarks:
        remarks = sample_input['review_comments']

    request = {
        'code_snippet': snippet,
        'review_comments': remarks
    }
    report = EmpatheticCodeReviewer(use_ai=False).process_review(request)

    banner = "=" * 60
    print("\n" + banner)
    print("EMPATHETIC REVIEW RESULT:")
    print(banner)
    print(report)

    return report

# Uncomment the line below to run interactive testing
# interactive_test()

# The original message stopped at "You can now:" without saying what;
# it now names the action this cell makes available.
print("Setup complete! You can now call interactive_test() to review your own code and comments.")


Setup complete! You can now:


In [8]:
#  Batch processing example
def batch_process_reviews(review_data_list: List[Dict]) -> List[str]:
    """Run the rule-based reviewer over several review requests.

    One reviewer instance is shared by all requests and a progress line is
    printed before each one.

    Returns:
        The reports, in the same order as the requests.
    """
    reviewer = EmpatheticCodeReviewer(use_ai=False)
    total = len(review_data_list)
    reports = []

    for position, request in enumerate(review_data_list, start=1):
        print(f"Processing review {position}/{total}...")
        reports.append(reviewer.process_review(request))

    return reports

# Two small review requests used to demonstrate batch processing
batch_example = [
    {
        "code_snippet": "def calc(x,y): return x+y",
        "review_comments": ["No docstring.", "Poor parameter names."],
    },
    {
        "code_snippet": "while True: pass",
        "review_comments": ["Infinite loop is dangerous.", "No exit condition."],
    },
]

# Run the batch and print each report under a numbered banner
batch_results = batch_process_reviews(batch_example)
for i, result in enumerate(batch_results):
    print(f"\n=== BATCH RESULT {i+1} ===", result, sep="\n")

print("Batch processing function ready!")

Processing review 1/2...
Processing review 2/2...

=== BATCH RESULT 1 ===
# Empathetic Code Review Report
*Generated on 2025-08-28 12:36:53*

## Original Code
```python
def calc(x,y): return x+y
```

## Constructive Feedback

### Comment 1

**Original Feedback:** "No docstring."

**Positive Rephrasing:** Great start on the logic here! Here's a way to make it even better: Nice work! To make future changes easier, let's refactoring this section for improved maintainability and clarity. This will help the codebase evolve smoothly.

**The Why:** This change improves code maintainability and follows Python best practices.

**Suggested Improvement:**
```python
# Refactored version would go here
```

---

### Comment 2

**Original Feedback:** "Poor parameter names."

**Positive Rephrasing:** The core functionality looks solid! Consider this enhancement: The logic is sound! For better code clarity, consider refactoring this section for improved maintainability and clarity. This helps other dev

In [9]:
# Cell 8: Evaluation metrics
def evaluate_empathy_improvement(original_comments: List[str],
                                 empathetic_responses: List[str]) -> Dict:
    """Compare the original comments with their rewrites on tone and readability.

    Both lists are averaged on two measures: the sentiment analyzer's compound
    score and the Flesch reading-ease score.

    Returns:
        A dict with the mean of each measure for both lists and the
        rewrite-minus-original difference for each measure.
    """
    analyzer = SentimentIntensityAnalyzer()

    def mean_compound(texts):
        # Average compound sentiment over a list of texts
        return np.mean([analyzer.polarity_scores(text)['compound'] for text in texts])

    def mean_reading_ease(texts):
        # Average Flesch reading ease over a list of texts
        return np.mean([flesch_reading_ease(text) for text in texts])

    # Sentiment first, then readability; originals before rewrites
    sentiment_before = mean_compound(original_comments)
    sentiment_after = mean_compound(empathetic_responses)
    ease_before = mean_reading_ease(original_comments)
    ease_after = mean_reading_ease(empathetic_responses)

    return {
        'sentiment_improvement': sentiment_after - sentiment_before,
        'original_sentiment': sentiment_before,
        'empathetic_sentiment': sentiment_after,
        'readability_improvement': ease_after - ease_before,
        'original_readability': ease_before,
        'empathetic_readability': ease_after
    }

# Hand-written empathetic rewrites of the three sample comments, used to
# exercise the evaluation metrics
sample_responses = [
    "Great start on the logic here! For improved performance: using list comprehension or filter() for cleaner, more Pythonic code. This will make it faster and more efficient.",
    "Nice approach to this problem! For better readability: using a more descriptive name like 'user' instead of 'u' to improve code clarity. This helps other developers understand your intent.",
    "Good foundation in place! Following Python conventions, we could write 'if u.is_active:' instead of 'if u.is_active == True:' - Python treats boolean values naturally. This aligns with industry standards.",
]

evaluation = evaluate_empathy_improvement(sample_input['review_comments'], sample_responses)

# Print every metric rounded to three decimals
print(" EMPATHY EVALUATION ")
for key, value in evaluation.items():
    print(key, format(value, ".3f"), sep=": ")

print("Empathetic Code Reviewer is ready for the hackathon!")

 EMPATHY EVALUATION 
sentiment_improvement: 0.884
original_sentiment: -0.040
empathetic_sentiment: 0.844
readability_improvement: 20.595
original_readability: 35.549
empathetic_readability: 56.144
Empathetic Code Reviewer is ready for the hackathon!
