1. Environment Setup in Colab

In [1]:
!pip install --quiet google-generativeai black radon python-dotenv

Set API Keys Securely

In [2]:
from google.colab import userdata
import google.generativeai as genai
import os, json, re, textwrap, datetime
from pathlib import Path

# Load Gemini API key from Colab secrets (must match name you saved).
# The secret is looked up under the name 'GOOGLE_API_KEY'; a missing/empty
# value aborts the cell immediately instead of failing later on the first request.
GEMINI_API_KEY = userdata.get('GOOGLE_API_KEY')
if not GEMINI_API_KEY:
    raise ValueError("Gemini API key not found in Colab secrets. Add it as GOOGLE_API_KEY.")

# Configure Gemini (registers the key with the google.generativeai client)
genai.configure(api_key=GEMINI_API_KEY)

# Model selection: used by call_llm_gemini for every request.
# NOTE(review): confirm this model id is still served before relying on it.
GEMINI_MODEL = "gemini-1.5-flash"

# Output directory: created up front (including parents) so later writes of the
# sample JSON files and the report do not fail on a fresh runtime.
OUT_DIR = Path("mission1_outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)
REPORT_PATH = OUT_DIR / "report.md"

print("Gemini model:", GEMINI_MODEL)

Gemini model: gemini-1.5-flash


Helpers for language detection, severity estimation, code formatting & complexity

In [3]:
# black is an optional dependency: if the import fails for any reason the name
# is bound to None, which optional_code_format treats as "formatter unavailable".
try:
    import black
except Exception:
    black = None

# radon is optional as well: cc_visit is bound to None on failure, which makes
# compute_complexity_python return None instead of raising.
try:
    from radon.complexity import cc_visit
except Exception:
    cc_visit = None

def detect_language_from_code(code: str) -> str:
    """Guess the programming language of a snippet from a few textual markers.

    The checks run in a fixed order (Python, C/C++, Java, JavaScript) and the
    first match wins. Matching is case-insensitive because the snippet is
    lowercased first, so every marker below must be written in lowercase.

    Args:
        code: Source code to classify. May be empty.

    Returns:
        One of "Python", "C/C++", "Java", "JavaScript" or "Unknown".
    """
    c = code.strip().lower()

    # `\bdef` needs a word boundary so that C's `typedef ` / `#ifndef ` do not
    # count as a Python function definition.
    has_python_def = re.search(r"\bdef\s+\w+\s*\(", c) is not None
    # A Python import statement occupies a whole line and carries no `;` or
    # quotes, which separates it from Java (`import a.b.C;`) and JavaScript
    # (`import x from 'y'`) imports.
    has_python_import = re.search(
        r"(?m)^[ \t]*(?:from[ \t]+[\w.]+[ \t]+)?import[ \t]+[\w.,*() \t]+(?:#.*)?\r?$",
        c,
    ) is not None
    if has_python_def or (has_python_import and ":" in c):
        return "Python"

    if "#include" in c or "int main(" in c:
        return "C/C++"

    # The marker is lowercase because `c` is lowercased.
    if "public class" in c or "system.out.println" in c:
        return "Java"

    # `"=>" in c` is an explicit membership test: a bare "=>" string is always
    # truthy and would make every unmatched snippet JavaScript.
    if ("function" in c and "console.log" in c) or "=>" in c:
        return "JavaScript"

    return "Unknown"

# Substrings that mark a review comment as negative in tone.
NEGATIVE_WORDS = ["inefficient", "bad", "poor", "wrong", "redundant", "bug", "smell"]

def estimate_severity(comment: str) -> str:
    """Classify a review comment as "critical", "major" or "minor".

    Matching is by case-insensitive substring:
      * "critical" when the comment mentions security, vulnerability or crash;
      * "major" when it contains "inefficient" or at least two NEGATIVE_WORDS;
      * "minor" otherwise.
    """
    text = comment.lower()

    for keyword in ("security", "vulnerability", "crash"):
        if keyword in text:
            return "critical"

    negative_count = len([word for word in NEGATIVE_WORDS if word in text])
    if "inefficient" in text or negative_count >= 2:
        return "major"
    return "minor"

def optional_code_format(code_text: str, lang="python") -> str:
    """Best-effort formatting of a snippet with black.

    The text is returned unchanged when it is blank, when `lang` is not
    "python" (case-insensitive), when black is unavailable, or when black
    raises while formatting.
    """
    # Short-circuit order matters: blank text never touches `lang` or `black`.
    should_format = code_text.strip() and lang.lower() == "python" and black
    if not should_format:
        return code_text

    try:
        formatted = black.format_str(code_text, mode=black.Mode())
    except Exception:
        # Unparseable snippets are passed through untouched.
        formatted = code_text
    return formatted

def compute_complexity_python(code: str) -> float | None:
    """Average the complexity of the blocks radon's cc_visit finds in `code`.

    Returns:
        The mean of each block's `complexity`, 0.0 when cc_visit reports no
        blocks, or None when radon is unavailable or analysis raises.
    """
    if cc_visit:
        try:
            blocks = cc_visit(code)
            if blocks:
                total = 0
                for block in blocks:
                    total += block.complexity
                return total / len(blocks)
            return 0.0
        except Exception:
            # Any analysis failure is reported as "unknown" below.
            pass
    return None


Gemini call function

In [4]:
def call_llm_gemini(code_snippet: str, review_comment: str, language: str, severity: str) -> dict:
    """Ask Gemini to rewrite one blunt review comment as empathetic guidance.

    Args:
        code_snippet: The code under review, embedded verbatim in the prompt.
        review_comment: The original review comment to rephrase.
        language: Language label included in the prompt.
        severity: Severity label included in the prompt.

    Returns:
        A dict parsed from the model's JSON reply (expected keys:
        "positive_rephrasing", "why", "improved_code"). When the reply has no
        readable text, is not valid JSON, or is JSON but not an object, a
        generic fallback dict with those three keys is returned instead.

    Raises:
        Exceptions raised by the Gemini client while sending the request are
        not caught here and propagate to the caller.
    """
    SYSTEM_PROMPT = """You are a senior software engineer and patient mentor.
    Rewrite blunt code review comments into empathetic, constructive, and educational guidance.
    Always include: Positive Rephrasing, The 'Why' (principle), and a concrete Suggested Improvement code snippet.
    Respond in JSON format with keys: positive_rephrasing, why, improved_code."""

    USER_PROMPT_TEMPLATE = f"""
    Programming language: {language}
    Severity: {severity}

    Code:
    {code_snippet}

    Original Review Comment:
    "{review_comment}"

    Respond with:
    {{
      "positive_rephrasing": "...",
      "why": "...",
      "improved_code": "..."
    }}
    """

    fallback = {
        "positive_rephrasing": f"Thanks for your work! Suggest refining: {review_comment}",
        "why": "Improves clarity and follows best practices.",
        "improved_code": code_snippet,
    }

    response = genai.GenerativeModel(GEMINI_MODEL).generate_content(
        [SYSTEM_PROMPT, USER_PROMPT_TEMPLATE]
    )

    try:
        # `response.text` raises ValueError when the reply carries no usable
        # text part (e.g. a blocked response), so it is read inside the try.
        raw_text = response.text
        # The model may wrap the JSON in prose or a code fence; take the span
        # from the first "{" to the last "}".
        match = re.search(r"\{.*\}", raw_text, flags=re.DOTALL)
        data = json.loads(match.group(0) if match else raw_text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Catching only
        # ValueError keeps KeyboardInterrupt and real bugs visible.
        return fallback

    # Callers use dict.get on the result; a JSON list/string would break them.
    if not isinstance(data, dict):
        return fallback
    return data


Markdown formatting

In [5]:
# Reference links appended to each report section, keyed by the detected
# language label. Each entry is a (title, url) pair; languages without an
# entry get no "Resources" list.
STYLE_LINKS = {
    "Python": [
        ("PEP 8: Function & variable names", "https://peps.python.org/pep-0008/#function-and-variable-names"),
        ("Python Typing", "https://docs.python.org/3/library/typing.html"),
        ("List Comprehensions", "https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions"),
    ]
}

# Markdown fence info strings for the language labels produced by
# detect_language_from_code. "C/C++" must not be lowercased into "c/c++",
# which is not a recognised highlighting tag.
_FENCE_TAGS = {
    "Python": "python",
    "C/C++": "cpp",
    "Java": "java",
    "JavaScript": "javascript",
    "Unknown": "",
}

def format_section(original_comment: str, result: dict, language: str) -> str:
    """Render one review comment and its LLM rewrite as a Markdown section.

    Args:
        original_comment: The reviewer's original comment, shown in the heading.
        result: Dict with optional keys "positive_rephrasing", "why" and
            "improved_code"; missing keys render as empty text.
        language: Detected language label; selects the code-fence tag and the
            resource links.

    Returns:
        The Markdown section, terminated by a newline.
    """
    # The model may return null or a non-string for "improved_code"; coerce it
    # so .strip() cannot raise.
    imp_code = str(result.get("improved_code") or "").strip()
    if detect_language_from_code(imp_code) == "Python":
        imp_code = optional_code_format(imp_code, "python")

    # Labels without a mapping keep the previous behaviour (lowercased label).
    fence_tag = _FENCE_TAGS.get(language, language.lower())

    section = []
    section.append(f'### Analysis of Comment: "{original_comment}"\n')
    section.append(f"* **Positive Rephrasing:** {result.get('positive_rephrasing','')}")
    section.append(f"* **The 'Why':** {result.get('why','')}")
    section.append("* **Suggested Improvement:**")
    section.append(f"```{fence_tag}")
    section.append(imp_code if imp_code else "# (No change needed)")
    section.append("```")

    links = STYLE_LINKS.get(language, [])
    if links:
        section.append("\n*Resources:*")
        for title, url in links:
            section.append(f"- {title}: {url}")

    return "\n".join(section) + "\n"

def format_summary(num_comments: int, language: str, complexity: float | None) -> str:
    bullets = [
        f"- Addressed **{num_comments}** comment(s).",
        f"- Detected language: **{language}**."
    ]
    if complexity is not None:
        bullets.append(f"- Avg cyclomatic complexity: **{complexity:.2f}**.")
    bullets.append("- Code readability and maintainability improved!")
    return "## Overall Summary\n\n" + "\n".join(bullets) + "\n"


Report generator

In [6]:
def generate_report(code_snippet: str, review_comments: list[str]) -> str:
    """Assemble the full Markdown report for a snippet and its review comments.

    Detects the language, estimates a severity per comment, measures the
    average complexity for Python snippets only, requests one LLM rewrite per
    comment, and joins header, sections and summary with blank lines.
    """
    language = detect_language_from_code(code_snippet)
    severity_list = list(map(estimate_severity, review_comments))
    if language == "Python":
        complexity = compute_complexity_python(code_snippet)
    else:
        complexity = None

    generated_at = datetime.datetime.now().isoformat(timespec='seconds')
    header = f"# Empathetic Code Review Report\n\n*Generated: {generated_at}*"
    detected = f"\n**Detected Language:** {language}\n"

    # One LLM request per comment, each rendered right after it returns.
    sections = [
        format_section(comment, call_llm_gemini(code_snippet, comment, language, sev), language)
        for comment, sev in zip(review_comments, severity_list)
    ]

    summary = format_summary(len(review_comments), language, complexity)
    return "\n\n".join([header, detected, *sections, summary])

def save_report(markdown_text: str, path: Path = REPORT_PATH):
    """Write the report to `path` as UTF-8 and print the absolute location."""
    path.write_text(markdown_text, encoding="utf-8")
    destination = path.resolve()
    print(f"Saved report → {destination}")


Create sample JSONs

In [7]:
# First sample payload: a Python function plus three blunt review comments.
# The dict shape ("code_snippet" string + "review_comments" list) is the input
# format the rest of the notebook reads.
sample_input = {
    "code_snippet": "def get_active_users(users):\n    results = []\n    for u in users:\n        if u.is_active == True and u.profile_complete == True:\n            results.append(u)\n    return results\n",
    "review_comments": [
        "This is inefficient. Don't loop twice conceptually.",
        "Variable 'u' is a bad name.",
        "Boolean comparison '== True' is redundant."
    ]
}
# Persisted so it can be uploaded later; get_input_data also falls back to this
# file when the user's menu choice is invalid.
IN_JSON = OUT_DIR / "input_example.json"
IN_JSON.write_text(json.dumps(sample_input, indent=2), encoding="utf-8")

# Second sample payload, written next to the first one.
# NOTE(review): the first comment says the name breaks snake_case, yet
# `add_numbers` is already snake_case — confirm the sample is intentional.
sample_input_2 = {
    "code_snippet": "def add_numbers(a,b):\n  return a+b\n",
    "review_comments": [
        "Function name doesn't follow snake_case convention.",
        "No type hints provided.",
        "No docstring for function."
    ]
}
IN_JSON_2 = OUT_DIR / "input_example_2.json"
IN_JSON_2.write_text(json.dumps(sample_input_2, indent=2), encoding="utf-8")

print("Sample JSONs created in:", OUT_DIR.resolve())


Sample JSONs created in: /content/mission1_outputs


Load input to test (upload a JSON file or enter code and comments manually)

In [12]:
from google.colab import files

def get_input_data():
    """Interactively obtain the code snippet and review comments.

    The user picks one of two input methods:
      1. Upload a JSON file; the first uploaded file is parsed and returned.
      2. Type the code and then the comments, each ended by a line reading END.

    Any other choice, or an upload dialog that yields no file, falls back to
    the sample JSON written to IN_JSON.

    Returns:
        For manual entry, a dict with "code_snippet" (str) and
        "review_comments" (list of str). For uploads and the sample fallback,
        whatever the JSON file contains (expected to have the same two keys).
    """
    def _load_sample():
        # The sample was written as UTF-8, so read it back the same way.
        return json.loads(IN_JSON.read_text(encoding="utf-8"))

    print("Choose input method:")
    print("1 → Upload JSON file")
    print("2 → Enter code + comments manually")
    choice = input("Enter 1 or 2: ").strip()

    if choice == "1":
        uploaded = files.upload()
        for fn in uploaded.keys():
            with open(fn, "r", encoding="utf-8") as f:
                return json.load(f)
        # Reached only when nothing was uploaded (e.g. the dialog was
        # cancelled); without this the function would return None.
        print("No file uploaded — defaulting to sample JSON.")
        return _load_sample()

    elif choice == "2":
        print("Paste or type your code snippet. End input with a line containing only END:")
        code_lines = []
        while True:
            line = input()
            if line.strip().upper() == "END":
                break
            code_lines.append(line)
        code_snippet = "\n".join(code_lines)

        print("Now enter review comments (one per line). End input with END:")
        comments = []
        while True:
            comment = input()
            if comment.strip().upper() == "END":
                break
            # Blank lines are skipped so they do not become empty review
            # comments, each of which would trigger its own LLM request.
            if comment.strip():
                comments.append(comment)

        return {"code_snippet": code_snippet, "review_comments": comments}

    else:
        print("Invalid choice — defaulting to sample JSON.")
        return _load_sample()

# Get the data (prompts the user; see get_input_data for the input methods)
data = get_input_data()
# Both keys are required; a payload missing either one raises KeyError here.
code_snippet = data["code_snippet"]
review_comments = data["review_comments"]

print(f"Loaded {len(review_comments)} review comment(s).")


Choose input method:
1 → Upload JSON file
2 → Enter code + comments manually
Enter 1 or 2: 1


Saving sample3.json to sample3.json
Loaded 3 review comment(s).


Generate Report

In [13]:
# Build the full Markdown report (generate_report calls the LLM once per comment)
report_md = generate_report(code_snippet, review_comments)
# Show only the first 500 characters, with an ellipsis marker when truncated
print(report_md[:500] + ("\n...\n" if len(report_md) > 500 else ""))


# Empathetic Code Review Report

*Generated: 2025-08-14T13:45:33*


**Detected Language:** C/C++


### Analysis of Comment: "Consider using a range-based for loop instead of manual indexing."

* **Positive Rephrasing:** Your `getActiveUsers` function is already working well!  Using range-based for loops can make your code even more readable and concise, enhancing maintainability and potentially improving performance slightly in some scenarios.
* **The 'Why':** Range-based for loops provide a mor
...



Save and Download Report

In [14]:
# Write the generated Markdown to REPORT_PATH
save_report(report_md, REPORT_PATH)

# Optional: download the report
# (`files` is also imported in the input-loading cell; the repeat import keeps
# this cell runnable even if that cell was skipped)
from google.colab import files
files.download(str(REPORT_PATH))

Saved report → /content/mission1_outputs/report.md


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Preview Markdown in Colab

In [15]:
from IPython.display import display, Markdown

def preview_report(path: Path = REPORT_PATH):
    """Display the Markdown report stored at `path`, or say that it is missing."""
    if path.exists():
        report_text = path.read_text(encoding="utf-8")
        display(Markdown(report_text))
    else:
        print("Report not found:", path)

# Preview the report: renders the file saved at REPORT_PATH as rich Markdown
preview_report(REPORT_PATH)


# Empathetic Code Review Report

*Generated: 2025-08-14T13:45:33*


**Detected Language:** C/C++


### Analysis of Comment: "Consider using a range-based for loop instead of manual indexing."

* **Positive Rephrasing:** Your `getActiveUsers` function is already working well!  Using range-based for loops can make your code even more readable and concise, enhancing maintainability and potentially improving performance slightly in some scenarios.
* **The 'Why':** Range-based for loops provide a more expressive and less error-prone way to iterate over containers like vectors.  They eliminate the need for manual index management, reducing the chance of off-by-one errors or other index-related bugs.  Furthermore, they often lead to cleaner, more idiomatic C++ code, making it easier for others (and your future self) to understand and modify.
* **Suggested Improvement:**
```c/c++
#include <iostream>
#include <vector>

struct User {
    bool isActive;
    bool profileComplete;
};

vector<User> getActiveUsers(const vector<User>& users) {
    vector<User> active;
    for (const auto& user : users) { // Range-based for loop
        if (user.isActive && user.profileComplete) { //Simplified condition
            active.push_back(user);
        }
    }
    return active;
}

int main() {
    vector<User> userList = {{true, true}, {false, true}, {true, false}};
    vector<User> activeUsers = getActiveUsers(userList);
    cout << "Active Users: " << activeUsers.size() << endl;
    return 0;
}
```


### Analysis of Comment: "Avoid explicit comparison with '== true' for boolean values."

* **Positive Rephrasing:** Your `getActiveUsers` function is already quite efficient!  A small refinement can make the boolean checks even more concise and readable.
* **The 'Why':** Directly using boolean variables in conditional statements improves readability and reduces visual clutter.  Explicit comparisons to `true` are redundant because the boolean value itself represents true or false.
* **Suggested Improvement:**
```c/c++
#include <iostream>
#include <vector>

struct User {
    bool isActive;
    bool profileComplete;
};

vector<User> getActiveUsers(const vector<User>& users) {
    vector<User> active;
    for (const auto& user : users) {
        if (user.isActive && user.profileComplete) {
            active.push_back(user);
        }
    }
    return active;
}

int main() {
    vector<User> userList = {{true, true}, {false, true}, {true, false}};
    vector<User> activeUsers = getActiveUsers(userList);
    cout << "Active Users: " << activeUsers.size() << endl;
    return 0;
}
```


### Analysis of Comment: "Use const references where possible to avoid unnecessary copying."

* **Positive Rephrasing:** This is a great start to filtering your user data!  To make the function even more efficient and idiomatic C++, we can optimize how we handle the input `users` vector.
* **The 'Why':** Passing large objects by value (without const reference) creates unnecessary copies, impacting performance, especially when dealing with many users. Using `const` ensures the function doesn't modify the original vector, which is good practice for data integrity and readability.
* **Suggested Improvement:**
```c/c++
#include <iostream>
#include <vector>
using namespace std;

struct User {
    bool isActive;
    bool profileComplete;
};

vector<User> getActiveUsers(const vector<User>& users) {
    vector<User> active;
    for (const auto& user : users) { // Range-based for loop and const reference
        if (user.isActive && user.profileComplete) { //Simplified boolean expression
            active.push_back(user); 
        }
    }
    return active;
}

int main() {
    vector<User> userList = {{true, true}, {false, true}, {true, false}};
    vector<User> activeUsers = getActiveUsers(userList);
    cout << "Active Users: " << activeUsers.size() << endl;
    return 0;
}
```


## Overall Summary

- Addressed **3** comment(s).
- Detected language: **C/C++**.
- Code readability and maintainability improved!
