In [2]:
# ✅ Install Gemini SDK
!pip install -q google-generativeai

# 1️⃣ Imports & Config
from google.colab import files as colab_files
import google.generativeai as genai
import zipfile, os, shutil, json, re

# Configure Gemini API key
# SECURITY: an API key must never be stored in the notebook source, because it
# leaks to everyone who can read the file or its history. The key is taken from
# the GOOGLE_API_KEY (or GEMINI_API_KEY) environment variable; when neither is
# set, the user is prompted for it without echoing the input.
import getpass

_api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not _api_key:
    _api_key = getpass.getpass("Gemini API key: ").strip()
if not _api_key:
    raise RuntimeError(
        "A Gemini API key is required: set GOOGLE_API_KEY or enter it when prompted."
    )
genai.configure(api_key=_api_key)

# Supported extensions
# Lower-case file extensions (without the leading dot) that are selected for
# analysis; each one also names a sub-directory of the dump folders.
EXT_DIRS = (
    "py c cpp js ts java kt swift rb go rs php "
    "html css json xml yaml yml sh bat sql pl r "
    "scala bash dockerfile md"
).split()

# 2️⃣ Upload & extract your ZIP
uploaded = colab_files.upload()
if not uploaded:
    # Nothing came back from the upload dialog (presumably it was cancelled);
    # fail with a clear message instead of a bare StopIteration below.
    raise RuntimeError("No file was uploaded; please upload a ZIP archive.")
orig_zip = next(iter(uploaded))
print(f"📦 Uploaded: {orig_zip}")
if not zipfile.is_zipfile(orig_zip):
    # Reject non-archives before the previous extraction folder is wiped.
    raise RuntimeError(f"{orig_zip!r} is not a valid ZIP archive.")

extract_root = "uploads"
# Start from an empty folder so files from an earlier run are not mixed in.
if os.path.exists(extract_root):
    shutil.rmtree(extract_root)
os.makedirs(extract_root, exist_ok=True)
with zipfile.ZipFile(orig_zip, 'r') as z:
    z.extractall(extract_root)
print("✅ ZIP extracted to /content/uploads")

# 3️⃣ Detect README.md as Project Aim
# The first README.md met by the top-down walk is used and the search stops
# there, so the message is printed once and a README deeper in the tree cannot
# replace the one found first.
project_aim = None
for root, _, names in os.walk(extract_root):
    readme = next((name for name in names if name.lower() == "readme.md"), None)
    if readme is None:
        continue
    # errors="replace": the README is only prompt context, so a stray
    # non-UTF-8 byte should not abort the whole run.
    with open(os.path.join(root, readme), "r", encoding="utf-8", errors="replace") as f:
        project_aim = f.read()
    print("🧠 README.md found — using it as the project aim.")
    break
if not project_aim:
    print("⚠️ No README.md found. Defaulting to generic refactor strategy.")

# 4️⃣ Segregate code files
# Copies every file with a supported extension into code_dump/<ext>/ and
# records the copy's path in `segregated[ext]`.
dump_root = "code_dump"
# Clear leftovers from a previous run, as is done for the other work folders.
if os.path.exists(dump_root):
    shutil.rmtree(dump_root)
segregated = {ext: [] for ext in EXT_DIRS}
for root, _, fnames in os.walk(extract_root):
    for fn in fnames:
        if '.' not in fn:
            continue
        ext = fn.rsplit('.', 1)[1].lower()
        if ext not in segregated:
            continue
        tgt = os.path.join(dump_root, ext)
        os.makedirs(tgt, exist_ok=True)
        src = os.path.join(root, fn)
        dst = os.path.join(tgt, fn)
        if dst in segregated[ext]:
            # The dump is flat per extension, so files sharing a name land on
            # the same path: the later copy wins. Warn and list the path once.
            print(f"⚠️ Name collision: {src} replaces an earlier file at {dst}")
        else:
            segregated[ext].append(dst)
        shutil.copy(src, dst)
print("✅ Files segregated into code_dump/")

# 5️⃣ Dump to .txt
# Writes the text of every segregated file to code_dump_txt/<ext>/<stem>.txt.
txt_root = "code_dump_txt"
if os.path.exists(txt_root):
    shutil.rmtree(txt_root)
os.makedirs(txt_root, exist_ok=True)
for ext, paths in segregated.items():
    for fp in paths:
        name = os.path.splitext(os.path.basename(fp))[0] + ".txt"
        try:
            with open(fp, 'r', encoding='utf-8') as src:
                text = src.read()
        except UnicodeDecodeError:
            # A file that is not UTF-8 text is skipped rather than aborting
            # the whole run; it is then left out of the dump.
            print(f"⚠️ Skipping {fp}: not valid UTF-8 text")
            continue
        tgt = os.path.join(txt_root, ext)
        os.makedirs(tgt, exist_ok=True)
        with open(os.path.join(tgt, name), 'w', encoding='utf-8') as dst:
            dst.write(text)
print("✅ Code dumped into text at code_dump_txt/")

# 6️⃣ Gemini Analysis Function
def analyze_code(code_input, fname, aim=None):
    """Ask the Gemini model to review and refactor one file.

    Args:
        code_input: Text of the file; inserted verbatim into the prompt.
        fname: File name shown to the model inside the prompt.
        aim: Optional project description. When truthy, the prompt asks for a
            refactor that preserves this aim; otherwise the generic
            nine-section review checklist is used.

    Returns:
        The ``text`` of the model's response.
    """
    gemini = genai.GenerativeModel(model_name="gemini-2.0-flash")

    # Both prompts ask for the updated code inside triple backticks.
    if not aim:
        request_text = f"""
You are a highly experienced software engineer and code reviewer. Analyze the snippet from `{fname}`:

{code_input}

Provide an in‑depth review across these sections, then output the updated code in triple backticks.
1. Syntax & Runtime Errors
- Identify syntax errors that would prevent compilation/execution
- Detect potential runtime exceptions and error handling issues
- Check for language-specific syntax pitfalls and deprecated features
- Examine boundary conditions that could cause crashes
- Validate parameter types and return values

2. Logical Errors & Edge Cases
- Analyze conditional logic for correctness and completeness
- Identify off-by-one errors and boundary condition handling
- Test with extreme inputs (empty, null, maximum values, etc.)
- Verify mathematical calculations and business logic implementation
- Check for race conditions in concurrent/parallel code
- Validate state transitions and side effects

3. Design & Structure
- Evaluate adherence to design patterns and architectural principles
- Assess modularity, coupling, and cohesion metrics
- Review class/function responsibilities (Single Responsibility Principle)
- Check interface design and abstraction layers
- Evaluate inheritance hierarchies and composition models
- Assess testability of components

4. Performance & Scalability
- Identify algorithmic complexity issues (time/space complexity)
- Detect inefficient data structures or algorithms
- Identify potential memory leaks and resource management issues
- Review database queries and I/O operations for optimization
- Evaluate caching strategies and opportunities
- Assess thread safety and concurrency models
- Profile computational bottlenecks
- Analyze scaling dimensions (vertical vs horizontal)

5. Readability & Best Practices
- Check adherence to language-specific style guides
- Evaluate naming conventions and consistency
- Assess code documentation and comments
- Check for code duplication and unnecessary complexity
- Review appropriate use of language features
- Analyze code formatting and organization

6. Security
- Identify injection vulnerabilities (SQL, XSS, CSRF, etc.)
- Check for authentication and authorization issues
- Review cryptographic implementations
- Assess handling of sensitive data
- Evaluate input validation and sanitization
- Check for hardcoded credentials and secrets
- Review dependency vulnerabilities
- Analyze access control mechanisms

7. Testing & Robustness
- Evaluate test coverage (unit, integration, system)
- Check for error handling comprehensiveness
- Assess edge case handling
- Review exception management strategies
- Verify graceful degradation under failure
- Check logging and monitoring capabilities
- Review recovery mechanisms

8. Maintainability & Future-Proofing
- Assess code flexibility for future requirements
- Check for tech debt and code smells
- Evaluate documentation quality
- Review versioning and compatibility considerations
- Assess dependency management
- Check for extensibility points
- Evaluate configuration vs. hardcoding
- Review deployment and CI/CD compatibility

9. Final Summary & Updated Code
"""
    else:
        request_text = f"""
You are a professional software architect. The project goal is:

📘 **Project Aim (from README.md)**:
{aim.strip()}

Now analyze the following file `{fname}` and refactor it while preserving project intent:

{code_input}


Provide an in-depth review and updated version of the file. Final output must include:
- Detailed feedback
- Updated version of the code in triple backticks
"""

    # A single user turn carrying the whole prompt.
    user_turn = {"role": "user", "parts": [request_text]}
    return gemini.generate_content(contents=[user_turn]).text

def extract_update(text):
    """Return the contents of the last fenced code block in *text*.

    The opening fence may carry any info string (``python``, ``c++``,
    ``objective-c``, ...), not only purely alphabetic tags. If such a tag is
    rejected, the opening fence is skipped and the closing fence of one block
    gets paired with the opening fence of the next, returning the prose in
    between.

    Args:
        text: Model response to search; may be ``None`` or empty.

    Returns:
        The stripped body of the last code block, or ``None`` when *text* is
        empty or contains no complete fenced block.
    """
    if not text:
        return None
    # Info string = anything up to the end of the fence line except a
    # backtick; the body is matched lazily up to the next closing fence.
    blocks = re.findall(r"```[^\n`]*\n(.*?)```", text, re.DOTALL)
    return blocks[-1].strip() if blocks else None

# 7️⃣ Process each code file
# For every dumped .txt file: request an analysis, record it in `results`, and
# write the refactored code back over the matching extracted source file.
results = []
for ext in sorted(os.listdir(txt_root)):
    folder = os.path.join(txt_root, ext)
    if not os.path.isdir(folder):
        continue
    for txt_file in sorted(os.listdir(folder)):
        fp = os.path.join(folder, txt_file)
        print(f"\n📂 Analyzing {fp} …")
        with open(fp, 'r', encoding='utf-8') as txt_in:
            code = txt_in.read()
        try:
            analysis = analyze_code(code, txt_file, aim=project_aim)
        except Exception as exc:
            # Batch boundary: one failed request must not discard the work
            # already done for the other files. Record the failure and go on.
            print(f"⚠️ Analysis failed for {fp}: {exc}")
            results.append({"file": txt_file, "lang": ext, "analysis": None,
                            "updated": None, "error": str(exc)})
            continue
        updated = extract_update(analysis)
        results.append({"file":txt_file, "lang":ext, "analysis":analysis, "updated":updated})

        if updated:
            # Overwrite the original extracted file
            original_name = txt_file.rsplit('.',1)[0] + f".{ext}"
            for root, _, fnames in os.walk(extract_root):
                if original_name not in fnames:
                    continue
                target = os.path.join(root, original_name)
                # Several folders may hold a file with this name. Only replace
                # a file whose current text is the text that was analyzed.
                try:
                    with open(target, 'r', encoding='utf-8') as current:
                        is_analyzed_source = current.read() == code
                except (OSError, UnicodeDecodeError):
                    is_analyzed_source = False
                if not is_analyzed_source:
                    continue
                with open(target, 'w', encoding='utf-8') as f:
                    f.write(updated)
        else:
            # Opening the file for writing would truncate it, so it is not
            # touched when there is nothing to write.
            print(f"⚠️ No updated code block found for {txt_file}; original left unchanged.")
        print(analysis)
        print("\n" + "="*80 + "\n")

# 8️⃣ Re-zip updated files
# The output archive is named after the upload (text before its last dot) with
# "_updated.zip" appended; entries are stored relative to the extraction root.
zip_stem = orig_zip.rsplit('.',1)[0]
out_zip = zip_stem + "_updated.zip"
with zipfile.ZipFile(out_zip, mode='w', compression=zipfile.ZIP_DEFLATED) as zout:
    for folder, _, members in os.walk(extract_root):
        for member in members:
            member_path = os.path.join(folder, member)
            zout.write(member_path, os.path.relpath(member_path, extract_root))
print(f"\n✅ Final ZIP created: {out_zip}")

# 9️⃣ Save results
# Write the collected per-file results as indented JSON, then hand both
# artifacts (updated archive first, JSON report second) to the download helper.
results_path = "analysis_results.json"
with open(results_path, "w") as jf:
    json.dump(results, fp=jf, indent=2)

for artifact in (out_zip, results_path):
    colab_files.download(artifact)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  },
  {
    "country_code": "IE",
    "country_name": "Ireland",
    "continent_name": "Europe"
  },
  {
    "country_code": "IL",
    "country_name": "Israel",
    "continent_name": "Asia"
  },
  {
    "country_code": "IM",
    "country_name": "Isle of Man",
    "continent_name": "Europe"
  },
  {
    "country_code": "IN",
    "country_name": "India",
    "continent_name": "Asia"
  },
  {
    "country_code": "IO",
    "country_name": "British Indian Ocean Territory",
    "continent_name": "Asia"
  },
  {
    "country_code": "IQ",
    "country_name": "Iraq",
    "continent_name": "Asia"
  },
  {
    "country_code": "IR",
    "country_name": "Iran",
    "continent_name": "Asia"
  },
  {
    "country_code": "IS",
    "country_name": "Iceland",
    "continent_name": "Europe"
  },
  {
    "country_code": "IT",
    "country_name": "Italy",
    "continent_name": "Europe"
  },
  {
    "country_code": "JE",
    "country_name": "

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>