# In [3]:  (Jupyter cell prompt left over from the notebook export)
"""
🛡️ SecuScan — Security & Compliance Scanner
Author: Siddharth Raut
License: MIT
--------------------------------------------
Lightweight Python-based security scanner for codebases.
Scans for:
  • Hardcoded secrets (AWS keys, passwords, etc.)
  • Vulnerable dependency versions
  • Invalid licenses in README
No external dependencies required.
"""

import os
import re
import json
import argparse
import hashlib
from datetime import datetime


# -----------------------------
# 🔒 Configuration
# -----------------------------
# Secret-detection rules as (regular expression, human-readable label) pairs.
# scan_secrets() applies every pattern to each line of a file with re.search().
SECRETS_REGEX = [
    # Literal "AKIA" followed by 16 uppercase letters or digits.
    (r"AKIA[0-9A-Z]{16}", "AWS Access Key"),
    # Case-insensitive `password = "..."` or `password = '...'` assignment.
    (r"(?i)password\s*=\s*[\"'].*?[\"']", "Hardcoded Password"),
    # Case-insensitive `secret = "..."` or `secret = '...'` assignment.
    (r"(?i)secret\s*=\s*[\"'].*?[\"']", "Hardcoded Secret"),
]

# Package name -> version strings that scan_dependencies() flags when a
# requirement is pinned (with `==`) to one of them.
VULNERABLE_PACKAGES = {
    "requests": ["2.19.0", "2.18.4"],
    "flask": ["0.12", "1.0"],
    "urllib3": ["1.24", "1.25.1"]
}

# License identifiers accepted by generate_report(); any other value returned
# by detect_license() (including "Unknown") is counted as invalid.
LICENSE_WHITELIST = ["MIT", "Apache-2.0", "BSD-3-Clause"]


# -----------------------------
# ⚙️ Utility Functions
# -----------------------------
def hash_file(path):
    """Compute the SHA-256 hex digest of the file at *path*.

    The file is opened in binary mode and consumed in 8192-byte reads so the
    whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()


def scan_secrets(file_path, rules=None):
    """Scan a text file line by line for hardcoded secrets.

    Args:
        file_path: Path of the file to inspect. It is decoded as UTF-8 and
            undecodable bytes are dropped (``errors="ignore"``).
        rules: Optional iterable of ``(regex, description)`` pairs. When
            omitted, the module-level ``SECRETS_REGEX`` rules are used.

    Returns:
        A list with one dict per (line, rule) hit, holding the keys ``file``,
        ``line`` (1-based), ``issue`` (the rule description) and ``match``
        (the stripped line). A file that cannot be opened or read yields the
        findings collected so far, which is an empty list if opening failed.
    """
    if rules is None:
        rules = SECRETS_REGEX
    # Compile once per call rather than looking each pattern up for every line.
    compiled = [(re.compile(pattern), desc) for pattern, desc in rules]

    issues = []
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line_no, line in enumerate(f, start=1):
                for regex, desc in compiled:
                    if regex.search(line):
                        issues.append({
                            "file": file_path,
                            "line": line_no,
                            "issue": desc,
                            "match": line.strip()
                        })
    except OSError:
        # Best-effort scan: unreadable files (missing, permission denied,
        # directories, ...) are skipped. Only I/O errors are swallowed so that
        # genuine bugs are no longer hidden by a blanket `except Exception`.
        pass
    return issues


def scan_dependencies(requirements_path, vulnerable=None):
    """Check a requirements file for pinned versions known to be vulnerable.

    Only ``name==version`` pins are considered. Before comparison each line is
    cleaned up: inline ``#`` comments and ``;`` environment markers are
    dropped, extras (``name[extra]``) are removed, the package name is
    lower-cased, and anything after the version (e.g. ``--hash=...``) is
    ignored. Lines that are not ``==`` pins are skipped instead of raising.

    Args:
        requirements_path: Path to the requirements file.
        vulnerable: Optional mapping of lower-case package name to a
            collection of vulnerable version strings. Defaults to the
            module-level ``VULNERABLE_PACKAGES``.

    Returns:
        A list of dicts with keys ``package``, ``version`` and ``severity``
        (always ``"critical"``). Empty when the file does not exist.
    """
    known = VULNERABLE_PACKAGES if vulnerable is None else vulnerable
    vulns = []
    if not os.path.isfile(requirements_path):
        return vulns

    # errors="ignore" matches the other file readers in this module, so a stray
    # non-UTF-8 byte cannot abort the scan.
    with open(requirements_path, "r", encoding="utf-8", errors="ignore") as f:
        for raw_line in f:
            # Drop the comment first, then the environment marker, so an
            # `==` inside either of them is never mistaken for the pin.
            spec = raw_line.split("#", 1)[0].split(";", 1)[0].strip()
            name, sep, rest = spec.partition("==")
            if not sep:
                continue
            pkg = name.split("[", 1)[0].strip().lower()
            version_tokens = rest.split()
            if not pkg or not version_tokens:
                continue
            ver = version_tokens[0]
            if ver in known.get(pkg, ()):
                vulns.append({
                    "package": pkg,
                    "version": ver,
                    "severity": "critical"
                })
    return vulns


def detect_license(readme_path):
    """Guess the project license from keywords in a README file.

    The README text is lower-cased and checked, in order, for the substrings
    "mit license", "apache" and "bsd"; the first hit decides the result.
    Returns "Unknown" when the file does not exist or no keyword is present.
    """
    if os.path.exists(readme_path):
        with open(readme_path, "r", encoding="utf-8", errors="ignore") as handle:
            content = handle.read().lower()

        # Order matters: earlier markers win when several are present.
        markers = (
            ("mit license", "MIT"),
            ("apache", "Apache-2.0"),
            ("bsd", "BSD-3-Clause"),
        )
        for needle, license_id in markers:
            if needle in content:
                return license_id
    return "Unknown"


# -----------------------------
# 🧾 Core Functionality
# -----------------------------
def generate_report(scan_path, output):
    """Run all checks under *scan_path* and write a JSON report to *output*.

    Steps:
      1. Walk *scan_path* and run scan_secrets() on every ``.py``, ``.env``,
         ``.cfg`` and ``.ini`` file.
      2. Run scan_dependencies() on ``<scan_path>/requirements.txt``.
      3. Run detect_license() on ``<scan_path>/README.md`` and flag the result
         as invalid when it is not in LICENSE_WHITELIST.
      4. Write the report as indented JSON to *output* and print a summary.

    Args:
        scan_path: Directory to scan.
        output: Path of the JSON report file. Its parent directory is created
            when needed; a bare file name (no directory part) is written to
            the current working directory.

    Returns:
        The report dict that was written to *output*.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    report = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the "+00:00" offset is rewritten to keep the original "...Z" format.
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "path": os.path.abspath(scan_path),
        "summary": {
            "files_scanned": 0,
            "secrets_found": 0,
            "vulnerabilities": 0,
            "licenses_invalid": 0
        },
        "details": {
            "secrets": [],
            "vulnerabilities": [],
            "licenses": []
        }
    }
    summary = report["summary"]
    details = report["details"]

    # 🔍 Scan files for secrets
    for root, _, files in os.walk(scan_path):
        for f in files:
            if not f.endswith((".py", ".env", ".cfg", ".ini")):
                continue
            summary["files_scanned"] += 1
            secrets = scan_secrets(os.path.join(root, f))
            details["secrets"].extend(secrets)
            summary["secrets_found"] += len(secrets)

    # 📦 Check dependencies
    dep_path = os.path.join(scan_path, "requirements.txt")
    vulns = scan_dependencies(dep_path)
    details["vulnerabilities"].extend(vulns)
    summary["vulnerabilities"] = len(vulns)

    # 📜 License validation
    license_type = detect_license(os.path.join(scan_path, "README.md"))
    details["licenses"].append(license_type)
    if license_type not in LICENSE_WHITELIST:
        summary["licenses_invalid"] = 1

    # 💾 Save report
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the output path actually has one.
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    print("\n✅ Security scan complete.")
    print(f"📄 Report saved to: {output}\n")
    print("Summary:")
    print(json.dumps(summary, indent=2))

    return report


# -----------------------------
# 🚀 CLI Entrypoint (Notebook-Safe)
# -----------------------------
if __name__ == "__main__":
    import sys

    cli = argparse.ArgumentParser(description="SecuScan - Security & Compliance Scanner")
    cli.add_argument("--path", type=str, default=".", help="Path to scan")
    cli.add_argument("--output", type=str, default="reports/report.json", help="Output report path")

    # parse_known_args() returns unrecognised arguments instead of exiting,
    # so extra flags injected by Jupyter/Colab (like -f) are simply ignored.
    options, _ignored = cli.parse_known_args(sys.argv[1:])

    generate_report(options.path, options.output)



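# --- Captured notebook output (kept for reference; not part of the program) ---
# ✅ Security scan complete.
# 📄 Report saved to: reports/report.json
#
# Summary:
# {
#   "files_scanned": 0,
#   "secrets_found": 0,
#   "vulnerabilities": 0,
#   "licenses_invalid": 1
# }
#
# Source line echoed in the cell's output (presumably by a warning about
# datetime.utcnow(); confirm against the original notebook):
#   "timestamp": datetime.utcnow().isoformat() + "Z",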
