# Zeeguu API Architecture Reconstruction - Basic Radon Analysis

This notebook provides a comprehensive analysis of the Zeeguu API (backend) code using Radon – a Python tool for computing various code metrics. We extract architectural insights using metrics such as:

1. **Cyclomatic Complexity (CC):** Identifies overly complex functions.
2. **Maintainability Index (MI):** Evaluates overall maintainability of modules.
3. **Raw Metrics:** Counts lines of code (LOC, LLOC, SLOC) and comment ratios.

Additionally, the notebook aggregates metrics not only by package but also by subpackage (the full relative directory), and then visualizes the results with bar charts annotated with the number of functions (or files) per group.

All charts generated from the Radon results are saved as PNG images under `BASE_DIR/output/radon/`, where `BASE_DIR` is the parent of the directory the notebook is run from.

In [None]:
import os
import sys
import subprocess

def install_package(package):
    """Make sure *package* is importable, installing it with pip if it is not.

    The import name is derived from the distribution name by replacing
    hyphens with underscores. A status line is printed in both cases.

    Args:
        package: Distribution name as passed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    module_name = package.replace('-', '_')
    try:
        __import__(module_name)
    except ImportError:
        print(f"Installing {package}...")
        # Use the running interpreter's pip so the package lands in the
        # same environment as this notebook.
        pip_command = [sys.executable, "-m", "pip", "install", package]
        subprocess.check_call(pip_command)
    else:
        print(f"{package} is already installed.")

# Install radon and additional packages for visualization
install_package('radon')
install_package('matplotlib')
install_package('pandas')
install_package('networkx')
install_package('pydot')
install_package('squarify')

# Verify Radon installation by printing its version.
# The check is best-effort: a failure is reported but does not stop the
# notebook. The caught exception is included in the report so the actual
# cause (non-zero exit status, missing interpreter, ...) is not lost.
try:
    subprocess.check_call([sys.executable, "-m", "radon", "--version"])
except Exception as e:
    print("Error calling radon. Please ensure it is installed properly.")
    print(f"Details: {e}")

In [None]:
# ---- Path Configuration ----
import os

# Every location is derived from the current working directory, which is
# expected to be the Tools folder; BASE_DIR is its parent.
TOOLS_DIR = os.getcwd()  # Expected: ...\Tools
BASE_DIR = os.path.abspath(os.path.join(TOOLS_DIR, os.pardir))
DATA_DIR = os.path.join(BASE_DIR, 'Data')
API_DIR = os.path.join(BASE_DIR, 'Data', 'api')

# Destination for the charts produced from the radon results; created up
# front so later savefig calls have somewhere to write.
RADON_OUTPUT_DIR = os.path.join(BASE_DIR, 'output', 'radon')
os.makedirs(RADON_OUTPUT_DIR, exist_ok=True)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"TOOLS_DIR: {TOOLS_DIR}")
print(f"BASE_DIR: {BASE_DIR}")
print(f"DATA_DIR: {DATA_DIR}")
print(f"API_DIR: {API_DIR}")
print(f"RADON_OUTPUT_DIR: {RADON_OUTPUT_DIR}")

## 1. Basic Project Structure Analysis

We begin by listing the directory structure for the API (backend).

In [None]:
def list_directories(root_dir):
    """Return the sorted relative paths of all non-hidden directories.

    Walks *root_dir* top-down. Sub-directories whose name starts with a dot
    are pruned, so neither they nor anything below them is reported. The
    root itself is included (as ``"."``).

    Args:
        root_dir: Directory to walk.

    Returns:
        A sorted list of paths relative to *root_dir*.
    """
    found = []
    for current, subdirs, _files in os.walk(root_dir):
        # Assigning through the slice edits the list os.walk holds, which is
        # what stops it from descending into the hidden directories.
        subdirs[:] = [name for name in subdirs if not name.startswith('.')]
        found.append(os.path.relpath(current, root_dir))
    found.sort()
    return found

# Print one relative directory path per line under a heading.
print("API (Backend) Structure:")
for rel_dir in list_directories(API_DIR):
    print(rel_dir)

In [None]:
def count_python_files(root_dir):
    """Count the files below *root_dir* whose name ends with ``.py``.

    Every directory reached by ``os.walk`` is included; nothing is pruned.

    Args:
        root_dir: Directory to walk.

    Returns:
        The number of matching files as an int.
    """
    return sum(
        1
        for _dirpath, _dirnames, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith('.py')
    )

# Count once and keep the result in api_count so other cells can reuse it.
api_count = count_python_files(API_DIR)
print(f"Number of Python files in API (Backend):  {api_count}")

## 2-5 API Files Analysis

In the next cell we run Radon's command-line interface to compute cyclomatic complexity (CC), the maintainability index (MI), and raw metrics for the API code, and print its output.

In [None]:
# This cell uses API_DIR and simply runs and prints the radon commands
import os
import sys
import subprocess

def run_and_print(command):
    """Run *command*, echoing the command line and whatever it wrote.

    The command's stdout is printed first, then its stderr; an empty stream
    is skipped. The exit status is not inspected.

    Args:
        command: Argument list handed to ``subprocess.run``.
    """
    print("Running command:", ' '.join(command))
    completed = subprocess.run(command, capture_output=True, text=True)
    for stream_text in (completed.stdout, completed.stderr):
        if stream_text:
            print(stream_text)

# 2. Cyclomatic Complexity Analysis
# "-s" shows the complexity score next to each block, "-a" appends the average.
print("API Cyclomatic Complexity:")
run_and_print([sys.executable, "-m", "radon", "cc", API_DIR, "-s", "-a"])

# 3. Maintainability Index Analysis
print("API Maintainability Index:")
run_and_print([sys.executable, "-m", "radon", "mi", API_DIR])

# 4. Most Complex Blocks
# "-n C" keeps only blocks ranked C or worse and "-o SCORE" orders them by
# complexity score. The listing is neither limited to ten entries nor grouped
# per file, so the heading states what is actually shown.
print("Most Complex Blocks in API (rank C or worse, ordered by score):")
run_and_print([sys.executable, "-m", "radon", "cc", API_DIR, "-s", "-n", "C", "-o", "SCORE"])

# 5. Raw Metrics Analysis
# "-s" adds a summary over all analysed files.
print("API Raw Metrics:")
run_and_print([sys.executable, "-m", "radon", "raw", API_DIR, "-s"])

## Visualization of Aggregated Metrics by Package

Below we generate bar charts of average metrics by package. Each bar is annotated with the count (number of functions or files) in the package.

In [None]:
import os
import sys
import json
import subprocess
from collections import defaultdict
import matplotlib.pyplot as plt

def aggregate_cc_by_package(base_dir):
    """Collect cyclomatic-complexity values per top-level package.

    Runs ``radon cc`` in JSON mode on *base_dir* and groups the complexity
    of every reported block by the first path component of its file,
    relative to *base_dir*. Files located directly in *base_dir* are grouped
    under ``"root"``.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object instead of a list of blocks; such entries are skipped so one
    broken file does not abort the aggregation.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A ``defaultdict(list)`` mapping package name to the list of
        complexity values found in it.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON
            (for example because radon itself failed to run).
    """
    cmd = [sys.executable, "-m", "radon", "cc", base_dir, "-j", "-s", "-a"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    pkg_data = defaultdict(list)
    for filepath, funcs in data.items():
        # Anything that is not a list of blocks is an error record.
        if not isinstance(funcs, list):
            continue
        relpath = os.path.relpath(filepath, base_dir)
        pkg = relpath.split(os.sep)[0] if os.sep in relpath else "root"
        for func in funcs:
            pkg_data[pkg].append(func["complexity"])
    return pkg_data

def aggregate_mi_by_package(base_dir):
    """Collect maintainability-index values per top-level package.

    Runs ``radon mi`` in JSON mode on *base_dir* and groups one MI value per
    file by the first path component of the file, relative to *base_dir*.
    Files located directly in *base_dir* are grouped under ``"root"``.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object. Those entries are skipped: recording them as MI 0 would pull the
    package average down for a file that was never measured.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A ``defaultdict(list)`` mapping package name to the list of MI
        values of its files.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON.
    """
    cmd = [sys.executable, "-m", "radon", "mi", base_dir, "-j"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    pkg_data = defaultdict(list)
    for filepath, mi_value in data.items():
        if isinstance(mi_value, dict):
            if "error" in mi_value:
                continue
            score = mi_value.get("mi", 0)
        else:
            # A bare number is accepted as the MI value itself.
            score = mi_value
        relpath = os.path.relpath(filepath, base_dir)
        pkg = relpath.split(os.sep)[0] if os.sep in relpath else "root"
        pkg_data[pkg].append(score)
    return pkg_data

def aggregate_raw_by_package(base_dir):
    """Sum radon's raw metrics per top-level package.

    Runs ``radon raw`` in JSON mode on *base_dir* and, for every file, adds
    its metrics to the totals of the package named by the first path
    component of the file, relative to *base_dir*. Files located directly in
    *base_dir* are grouped under ``"root"``. A metric missing from a file's
    record counts as 0.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object. Those entries are skipped so they are not counted as zero-line
    files, which would distort any per-file average built from the result.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A tuple ``(pkg_totals, pkg_file_count)``: the first maps package
        name to a dict of summed metrics (``loc``, ``lloc``, ``sloc``,
        ``comments``, ``single_comments``, ``multi``, ``blank``), the second
        maps package name to the number of files summed.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON.
    """
    cmd = [sys.executable, "-m", "radon", "raw", base_dir, "-j", "-s"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    metrics = ["loc", "lloc", "sloc", "comments", "single_comments", "multi", "blank"]
    pkg_totals = defaultdict(lambda: {metric: 0 for metric in metrics})
    pkg_file_count = defaultdict(int)
    for filepath, stats in data.items():
        if "error" in stats:
            continue
        relpath = os.path.relpath(filepath, base_dir)
        pkg = relpath.split(os.sep)[0] if os.sep in relpath else "root"
        pkg_file_count[pkg] += 1
        for metric in metrics:
            pkg_totals[pkg][metric] += stats.get(metric, 0)
    return pkg_totals, pkg_file_count

# Draw three vertical bar charts of per-package averages (cyclomatic
# complexity, maintainability index, LOC per file). Each chart is saved as a
# PNG in RADON_OUTPUT_DIR and then shown.
if __name__ == "__main__":
    # Aggregate CC
    cc_pkg = aggregate_cc_by_package(API_DIR)
    pkg_names = list(cc_pkg.keys())
    # Mean complexity per package; the conditional yields 0 for an empty
    # list instead of dividing by zero.
    avg_cc = [sum(cc_pkg[p]) / len(cc_pkg[p]) if cc_pkg[p] else 0 for p in pkg_names]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(pkg_names, avg_cc, color='purple')
    plt.xlabel("Package")
    plt.ylabel("Average Cyclomatic Complexity")
    plt.title("Average CC by Package (API)")
    # Label each bar, centred just above its top, with the number of
    # complexity values that went into its average.
    for bar, pkg in zip(bars, pkg_names):
        count = len(cc_pkg[pkg])
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, f"n={count}", ha='center', va='bottom', fontsize=10)
    plt.tight_layout()
    # Save before show() so the file is written from the finished figure.
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_cc_by_package.png'))
    plt.show()

    # Aggregate MI
    mi_pkg = aggregate_mi_by_package(API_DIR)
    pkg_names_mi = list(mi_pkg.keys())
    # Mean MI per package, with the same guard against an empty list.
    avg_mi = [sum(mi_pkg[p]) / len(mi_pkg[p]) if mi_pkg[p] else 0 for p in pkg_names_mi]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(pkg_names_mi, avg_mi, color='teal')
    plt.xlabel("Package")
    plt.ylabel("Average Maintainability Index")
    plt.title("Average MI by Package (API)")
    # Label each bar with the number of MI values in that package.
    for bar, pkg in zip(bars, pkg_names_mi):
        count = len(mi_pkg[pkg])
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, f"n={count}", ha='center', va='bottom', fontsize=10)
    plt.tight_layout()
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_mi_by_package.png'))
    plt.show()

    # Aggregate Raw Metrics
    raw_pkg, pkg_file_count = aggregate_raw_by_package(API_DIR)
    pkg_names_raw = list(raw_pkg.keys())
    # Summed LOC divided by the number of files counted for the package;
    # 0 when no file was counted.
    avg_loc = [raw_pkg[p]['loc'] / pkg_file_count[p] if pkg_file_count[p] else 0 for p in pkg_names_raw]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(pkg_names_raw, avg_loc, color='orange')
    plt.xlabel("Package")
    plt.ylabel("Average LOC per File")
    plt.title("Average LOC per File by Package (API)")
    # Label each bar with the number of files counted for that package.
    for bar, pkg in zip(bars, pkg_names_raw):
        count = pkg_file_count[pkg]
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, f"n={count}", ha='center', va='bottom', fontsize=10)
    plt.tight_layout()
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_loc_by_package.png'))
    plt.show()

## Visualization for Subpackages

Below we visualize the average metrics aggregated by subpackage (the full relative directory). As before, each horizontal bar is annotated with the count (number of functions or files) for that subpackage.

In [None]:
import os
import sys
import json
import subprocess
from collections import defaultdict
import matplotlib.pyplot as plt

def aggregate_cc_by_subpackage(base_dir):
    """Collect cyclomatic-complexity values per directory.

    Runs ``radon cc`` in JSON mode on *base_dir* and groups the complexity
    of every reported block by the directory of its file, relative to
    *base_dir*. Files located directly in *base_dir* are grouped under
    ``"root"``.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object instead of a list of blocks; such entries are skipped so one
    broken file does not abort the aggregation.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A ``defaultdict(list)`` mapping relative directory to the list of
        complexity values found in it.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON
            (for example because radon itself failed to run).
    """
    cmd = [sys.executable, "-m", "radon", "cc", base_dir, "-j", "-s", "-a"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    pkg_data = defaultdict(list)
    for filepath, funcs in data.items():
        # Anything that is not a list of blocks is an error record.
        if not isinstance(funcs, list):
            continue
        relpath = os.path.relpath(filepath, base_dir)
        # An empty dirname means the file sits directly in base_dir.
        pkg = os.path.dirname(relpath) or "root"
        for func in funcs:
            pkg_data[pkg].append(func["complexity"])
    return pkg_data

def aggregate_mi_by_subpackage(base_dir):
    """Collect maintainability-index values per directory.

    Runs ``radon mi`` in JSON mode on *base_dir* and groups one MI value per
    file by the directory of the file, relative to *base_dir*. Files located
    directly in *base_dir* are grouped under ``"root"``.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object. Those entries are skipped: recording them as MI 0 would pull the
    directory average down for a file that was never measured.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A ``defaultdict(list)`` mapping relative directory to the list of MI
        values of its files.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON.
    """
    cmd = [sys.executable, "-m", "radon", "mi", base_dir, "-j"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    pkg_data = defaultdict(list)
    for filepath, mi_value in data.items():
        if isinstance(mi_value, dict):
            if "error" in mi_value:
                continue
            score = mi_value.get("mi", 0)
        else:
            # A bare number is accepted as the MI value itself.
            score = mi_value
        relpath = os.path.relpath(filepath, base_dir)
        # An empty dirname means the file sits directly in base_dir.
        pkg = os.path.dirname(relpath) or "root"
        pkg_data[pkg].append(score)
    return pkg_data

def aggregate_raw_by_subpackage(base_dir):
    """Sum radon's raw metrics per directory.

    Runs ``radon raw`` in JSON mode on *base_dir* and, for every file, adds
    its metrics to the totals of the file's directory, relative to
    *base_dir*. Files located directly in *base_dir* are grouped under
    ``"root"``. A metric missing from a file's record counts as 0.

    Radon reports a file it could not analyse as an ``{"error": ...}``
    object. Those entries are skipped so they are not counted as zero-line
    files, which would distort any per-file average built from the result.

    Args:
        base_dir: Directory to analyse.

    Returns:
        A tuple ``(pkg_totals, pkg_file_count)``: the first maps relative
        directory to a dict of summed metrics (``loc``, ``lloc``, ``sloc``,
        ``comments``, ``single_comments``, ``multi``, ``blank``), the second
        maps relative directory to the number of files summed.

    Raises:
        json.JSONDecodeError: If radon's standard output is not valid JSON.
    """
    cmd = [sys.executable, "-m", "radon", "raw", base_dir, "-j", "-s"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    data = json.loads(result.stdout)
    metrics = ["loc", "lloc", "sloc", "comments", "single_comments", "multi", "blank"]
    pkg_totals = defaultdict(lambda: {metric: 0 for metric in metrics})
    pkg_file_count = defaultdict(int)
    for filepath, stats in data.items():
        if "error" in stats:
            continue
        relpath = os.path.relpath(filepath, base_dir)
        # An empty dirname means the file sits directly in base_dir.
        pkg = os.path.dirname(relpath) or "root"
        pkg_file_count[pkg] += 1
        for metric in metrics:
            pkg_totals[pkg][metric] += stats.get(metric, 0)
    return pkg_totals, pkg_file_count

# Draw three horizontal bar charts of per-subpackage averages (cyclomatic
# complexity, maintainability index, LOC per file). Each chart is saved as a
# PNG in RADON_OUTPUT_DIR and then shown.
if __name__ == "__main__":
    # Aggregate Cyclomatic Complexity by subpackage
    cc_subpkg = aggregate_cc_by_subpackage(API_DIR)
    subpkg_names = list(cc_subpkg.keys())
    # Mean complexity per subpackage; the conditional yields 0 for an empty
    # list instead of dividing by zero.
    avg_cc = [sum(cc_subpkg[p]) / len(cc_subpkg[p]) if cc_subpkg[p] else 0 for p in subpkg_names]

    plt.figure(figsize=(12, 6))
    bars = plt.barh(subpkg_names, avg_cc, color='indigo')
    plt.xlabel("Average Cyclomatic Complexity")
    plt.title("Average CC by Subpackage (API)")
    # Label each bar, just past its right end and vertically centred, with
    # the number of complexity values that went into its average.
    for bar, subpkg in zip(bars, subpkg_names):
        count = len(cc_subpkg[subpkg])
        plt.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height()/2, f"n={count}", va='center', fontsize=10)
    plt.tight_layout()
    # Save before show() so the file is written from the finished figure.
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_cc_by_subpackage.png'))
    plt.show()

    # Aggregate Maintainability Index by subpackage
    mi_subpkg = aggregate_mi_by_subpackage(API_DIR)
    subpkg_names_mi = list(mi_subpkg.keys())
    # Mean MI per subpackage, with the same guard against an empty list.
    avg_mi = [sum(mi_subpkg[p]) / len(mi_subpkg[p]) if mi_subpkg[p] else 0 for p in subpkg_names_mi]

    plt.figure(figsize=(12, 6))
    bars = plt.barh(subpkg_names_mi, avg_mi, color='coral')
    plt.xlabel("Average Maintainability Index")
    plt.title("Average MI by Subpackage (API)")
    # Label each bar with the number of MI values in that subpackage.
    for bar, subpkg in zip(bars, subpkg_names_mi):
        count = len(mi_subpkg[subpkg])
        plt.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height()/2, f"n={count}", va='center', fontsize=10)
    plt.tight_layout()
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_mi_by_subpackage.png'))
    plt.show()

    # Aggregate Raw Metrics by subpackage
    raw_subpkg, subpkg_file_count = aggregate_raw_by_subpackage(API_DIR)
    subpkg_names_raw = list(raw_subpkg.keys())
    # Summed LOC divided by the number of files counted for the subpackage;
    # 0 when no file was counted.
    avg_loc = [raw_subpkg[p]['loc'] / subpkg_file_count[p] if subpkg_file_count[p] else 0 for p in subpkg_names_raw]

    plt.figure(figsize=(12, 6))
    bars = plt.barh(subpkg_names_raw, avg_loc, color='darkorange')
    plt.xlabel("Average LOC per File")
    plt.title("Average LOC per File by Subpackage (API)")
    # Label each bar with the number of files counted for that subpackage.
    for bar, subpkg in zip(bars, subpkg_names_raw):
        count = subpkg_file_count[subpkg]
        plt.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height()/2, f"n={count}", va='center', fontsize=10)
    plt.tight_layout()
    plt.savefig(os.path.join(RADON_OUTPUT_DIR, 'average_loc_by_subpackage.png'))
    plt.show()
