# In [None]:
import datetime
import json
import os
import pathlib
import subprocess
import sys
import textwrap

# nbformat is the only third-party dependency of this generator. It is
# imported inside the guard below - and deliberately not together with the
# standard-library imports - so that a missing package triggers the pip
# fallback instead of an ImportError on the very first import statement.
try:
    import nbformat as nbf
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "nbformat"])
    import nbformat as nbf

# Notebook filename
nb_path = pathlib.Path("/mnt/data/sypec_workflow.ipynb")

# Helper: turn an indented triple-quoted literal into clean markdown text
def md(txt):
    """Return *txt* without its common leading indentation or outer whitespace."""
    unindented = textwrap.dedent(txt)
    return unindented.strip()

# Create a new notebook; its metadata names a Python 3 kernel and records the
# version of the interpreter that runs this generator.
nb = nbf.v4.new_notebook(
    metadata=dict(
        kernelspec=dict(
            name="python3",
            display_name="Python 3",
            language="python",
        ),
        language_info=dict(
            name="python",
            version=sys.version.split()[0],
        ),
    )
)

# Cells are accumulated here in display order.
cells = list()

# 1. Title & Intro
# Every outline item sits on its own "> " line. Writing the items as "\n2. ..."
# escapes inside the literal produces bare lines without the blockquote
# marker, so items 2-8 would no longer belong to the quoted list.
cells.append(nbf.v4.new_markdown_cell(md(f"""
# 🟢 Sypec Sustainability Analysis Notebook

Generated on **{datetime.date.today()}**.  
This notebook orchestrates an end-to-end sustainability audit for any public GitHub repository.

> **Workflow outline**
>
> 1. Install & import dependencies
> 2. Clone the repository
> 3. Run static analysis
> 4. (Optional) Execute runtime probes
> 5. Estimate energy & resource usage
> 6. Ask ChatGPT-o3 to draft narrative sections
> 7. Generate graphs
> 8. Build & compile the PDF report
""")))

# 2. Dependencies cell
cells.append(nbf.v4.new_markdown_cell(md("""
## 0️⃣ Environment & Dependencies

Run the next cell to install required packages. Comment out lines you don't need if already installed.
""")))

# The backslashes are doubled so that they survive into the generated cell as
# shell-style line continuations. A single backslash before a newline is
# consumed by this (non-raw) literal and collapses the command into one long,
# space-padded line.
# NOTE(review): cloc and scaphandre are normally shipped as system tools -
# confirm that pip can actually resolve them in the target environment.
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
# ⚠️ Feel free to trim this list for your environment
!pip install --quiet gitpython cloc lizard radon scaphandre \\
                  numpy pandas matplotlib jinja2 openai nbformat \\
                  tqdm
""")))

# 3. Config cell
cells.append(nbf.v4.new_markdown_cell(md("""
## 1️⃣ Configuration

Set your GitHub repository URL and analysis parameters here.
""")))

# The generated cell reads the API key from the OPENAI_API_KEY environment
# variable and only falls back to the placeholder, so the secret does not
# have to be typed into (and saved with) the notebook.
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
from pathlib import Path
import os, json, datetime

# --- User inputs ------------------------------------------------------------
repo_url = "https://github.com/org/repo"    # ← change me!
traffic_assumptions = {
    "active_users": 10_000,
    "requests_per_user_per_day": 5
}
# Prefer `export OPENAI_API_KEY=...` over editing the placeholder below
openai_api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

# --- Derived paths ----------------------------------------------------------
work_dir = Path.cwd() / "sypec_run"
repo_dir = work_dir / "repo"

work_dir.mkdir(exist_ok=True)
print("Working directory:", work_dir)
""")))

# 4. Clone repository
cells.append(nbf.v4.new_markdown_cell(md("""
## 2️⃣ Clone Repository
"""))) 

# The size report in the generated cell only stats regular files: calling
# stat() on every rglob() hit also counts directory entries and raises
# FileNotFoundError on dangling symlinks inside the checkout.
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
import git, shutil

if repo_dir.exists():
    print("Removing previous repo clone …")
    shutil.rmtree(repo_dir)

print(f"Cloning {repo_url} …")
git.Repo.clone_from(repo_url, repo_dir)
repo_bytes = sum(p.stat().st_size for p in repo_dir.rglob('*') if p.is_file())
print("Clone complete, size:", round(repo_bytes / 1e6, 2), "MB")
""")))

# 5. Static analysis
cells.append(nbf.v4.new_markdown_cell(md("""
## 3️⃣ Static Analysis
Run CLOC, Radon, Lizard, etc. to gather code metrics.
""")))

# Fixes relative to the earlier version of the generated cell:
#   * `radon mi -j` maps each file to a dict ({"mi": ..., "rank": ...} or
#     {"error": ...}), so the "mi" values are extracted before averaging and
#     an empty result no longer divides by zero.
#   * lizard is driven through its Python API: `-C` expects a numeric
#     threshold (not "cyc"), and the regex for "cyclomatic_complexity = N"
#     would not match lizard's tabular CLI output, leaving the average at 0.
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
import subprocess, json, tempfile, re, pandas as pd
import lizard

def run(cmd):
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode:
        raise RuntimeError(result.stderr)
    return result.stdout

# --- Count lines of code (cloc JSON) ---------
cloc_json = run(["cloc", "--json", "--quiet", str(repo_dir)])
cloc_metrics = json.loads(cloc_json)["SUM"]
print("CLOC summary:", cloc_metrics)

# --- Radon maintainability -------------------
radon_out = run(["radon", "mi", "-j", str(repo_dir)])
radon_scores = json.loads(radon_out)
# Files radon could not parse carry an "error" key instead of "mi"
mi_values = [entry["mi"] for entry in radon_scores.values() if "mi" in entry]
mean_mi = sum(mi_values) / len(mi_values) if mi_values else 0
print("Mean maintainability index:", round(mean_mi, 2))

# --- Cyclomatic complexity (lizard) ----------
complexities = [
    fn.cyclomatic_complexity
    for file_info in lizard.analyze([str(repo_dir)], lans=["python"])
    for fn in file_info.function_list
]
avg_cyc = sum(complexities)/len(complexities) if complexities else 0
print("Average cyclomatic complexity:", round(avg_cyc, 2))
""")))

# 6. Runtime probe (optional)
# A markdown explanation followed by a code cell that contains only
# commented-out example commands.
cells.extend([
    nbf.v4.new_markdown_cell(md("""
## 4️⃣ Runtime Probe (Optional)

If your project has tests or a sample workload, uncomment and customise the next cell to measure real energy via *scaphandre* (or any agent). Otherwise, skip to Section 5.
""")),
    nbf.v4.new_code_cell(textwrap.dedent("""
# Example placeholder — requires root & scaphandre installed
# !scaphandre json -s 5 -d 60 -o energy.json &
# !pytest
# !pkill scaphandre
""")),
])

# 7. Energy model
# Section heading plus a code cell that defines estimate_energy() and prints
# a baseline computed from hypothetical example inputs.
cells += [
    nbf.v4.new_markdown_cell(md("""
## 5️⃣ Energy Estimation Model
""")),
    nbf.v4.new_code_cell(textwrap.dedent("""
import math, numpy as np

def estimate_energy(cpu_seconds: float, mem_gb_seconds: float, net_gb: float) -> float:
    \"\"\"Return estimated kWh.\"\"\"
    return 0.0003 * cpu_seconds + 0.0004 * mem_gb_seconds + 0.06 * net_gb

# HYPOTHETICAL example values — replace with your measurements
cpu_s, mem_gb_s, net_gb = 1200, 500, 2
baseline_kwh = estimate_energy(cpu_s, mem_gb_s, net_gb)
print("Baseline energy (single workload):", round(baseline_kwh, 4), "kWh")
""")),
]

# 8. ChatGPT-o3 reasoning
cells.append(nbf.v4.new_markdown_cell(md("""
## 6️⃣ Generate Narrative Sections with ChatGPT-o3
""")))

# Fixes relative to the earlier version of the generated cell:
#   * `tiktoken` was imported but neither installed by the dependency cell
#     nor used, so the cell failed on import.
#   * `openai.ChatCompletion` no longer exists in openai>=1.0 (what an
#     unpinned `pip install openai` fetches); the client-object API is used.
#   * `temperature` is omitted.
#     NOTE(review): o-series models are understood to reject non-default
#     sampling parameters - confirm against the current API reference.
# `res` keeps the `.choices[0].message.content` and `.usage.total_tokens`
# attributes that this cell and the report cell read.
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
import json
from openai import OpenAI

client = OpenAI(api_key=openai_api_key)

metrics = dict(
    cloc=cloc_metrics,
    maintainability=mean_mi,
    complexity=avg_cyc,
    baseline_kwh=baseline_kwh,
    assumptions=traffic_assumptions
)

system_msg = \"\"\"You are Sypec, a senior green-software auditor.\\n
Return JSON with keys: intro, problems, methodology, improvements, caveats.\"\"\"

res = client.chat.completions.create(
    model="o3",
    messages=[
        {"role": "system", "content": system_msg},
        {"role": "user", "content": json.dumps(metrics)}
    ]
)
narrative = json.loads(res.choices[0].message.content)
print(narrative.keys())
""")))

# 9. Graph generation
# The generated code cell writes energy_users.png and resource_breakdown.png.
# It uses `np` without importing it; that name is imported by the
# energy-model cell's source earlier in the notebook.
cells.extend([
    nbf.v4.new_markdown_cell(md("""
## 7️⃣ Generate Graphs
""")),
    nbf.v4.new_code_cell(textwrap.dedent("""
import matplotlib.pyplot as plt

users = np.logspace(2, 5, 50)  # 1e2 … 1e5
energy_per_user = baseline_kwh / (traffic_assumptions['requests_per_user_per_day'] * 30)
total_energy_curve = users * energy_per_user / 1000  # MWh for scale

plt.figure()
plt.plot(users, total_energy_curve)
plt.xscale('log')
plt.xlabel('Monthly Active Users')
plt.ylabel('Projected Energy (MWh/month)')
plt.title('Energy vs. Users')
plt.grid(True)
plt.savefig('energy_users.png', dpi=150)

# Resource breakdown dummy
labels = ['CPU', 'Memory', 'Network']
vals = [0.65, 0.25, 0.10]
plt.figure()
plt.bar(labels, vals)
plt.ylabel('Energy share')
plt.title('Resource Energy Breakdown')
plt.savefig('resource_breakdown.png', dpi=150)
""")),
])

# 10. Build LaTeX report
cells.append(nbf.v4.new_markdown_cell(md("""
## 8️⃣ Build LaTeX Report
""")))

# Fixes relative to the earlier version of the generated cell:
#   * report.tex is written with Path.write_text(), so the file handle is
#     closed (the bare open(...).write(...) never closed it).
#   * a single jinja2 Environment is created; the first one was never used.
#   * repo_name tolerates a trailing "/" or ".git" in repo_url, which used to
#     give an empty name or "repo.git".
cells.append(nbf.v4.new_code_cell(textwrap.dedent("""
from jinja2 import Environment, FileSystemLoader

# Tolerate a trailing slash or ".git" suffix in the repository URL
repo_name = repo_url.rstrip('/').split('/')[-1]
if repo_name.endswith('.git'):
    repo_name = repo_name[:-len('.git')]

context = dict(
    repo_name=repo_name,
    report_date=str(datetime.date.today()),
    total_energy=round(total_energy_curve[-1], 2),
    energy_per_user=round(energy_per_user, 4),
    median_requests=traffic_assumptions['requests_per_user_per_day'],
    test_energy=round(baseline_kwh, 4),
    annual_energy=round(total_energy_curve[-1]*12, 2),
    score=75,  # placeholder
    index_letter='B',
    index_color='YellowOrange',
    code_quality=round(mean_mi/10*30, 1),
    docs_score=15,                # placeholder
    complexity_score=max(0, 15 - (avg_cyc - 5)*3),
    energy_score=18,              # placeholder
    scalability_score=12,         # placeholder
    improvements=narrative['improvements'],
    intro=narrative['intro'],
    problems=narrative['problems'],
    methodology=narrative['methodology'],
    caveats=narrative['caveats'],
    report_energy=round(res.usage.total_tokens*6e-7, 6)
)

template_path = Path('sypec_report_template.tex')
if not template_path.exists():
    raise FileNotFoundError('LaTeX template not found! Place it next to this notebook.')

env = Environment(loader=FileSystemLoader('.'))
tex = env.get_template('sypec_report_template.tex').render(**context)
Path('report.tex').write_text(tex, encoding='utf-8')

# Compile
!pdflatex -interaction=nonstopmode report.tex
""")))

# 11. Deliverable
# Heading plus a code cell that embeds report.pdf in an IFrame.
cells += [
    nbf.v4.new_markdown_cell(md("""
## 9️⃣ View / Download Report
""")),
    nbf.v4.new_code_cell(textwrap.dedent("""
from IPython.display import IFrame
IFrame('report.pdf', width=800, height=600)
""")),
]

# Finalize notebook
nb['cells'] = cells

# Create the destination directory first so that writing does not fail with
# FileNotFoundError on a machine where it does not exist yet.
nb_path.parent.mkdir(parents=True, exist_ok=True)

# Pass the path as a plain string.
# NOTE(review): some nbformat releases appear to accept only str paths or
# file objects here - str() is valid either way.
nbf.write(nb, str(nb_path))

print(f"Notebook created at: {nb_path}")
