In [None]:
 # --- Requirements schreiben & installieren
%%writefile requirements.txt
transformers
accelerate
bitsandbytes
datasets
evaluate
sacrebleu
codecarbon>=2.5,<3
pynvml>=11.5.0
psutil
numpy
pandas
huggingface_hub

In [None]:
# Install everything listed in requirements.txt (quiet output).
!pip install -q -r requirements.txt
# Uninstall two packages if present; `|| true` keeps the cell from failing when pip reports an error.
!pip uninstall -y -q google-genai firebase-admin || true
# Show fief-client metadata, or print a note when it is not installed.
!pip show fief-client || echo "fief-client nicht installiert"

In [None]:
# --- Mount Google Drive (Colab only) ---
from google.colab import drive
drive.mount('/content/drive')

# --- Set the project folder ---
# The working directory is switched to this folder, so files written with
# relative paths afterwards end up on Drive.
import os, pathlib, re
project_path = "/content/drive/MyDrive/LLM-Effizienz/4_2_Baseline"
pathlib.Path(project_path).mkdir(parents=True, exist_ok=True)  # create the folder if it is missing
os.chdir(project_path)
print("Arbeitsordner:", os.getcwd())

In [None]:
# --- Hugging Face login via Colab secret ---
from google.colab import userdata
from huggingface_hub import login

# userdata.get() raises (rather than returning None) when the secret does not
# exist or the notebook was not granted access to it. Treat any such failure
# as "no token" so the warning branch below is actually reachable.
try:
    hf_token = userdata.get("HF_TOKEN")
except Exception as exc:
    print(f"[HF] HF_TOKEN nicht lesbar ({type(exc).__name__}).")
    hf_token = None

if hf_token:
    login(hf_token)
    print("Hugging Face Login erfolgreich!")
else:
    print("WARNUNG: Kein HF_TOKEN gefunden – öffentliche Modelle meist trotzdem ladbar.")

In [None]:
# --- Imports & Setup ---
import warnings; warnings.filterwarnings("ignore")
import time, math, gc, platform, inspect
from dataclasses import dataclass, asdict
from contextlib import nullcontext
from typing import Optional, Tuple
from types import SimpleNamespace

import psutil
import numpy as np
import pandas as pd
import torch

from datasets import load_dataset
import evaluate
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
from codecarbon import EmissionsTracker, __version__ as cc_ver

print("CodeCarbon-Version:", cc_ver)

# Seed via the transformers helper so repeated runs are comparable.
set_seed(42)
# Global device string; read by autocast_ctx, capture_memory and load_model below.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    gpu_name = torch.cuda.get_device_name(0)
    vram_total_gb = torch.cuda.get_device_properties(0).total_memory/(1024**3)  # bytes -> GiB
    torch.backends.cuda.matmul.allow_tf32 = True  # permit TF32 matmuls on GPUs that support them
    # Inference only. NOTE(review): gradients are globally disabled on the CUDA path only;
    # the evaluation functions additionally wrap their work in torch.no_grad().
    torch.set_grad_enabled(False)
else:
    gpu_name = "CPU"
    vram_total_gb = 0.0

print(f"Device: {device} | GPU: {gpu_name} | VRAM: {vram_total_gb:.1f} GB | Torch {torch.__version__} | Python {platform.python_version()}")

In [None]:
# ========== Location configuration ==========
USE_GCP_REGION = True  # True => google/europe-west10 (Berlin), False => German electricity mix (DEU)

COUNTRY_ISO_CODE = "DEU"          # country mix: Germany
CLOUD_PROVIDER   = "google"       # cloud provider
CLOUD_REGION     = "europe-west10"  # GCP Berlin
# Reference: GCP Frankfurt=europe-west3, Netherlands=europe-west4, Berlin=europe-west10

In [None]:
# -------- CodeCarbon Helfer & Fallbacks --------
import os as _os, time as _time

def tracker_kwargs_base():
    """
    Build the keyword arguments shared by every EmissionsTracker in this notebook.

    Always set:
      - ``log_level="error"`` and ``output_dir="."``

    Set only if the installed ``EmissionsTracker.__init__`` accepts the name:
      - ``measure_power_secs=1`` (power sampling interval)
      - ``tracking_mode="process"``
      - ``country_iso_code=COUNTRY_ISO_CODE`` (in both location modes)
      - ``cloud_provider`` / ``cloud_region``: the configured GCP values when
        USE_GCP_REGION is True, otherwise explicitly ``None``.

    Parameters the constructor does not accept are skipped. Skipped parameters
    that carry a real (non-None) value are reported on stdout, because a
    silently dropped location setting changes the emission factor that ends up
    in the results.

    Returns:
        dict: keyword arguments to splat into ``EmissionsTracker(...)``.
    """
    base = dict(log_level="error", output_dir=".")
    wanted = {
        "measure_power_secs": 1,
        "tracking_mode": "process",
        "cloud_provider": CLOUD_PROVIDER if USE_GCP_REGION else None,
        "cloud_region": CLOUD_REGION if USE_GCP_REGION else None,
        "country_iso_code": COUNTRY_ISO_CODE,
    }
    try:
        accepted = inspect.signature(EmissionsTracker.__init__).parameters
    except Exception as exc:
        # Best effort: without a readable signature only the base arguments are safe.
        print(f"[CodeCarbon] Signaturprüfung fehlgeschlagen: {exc} → nur Basis-Parameter.")
        return base

    skipped = []
    for name, value in wanted.items():
        if name in accepted:
            base[name] = value
        elif value is not None:
            skipped.append(name)
    if skipped:
        print(f"[CodeCarbon] Nicht unterstützte Parameter übersprungen: {', '.join(skipped)}")
    return base

def make_tracker_named(project_name: str, output_file: str):
    """
    Create an EmissionsTracker for one measured phase.

    Args:
        project_name: passed to the tracker and used in the cache directory name.
        output_file: name of the CSV file the tracker writes.

    Returns:
        A new ``EmissionsTracker`` configured with ``tracker_kwargs_base()``.

    Side effects:
        Sets the ``CODECARBON_CACHE_DIR`` environment variable and removes
        ``codecarbon.lock`` from two candidate directories if present.
    """
    # Separate cache directory per run, intended to avoid lock conflicts.
    # NOTE(review): confirm the installed CodeCarbon version reads this variable.
    cache_dir = f"/content/.codecarbon_cache_{project_name}_{int(_time.time())}"
    _os.environ["CODECARBON_CACHE_DIR"] = cache_dir
    # Best effort: drop a stale lock file left behind by an earlier tracker.
    for d in (_os.path.expanduser("~/.codecarbon"), "/content/.codecarbon"):
        lock_file = _os.path.join(d, "codecarbon.lock")
        try:
            _os.remove(lock_file)
        except FileNotFoundError:
            pass  # nothing to clean up
        except OSError as exc:
            # Still best effort, but no longer silent (and no bare except that
            # would also swallow KeyboardInterrupt).
            print(f"[CodeCarbon] Lock-Datei nicht entfernbar: {lock_file} ({exc})")
    return EmissionsTracker(project_name=project_name, output_file=output_file, **tracker_kwargs_base())

def safe_start(tracker):
    """Start ``tracker`` without letting a failure abort the run.

    Returns:
        True if ``tracker.start()`` completed, False if it raised (the error
        is printed and the caller is expected to fall back to zero values).
    """
    try:
        tracker.start()
    except Exception as err:
        print(f"[CodeCarbon] Start fehlgeschlagen: {err} → Fallback 0/0.")
        return False
    return True

def safe_stop(tracker, started: bool):
    """Stop ``tracker`` and return whatever ``tracker.stop()`` returns.

    If the tracker was never started, or stopping raises, a placeholder object
    with ``energy_consumed=0.0`` and ``emissions=0.0`` is returned instead
    (a stop failure is printed first).
    """
    if started:
        try:
            return tracker.stop()
        except Exception as err:
            print(f"[CodeCarbon] Stop fehlgeschlagen: {err} → Fallback 0/0.")
    # Nothing was measured, or stopping failed: report zeros.
    return SimpleNamespace(energy_consumed=0.0, emissions=0.0)

def unpack_emissions(em):
    """Normalize a tracker result to an ``(energy_kwh, co2_kg)`` float pair.

    Accepted shapes, checked in this order:
      - ``None``                                     -> (0.0, 0.0)
      - object with ``energy_consumed`` + ``emissions`` attributes
      - dict with ``energy_consumed`` and ``emissions`` (or ``emissions_kg``)
      - anything ``float()`` accepts                 -> treated as CO2 only

    Anything that cannot be converted yields (0.0, 0.0).
    """
    nothing = (0.0, 0.0)
    if em is None:
        return nothing

    # Object form; on a conversion error fall through to the other forms.
    if hasattr(em, "energy_consumed") and hasattr(em, "emissions"):
        try:
            return float(em.energy_consumed), float(em.emissions)
        except Exception:
            pass

    if not isinstance(em, dict):
        # Scalar form: only the CO2 value is known.
        try:
            return 0.0, float(em)
        except Exception:
            return nothing

    energy = em.get("energy_consumed", 0.0)
    carbon = em.get("emissions", em.get("emissions_kg", 0.0))
    try:
        return float(energy), float(carbon)
    except Exception:
        return nothing

def read_energy_from_log(path: str) -> float:
    """CSV fallback: read the energy value (kWh) from the last row of a log file.

    Known column names are tried first, in a fixed order; after that any column
    whose lower-cased name contains both "energy" and "kwh" is accepted.
    Returns 0.0 if the file is missing, unreadable, or has no such column.
    """
    known_columns = ("energy_consumed", "energy_consumed_kwh", "energy_consumed (kWh)", "energy (kWh)")
    try:
        if os.path.exists(path):
            log = pd.read_csv(path)
            column = next((name for name in known_columns if name in log.columns), None)
            if column is None:
                column = next(
                    (name for name in log.columns
                     if "energy" in name.lower() and "kwh" in name.lower()),
                    None,
                )
            if column is not None:
                return float(log[column].iloc[-1])
    except Exception:
        pass
    return 0.0

def measure_phase(phase_name: str, fn, log_prefix: str):
    """
    Run ``fn()`` under its own emissions tracker and report time and energy.

    Args:
        phase_name: label of the phase; also part of the log file name.
        fn: zero-argument callable doing the actual work.
        log_prefix: prefix of the log file (``{log_prefix}_{phase_name}.csv``).

    Returns:
        ``(metrics, result)`` where ``metrics`` is a dict with the keys
        ``phase``, ``time_s``, ``energy_kwh`` and ``co2_kg`` and ``result`` is
        whatever ``fn()`` returned.

    Raises:
        Whatever ``fn()`` raises. The tracker is stopped before the exception
        propagates, so a failed phase does not leave a running tracker behind.
    """
    logfile = f"{log_prefix}_{phase_name}.csv"
    tracker = make_tracker_named(project_name=f"{log_prefix}_{phase_name}", output_file=logfile)
    started = safe_start(tracker)
    t0 = time.time()
    try:
        result = fn()
    finally:
        # Take the end time first so stopping the tracker is not counted.
        t1 = time.time()
        em_raw = safe_stop(tracker, started)
    energy_kwh, co2_kg = unpack_emissions(em_raw)
    if not energy_kwh:
        # The stop() result carried no energy value: fall back to the CSV log.
        energy_from_csv = read_energy_from_log(logfile)
        if energy_from_csv:
            energy_kwh = energy_from_csv
    return {"phase": phase_name, "time_s": t1 - t0, "energy_kwh": energy_kwh, "co2_kg": co2_kg}, result


In [None]:
# --- Evaluation config ---
# Each entry: (Hugging Face model id, long alias shown in progress output,
#              short alias used in log and result file names).
MODELS = [
    ("bigscience/bloom-560m", "bloom560m", "b560"),
    ("bigscience/bloom-3b",   "bloom3b",   "b3b"),
]
# max_new_tokens: upper bound on generated tokens per prompt / example.
# The dataset entries are handed to datasets.load_dataset as (name, config, split).
EVAL = {
    "max_new_tokens": 32,
    "ppl_dataset":  {"name":"wikitext","config":"wikitext-2-raw-v1","split":"test[:1%]"},
    "bleu_dataset": {"name":"wmt14","config":"de-en","split":"test[:32]"},
}
def parse_subset_count(split_str: str, default=32):
    """
    Derive the number of examples from a split string with an absolute slice.

    Examples: ``"test[:32]"`` -> 32, ``"test[10:42]"`` -> 32 (end minus start).
    Strings without a trailing ``[start:end]`` slice with a numeric end
    (no slice at all, percent slices such as ``"test[:1%]"``, open-ended
    slices, ``None``) return ``default``, because the count cannot be read
    from the string alone.

    Args:
        split_str: split expression as passed to ``load_dataset``; may be None.
        default: value returned when no absolute slice is found.

    Returns:
        int: the number of examples the slice selects (never negative).
    """
    m = re.search(r"\[\s*(\d*)\s*:\s*(\d+)\s*\]\s*$", split_str or "")
    if not m:
        return default
    start = int(m.group(1) or 0)  # empty start means "from the beginning"
    end = int(m.group(2))
    return max(0, end - start)
# Example count derived from the BLEU split string; used as the divisor for
# the per-example metrics in run_baseline.
BLEU_N = parse_subset_count(EVAL["bleu_dataset"]["split"], default=32)

# Fixed prompts for the free-generation ("gen") phase.
PROMPTS = [
    "Schreibe einen kurzen Absatz über nachhaltige KI.",
    "Erkläre in einfachen Worten, was Quantisierung in neuronalen Netzen ist.",
    "Nenne drei Vorteile von Mixture-of-Experts-Modellen."
]

def autocast_ctx():
    """Return the context manager that model calls are wrapped in.

    On CUDA this is ``torch.autocast`` with float16; on any other device it is
    a no-op ``nullcontext``.
    """
    if device != "cuda":
        return nullcontext()
    return torch.autocast(device_type="cuda", dtype=torch.float16)

def capture_memory():
    """Snapshot current memory use as ``(ram, vram_allocated, vram_reserved)`` in bytes.

    ``ram`` is the resident set size of the current process. The two VRAM
    values come from ``torch.cuda`` and are 0 when not running on CUDA.
    """
    rss_bytes = psutil.Process().memory_info().rss
    if device == "cuda":
        allocated = torch.cuda.memory_allocated()
        reserved = torch.cuda.memory_reserved()
    else:
        allocated = 0
        reserved = 0
    return rss_bytes, allocated, reserved

def bytes_to_gb(b):
    """Convert a byte count to GiB (division by 1024**3); returns a float."""
    bytes_per_gib = 1024 ** 3
    return float(b) / bytes_per_gib
# Load the sacreBLEU metric once; eval_bleu_llm reuses this module-level object.
bleu_metric = evaluate.load("sacrebleu")

@dataclass
class BaselineResult:
    """Aggregated result of one baseline run for a single model (built in run_baseline)."""
    model_id: str            # Hugging Face model identifier
    alias: str               # short alias of the model
    precision: str           # precision label returned by load_model
    time_s: float            # wall time summed over the gen, ppl and bleu phases
    energy_kwh: float        # energy summed over the three phases
    co2_kg: float            # emissions summed over the three phases
    tokens_out: int          # tokens generated in the gen phase
    ram_GB: float            # process RSS, captured once after all phases
    vram_alloc_GB: float     # torch.cuda.memory_allocated at that point (0 on CPU)
    vram_reserved_GB: float  # torch.cuda.memory_reserved at that point (0 on CPU)
    ppl: Optional[float] = None   # perplexity; None if no text was scored
    bleu: Optional[float] = None  # sacreBLEU score
    notes: str = ""               # free text; run_baseline stores GPU name and VRAM here

def safe_max_len(tok, model, fallback=2048, upper=100000):
    """Pick a usable maximum sequence length.

    Preference order: ``tok.model_max_length``, then
    ``model.config.max_position_embeddings``, then ``fallback``. A candidate is
    only accepted if it is an int strictly between 0 and ``upper``, which
    filters out missing values and very large sentinel values.
    """
    def _usable(value):
        return isinstance(value, int) and 0 < value < upper

    tok_limit = getattr(tok, "model_max_length", None)
    if _usable(tok_limit):
        return tok_limit

    model_cfg = getattr(model, "config", None)
    cfg_limit = getattr(model_cfg, "max_position_embeddings", None)
    return cfg_limit if _usable(cfg_limit) else fallback

def load_model(model_id: str):
    """
    Load tokenizer and causal LM for ``model_id``.

    The first attempt loads the model with ``device_map="auto"`` in float16 on
    CUDA or float32 on CPU. If that attempt raises a RuntimeError whose
    message contains "out of memory", the model is loaded again with 8-bit
    quantization (BitsAndBytesConfig).

    Returns:
        ``(tokenizer, model, precision)`` where ``precision`` is ``"fp16"``
        (CUDA), ``"fp32"`` (CPU) or ``"int8"`` (quantized fallback). The label
        now matches the dtype actually requested; it used to say "fp16" on CPU
        although the model was loaded as float32.

    Raises:
        RuntimeError: any load error that is not an out-of-memory error.
    """
    on_gpu = device == "cuda"
    dtype = torch.float16 if on_gpu else torch.float32

    tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    # Left padding with EOS as the pad token (generate() is also given EOS as pad id).
    tok.padding_side = "left"; tok.pad_token = tok.eos_token
    try:
        if on_gpu:
            torch.cuda.empty_cache(); gc.collect()
        model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto",
            torch_dtype=dtype,
        )
        return tok, model, ("fp16" if on_gpu else "fp32")
    except RuntimeError as e:
        if "out of memory" not in str(e).lower(): raise
        print(f"[Info] OOM bei FP16 für {model_id}. Fallback auf INT8…")
    # The fallback runs after the except block has ended.
    if on_gpu:
        torch.cuda.empty_cache(); gc.collect()
    bnb = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", quantization_config=bnb,
        torch_dtype=dtype,
    )
    return tok, model, "int8"

def warmup(model, tok, max_len):
    """Run one greedy single-token generation on "Hello"; the output is discarded.

    Args:
        model: model exposing ``generate`` and ``device``.
        tok: tokenizer used to encode the dummy prompt.
        max_len: truncation length passed to the tokenizer.
    """
    with torch.no_grad():
        with autocast_ctx():
            probe = tok("Hello", return_tensors="pt", truncation=True, max_length=max_len).to(model.device)
            model.generate(**probe, max_new_tokens=1, do_sample=False, pad_token_id=tok.eos_token_id)

def simple_generate(model, tok, prompts, max_new_tokens=32):
    """Greedy-generate a continuation for every prompt, one prompt at a time.

    Args:
        model: causal LM; switched to eval mode here.
        tok: matching tokenizer.
        prompts: iterable of prompt strings.
        max_new_tokens: per-prompt generation budget. It is reduced when the
            prompt leaves less room below the maximum length, but never below 1.

    Returns:
        ``(texts, total_gen_tokens)``: the decoded full sequences (prompt plus
        continuation) and the number of generated tokens summed over all prompts.
    """
    model.eval()
    max_len = safe_max_len(tok, model)
    generated_total = 0
    decoded = []
    for prompt in prompts:
        encoded = tok(prompt, return_tensors="pt", truncation=True, max_length=max_len)
        input_ids = encoded["input_ids"].to(model.device)
        mask = encoded.get("attention_mask", None)
        if mask is not None:
            mask = mask.to(model.device)

        prompt_len = input_ids.shape[1]
        # Stay within max_len, but always allow at least one new token.
        budget = max(1, min(max_new_tokens, int(max_len - prompt_len)))

        with torch.no_grad(), autocast_ctx():
            out_ids = model.generate(
                input_ids=input_ids,
                attention_mask=mask,
                max_new_tokens=budget,
                do_sample=False,
                pad_token_id=tok.eos_token_id,
            )

        generated_total += int(out_ids.shape[1] - prompt_len)
        decoded.append(tok.decode(out_ids[0], skip_special_tokens=True))
    return decoded, generated_total

def eval_perplexity(model, tok, ds_cfg):
    """
    Compute a perplexity estimate on the ``text`` column of a dataset.

    Each text with at least 4 non-whitespace-trimmed characters is tokenized
    (truncated to the maximum length) and scored with the model's own LM loss.
    Texts that tokenize to fewer than two tokens are skipped: they have no
    next-token target, so their loss would be NaN and poison the average.

    NOTE(review): the result is exp(mean of per-text losses), i.e. every text
    counts equally regardless of its length; it is not token-weighted.

    Args:
        model: causal LM returning ``.loss`` when called with ``labels``.
        tok: matching tokenizer.
        ds_cfg: dict with ``name``, ``config`` and ``split`` for ``load_dataset``.

    Returns:
        float perplexity, or None if no text was scored.
    """
    ds = load_dataset(ds_cfg["name"], ds_cfg["config"], split=ds_cfg["split"])
    max_len = safe_max_len(tok, model); losses = []
    with torch.no_grad():
        for t in ds["text"]:
            if not isinstance(t, str) or len(t.strip()) < 4: continue
            enc = tok(t, return_tensors="pt", truncation=True, max_length=max_len)
            ids = enc["input_ids"].to(model.device)
            if ids.shape[1] < 2:
                continue  # nothing to predict for a single token
            with autocast_ctx(): out = model(ids, labels=ids)
            losses.append(float(out.loss.detach().cpu()))
    return math.exp(np.mean(losses)) if losses else None

def eval_bleu_llm(model, tok, ds_cfg, max_new_tokens=32):
    """
    Score German->English translation with sacreBLEU using a plain prompt.

    For every example the model is prompted with
    ``Translate to English: / German: <de> / English:`` and decoded greedily.
    The hypothesis is the first line of the newly generated tokens only.
    Slicing the output at the prompt length (instead of splitting the full
    decoded text on "English:") keeps the hypothesis correct when the source
    sentence or the continuation contains "English:" again, and an empty
    continuation now yields an empty hypothesis rather than the whole prompt.

    Args:
        model: causal LM exposing ``generate`` and ``device``.
        tok: matching tokenizer.
        ds_cfg: dict with ``name``, ``config`` and ``split`` for ``load_dataset``;
            examples must provide ``ex["translation"]["de"]`` and ``["en"]``.
        max_new_tokens: generation budget per example (at least 1 is used).

    Returns:
        float: the ``score`` field of the sacreBLEU result.
    """
    ds = load_dataset(ds_cfg["name"], ds_cfg["config"], split=ds_cfg["split"])
    max_len = safe_max_len(tok, model); preds, refs = [], []
    with torch.no_grad():
        for ex in ds:
            de, en = ex["translation"]["de"], ex["translation"]["en"]
            prompt = f"Translate to English:\nGerman: {de}\nEnglish:"
            inputs = tok(prompt, return_tensors="pt", truncation=True, max_length=max_len).to(model.device)
            prompt_len = inputs["input_ids"].shape[1]
            room = max_len - prompt_len
            cur_new = max(1, min(max_new_tokens, int(room)))
            with autocast_ctx():
                out = model.generate(**inputs, max_new_tokens=cur_new, do_sample=False, pad_token_id=tok.eos_token_id)
            # The output starts with the prompt tokens; keep only what was generated.
            continuation = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()
            hyp = continuation.split("\n")[0].strip()
            preds.append(hyp); refs.append([en])
    return float(bleu_metric.compute(predictions=preds, references=refs)["score"])

# --- Baseline run (per phase) ---
def run_baseline(model_id: str, alias_long: str, alias_short: str):
    """
    Run the full baseline for one model and collect per-phase and total metrics.

    Steps: load the model, warm it up (not measured), then run three measured
    phases via measure_phase: "gen" (free generation on PROMPTS), "ppl"
    (perplexity) and "bleu" (translation BLEU). Each phase writes its own
    log file named ``bloom_{alias_short}_{phase}.csv``.

    Args:
        model_id: Hugging Face model identifier.
        alias_long: long alias; not used inside this function.
        alias_short: short alias used for log names and in the result tables.

    Returns:
        ``(res, examples, per_phase_df)``: the aggregated BaselineResult, the
        generated texts of the gen phase, and a DataFrame with one row per
        phase including derived metrics. The per-phase table is also printed.
    """
    tok, model, prec = load_model(model_id)
    max_len = safe_max_len(tok, model)
    warmup(model, tok, max_len)  # warm-up happens outside the measurement
    log_prefix = f"bloom_{alias_short}"

    def _do_gen(): return simple_generate(model, tok, PROMPTS, EVAL["max_new_tokens"])
    gen_metrics, (examples, tokens_out) = measure_phase("gen", _do_gen, log_prefix)

    def _do_ppl(): return eval_perplexity(model, tok, EVAL["ppl_dataset"])
    ppl_metrics, ppl = measure_phase("ppl", _do_ppl, log_prefix)

    if device == "cuda": torch.cuda.empty_cache()
    def _do_bleu(): return eval_bleu_llm(model, tok, EVAL["bleu_dataset"], EVAL["max_new_tokens"])
    bleu_metrics, bleu = measure_phase("bleu", _do_bleu, log_prefix)

    # Totals over the three measured phases.
    total_time   = gen_metrics["time_s"] + ppl_metrics["time_s"] + bleu_metrics["time_s"]
    total_energy = gen_metrics["energy_kwh"] + ppl_metrics["energy_kwh"] + bleu_metrics["energy_kwh"]
    total_co2    = gen_metrics["co2_kg"] + ppl_metrics["co2_kg"] + bleu_metrics["co2_kg"]

    # Memory snapshot taken once, after all phases (current values, not peaks).
    ram, valloc, vres = capture_memory()

    # One row per phase, in the order gen, ppl, bleu; phase-specific results
    # are filled only in the row of the phase that produced them.
    per_phase_df = pd.DataFrame([gen_metrics, ppl_metrics, bleu_metrics])
    per_phase_df["alias"] = alias_short
    per_phase_df["model_id"] = model_id
    per_phase_df["precision"] = prec
    per_phase_df["tokens_out"] = [tokens_out, None, None]
    per_phase_df["ppl"] = [None, ppl, None]
    per_phase_df["bleu"] = [None, None, bleu]

    # Derived metrics (vectorized)
    per_phase_df["wh_total"] = per_phase_df["energy_kwh"] * 1000.0  # kWh -> Wh
    # Throughput and energy per token apply to the gen phase only.
    per_phase_df["tokens_s"] = None
    per_phase_df.loc[per_phase_df["phase"]=="gen", "tokens_s"] = (
        per_phase_df.loc[per_phase_df["phase"]=="gen", "tokens_out"]
        / per_phase_df.loc[per_phase_df["phase"]=="gen", "time_s"]
    )
    per_phase_df["wh_per_token"] = None
    per_phase_df.loc[per_phase_df["phase"]=="gen", "wh_per_token"] = (
        per_phase_df.loc[per_phase_df["phase"]=="gen", "wh_total"]
        / per_phase_df.loc[per_phase_df["phase"]=="gen", "tokens_out"]
    )
    # Per-example time and energy apply to the bleu phase only; BLEU_N is the
    # example count derived from the configured split string.
    per_phase_df["s_per_example"] = None
    per_phase_df.loc[per_phase_df["phase"]=="bleu", "s_per_example"] = (
        per_phase_df.loc[per_phase_df["phase"]=="bleu", "time_s"] / float(BLEU_N)
    )
    per_phase_df["wh_per_example"] = None
    per_phase_df.loc[per_phase_df["phase"]=="bleu", "wh_per_example"] = (
        per_phase_df.loc[per_phase_df["phase"]=="bleu", "wh_total"] / float(BLEU_N)
    )
    # Emission factor (kg CO2 per kWh) per phase; infinities from a zero
    # energy value are turned into NaN.
    per_phase_df["kg_per_kwh"] = (per_phase_df["co2_kg"] / per_phase_df["energy_kwh"]).replace([np.inf, -np.inf], np.nan)

    print(f"\nPer-Phase ({alias_short}) — Standort:",
          f"GCP {CLOUD_REGION}" if USE_GCP_REGION else f"Ländermix {COUNTRY_ISO_CODE}")
    print(per_phase_df[[
        "phase","time_s","energy_kwh","co2_kg","kg_per_kwh",
        "tokens_out","ppl","bleu","tokens_s","wh_per_token","s_per_example","wh_per_example"
    ]])

    res = BaselineResult(
        model_id=model_id, alias=alias_short, precision=prec,
        time_s=total_time, energy_kwh=total_energy, co2_kg=total_co2,
        tokens_out=int(tokens_out),
        ram_GB=bytes_to_gb(ram), vram_alloc_GB=bytes_to_gb(valloc), vram_reserved_GB=bytes_to_gb(vres),
        ppl=ppl, bleu=bleu, notes=f"GPU={gpu_name}, VRAM={vram_total_gb:.1f} GB"
    )
    return res, examples, per_phase_df


In [None]:
# --- Run & save ---
# Run the baseline for every configured model and collect the totals, the
# generated samples and the per-phase tables.
all_results, all_samples, phase_tables = [], [], []
for model_id, alias_long, alias_short in MODELS:
    print(f"\n### Starte Baseline (per Phase): {alias_long}")
    res, ex, phase_df = run_baseline(model_id, alias_long, alias_short)
    all_results.append(asdict(res)); all_samples.append((alias_short, ex)); phase_tables.append(phase_df)

df = pd.DataFrame(all_results).sort_values("alias").reset_index(drop=True)
# Emission factor (kg/kWh) on the total level; infinities become NaN
df["kg_per_kwh"] = (df["co2_kg"] / df["energy_kwh"]).replace([np.inf, -np.inf], np.nan)

print("\nGesamt (summiert über Phasen):")
print(df[["model_id","alias","precision","time_s","energy_kwh","co2_kg","kg_per_kwh",
          "tokens_out","ppl","bleu","ram_GB","vram_alloc_GB","vram_reserved_GB","notes"]])

# Stack the per-phase tables of all models into one frame.
df_phase = pd.concat(phase_tables, ignore_index=True)
print("\nPer-Phase Übersicht (mit abgeleiteten Kennzahlen):")
print(df_phase[["alias","phase","time_s","energy_kwh","co2_kg","kg_per_kwh",
                "tokens_out","ppl","bleu","tokens_s","wh_per_token","s_per_example","wh_per_example"]])

# Persist both tables as CSV in the project folder.
out_dir = project_path
df.to_csv(os.path.join(out_dir, "baseline_bloom_dual_results.csv"), index=False)
df_phase.to_csv(os.path.join(out_dir, "baseline_bloom_dual_per_phase.csv"), index=False)
print("Gespeichert (gesamt):", os.path.join(out_dir, "baseline_bloom_dual_results.csv"))
print("Gespeichert (per Phase):", os.path.join(out_dir, "baseline_bloom_dual_per_phase.csv"))

# Write the generated sample texts, one file per model.
for alias_short, ex in all_samples:
    samples_path = os.path.join(out_dir, f"baseline_samples_{alias_short}.txt")
    with open(samples_path, "w", encoding="utf-8") as f:
        for i, txt in enumerate(ex, 1):
            f.write(f"--- Beispiel {i} ({alias_short}) ---\n{txt}\n\n")
    print("Beispiele gespeichert:", samples_path)

# List the emissions log files that measure_phase names per model and phase.
print("\nEmissions-Logs (pro Phase):")
for _, _, alias_short in MODELS:
    print(f" - bloom_{alias_short}_gen.csv")
    print(f" - bloom_{alias_short}_ppl.csv")
    print(f" - bloom_{alias_short}_bleu.csv")