In [34]:
import json, math
import numpy as np
from pathlib import Path

CSR_DIR = Path("/home/yyb02274/yolov5/csr_dump_ratio[0.9]")   # CSR 덤프 폴더
WS_DIR  = Path("ws_dump_ratio[0.9]")    # Weight Sharing 결과 폴더
WS_DIR.mkdir(exist_ok=True)

K = 32          # 코드북 크기
MAX_ITERS = 30  # k-means 반복 수
SEED = 0        # 재현성
np.random.seed(SEED)
rng = np.random.default_rng(SEED)

In [18]:
def kmeanspp_init_1d(x: np.ndarray, k: int):
    x = x.astype(np.float32, copy=False)
    n = x.shape[0]
    centers = np.empty(k, dtype=np.float32)
    idx0 = rng.integers(0, n)
    centers[0] = x[idx0]
    d2 = (x - centers[0])**2
    for i in range(1, k):
        probs = d2 / (d2.sum() + 1e-12)
        idx = rng.choice(n, p=probs)
        centers[i] = x[idx]
        d2 = np.minimum(d2, (x - centers[i])**2)
    return centers

def kmeans_1d(x: np.ndarray, k: int, max_iters=30):
    x = x.astype(np.float32, copy=False)
    if x.size == 0:
        return np.zeros((0,), np.float32), np.zeros((0,), np.int64)
    uniq = np.unique(x)
    if uniq.size <= k:
        centers = np.zeros((k,), np.float32)
        centers[:uniq.size] = uniq
        labels = np.searchsorted(uniq, x)
        return centers, labels.astype(np.int64)

    centers = kmeanspp_init_1d(x, k)
    last_inertia = np.inf
    for _ in range(max_iters):
        # assign
        dist2 = (x[:, None] - centers[None, :])**2
        labels = dist2.argmin(axis=1)
        # update
        new_centers = centers.copy()
        for j in range(k):
            mask = (labels == j)
            if mask.any():
                new_centers[j] = x[mask].mean()
            else:
                new_centers[j] = x[rng.integers(0, x.size)]
        centers = new_centers
        inertia = ((x - centers[labels])**2).sum()
        if abs(last_inertia - inertia) <= 1e-6 * max(1.0, last_inertia):
            break
        last_inertia = inertia

    # 정렬해서 라벨 재맵핑(가독성)
    order = np.argsort(centers)
    remap = np.zeros_like(order)
    remap[order] = np.arange(k)
    centers = centers[order]
    labels = remap[labels]
    return centers, labels.astype(np.int64)


In [19]:
def process_layer_to_ws(layer_name: str, k: int = K):
    """csr_dump/<name>.npz 에서 values를 읽어 k-means 수행 후 ws_dump에 저장"""
    npz_path = CSR_DIR / f"{layer_name.replace('.', '_')}.npz"
    z = np.load(npz_path)
    vals = z["values"].astype(np.float32)
    nnz = vals.size

    if nnz == 0:
        codebook = np.zeros((0,), dtype=np.float16)
        indices  = np.zeros((0,), dtype=np.uint8)
        mse = 0.0
    else:
        centers, labels = kmeans_1d(vals, k, MAX_ITERS)
        recon = centers[labels]
        se  = ((vals - recon)**2).sum()
        mse = float(se / nnz)

        codebook = centers.astype(np.float16)   # 코드북 ↓
        indices  = labels.astype(np.uint8)      # 인덱스( k<=256 )

    out_path = WS_DIR / f"{layer_name.replace('.', '_')}.npz"
    np.savez(out_path, codebook=codebook, indices=indices)
    return {"name": layer_name, "k": k, "nnz": int(nnz), "mse": mse,
            "codebook_dtype": "float16", "indices_dtype": "uint8"}


In [35]:
mani = json.load(open(CSR_DIR/"manifest_ratio[0.9].json", "r", encoding="utf-8"))
summary = []
total_nnz, total_se = 0, 0.0

for layer in mani["layers"]:
    info = process_layer_to_ws(layer["name"], K)
    summary.append(info)
    total_nnz += info["nnz"]
    total_se  += info["mse"] * max(1, info["nnz"])

rmse = math.sqrt(total_se / max(1, total_nnz)) if total_nnz else 0.0

mani_ws = {
    "source_csr_dir": str(CSR_DIR),
    "out_dir": str(WS_DIR),
    "k": K,
    "layers": summary,
    "global_rmse": rmse,
    "note": "Each layer has codebook(float16) and indices(uint8) aligned with CSR values order."
}
with open(WS_DIR / "manifest_ws.json", "w", encoding="utf-8") as f:
    json.dump(mani_ws, f, indent=2)

print(f"✅ Weight sharing done. layers={len(summary)}, k={K}")
print(f"   Global RMSE (over nnz): {rmse:.6f}")
print(f"   Saved to: {WS_DIR.resolve()}")


✅ Weight sharing done. layers=60, k=32
   Global RMSE (over nnz): 0.001673
   Saved to: /home/yyb02274/yolov5/ws_dump_ratio[0.9]


In [26]:
import sys
sys.path.append("/home/yyb02274/yolov5")
from utils.general import check_yaml
from utils.dataloaders import check_dataset, create_dataloader
from pathlib import Path
%cd /home/yyb02274/yolov5
DATA = "data/coco128.yaml"  # 네 데이터셋 yaml 경로로 변경
# 문자열 yaml -> dict
data_dict = check_dataset(check_yaml(DATA))

# 경로 정규화(／ 같은 풀와이드 슬래시 대응)
def norm_path(p):
    s = str(p).replace("／", "/").replace("\\", "/")
    while '//' in s:
        s = s.replace('//', '/')
    return s

for k in ("train", "val", "test"):
    if k in data_dict:
        data_dict[k] = norm_path(data_dict[k])

print("val path:", data_dict.get("val"))
assert Path(data_dict["val"]).exists(), f"val path not found: {data_dict['val']}"


/home/yyb02274/yolov5
val path: /home/yyb02274/datasets/coco128/images/train2017


In [28]:
import os, sys
os.chdir("/home/yyb02274/yolov5")  # ← 네 YOLOv5 레포 루트 경로
print("CWD:", os.getcwd())

# 경로가 맞다면 이게 이제 됩니다
from models.common import DetectMultiBackend

CWD: /home/yyb02274/yolov5


In [30]:
# === CONFIG ===
PRUNED_MODEL_PATH = "/home/yyb02274/yolov5/runs/train/retrain_pruned/weights/best.pt"  # 같은 아키텍처의 ckpt
DATA_YAML         = "data/coco128.yaml"  # 네 데이터셋 yaml
ABS_TRAIN = "/home/yyb02274/datasets/coco128/images/train2017"  # 필요시 실제 경로
ABS_VAL   = "/home/yyb02274/datasets/coco128/images/train2017"

CSR_DIR = "csr_dump_ratio[0.5]"   # row_ptr, col_ind, values 저장된 폴더
WS_DIR  = "ws_dump_ratio[0.5]"    # codebook, indices 저장된 폴더

# === Load YOLOv5 model skeleton ===
import torch
from pathlib import Path
from models.common import DetectMultiBackend

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dm = DetectMultiBackend(str(PRUNED_MODEL_PATH), device=device, dnn=False, fuse=False)
model = dm.model.to(device).eval()   # nn.Module
print("Loaded model:", type(model))


Loaded model: <class 'models.yolo.DetectionModel'>


In [36]:
import json, numpy as np
from pathlib import Path

CSR_DIR = Path(CSR_DIR)
WS_DIR  = Path(WS_DIR)
mani = json.load(open(CSR_DIR / "manifest_ratio[0.9].json", "r", encoding="utf-8"))

def csr_to_dense(row_ptr, col_ind, values, shape2d):
    rows, cols = shape2d
    dense = np.zeros((rows, cols), dtype=np.float32)
    for r in range(rows):
        s, e = int(row_ptr[r]), int(row_ptr[r+1])
        if e > s:
            dense[r, col_ind[s:e]] = values[s:e]
    return dense

def rebuild_layer_dense(name, meta):
    csr = np.load(CSR_DIR / f"{name.replace('.', '_')}.npz")
    ws  = np.load(WS_DIR  / f"{name.replace('.', '_')}.npz")

    row_ptr = csr["row_ptr"].astype(np.int64)
    col_ind = csr["col_ind"].astype(np.int64)
    # values는 필요 없음: WS codebook+indices로 재구성
    codebook = ws["codebook"].astype(np.float32)
    indices  = ws["indices"].astype(np.int64)
    values   = codebook[indices] if indices.size else np.zeros((0,), dtype=np.float32)

    dense2d = csr_to_dense(row_ptr, col_ind, values, tuple(meta["shape2d"]))
    if meta["type"] == "conv2d":
        O, I, kH, kW = meta["shape"]
        return dense2d.reshape(O, I, kH, kW)
    else:
        return dense2d  # (out,in)

# 주입
module_map = dict(model.named_modules())
mismatch = []
for L in mani["layers"]:
    name = L["name"]
    Wrec = rebuild_layer_dense(name, L)
    mod = module_map.get(name, None)
    if (mod is None) or (not hasattr(mod, "weight")):
        mismatch.append(name); continue
    with torch.no_grad():
        mod.weight.data = torch.from_numpy(Wrec).to(mod.weight.data.device, dtype=mod.weight.data.dtype)

print("Injected WS weights. mismatches:", mismatch[:5], "count:", len(mismatch))


Injected WS weights. mismatches: [] count: 0


In [37]:
from utils.general import check_yaml
try:
    from utils.dataloaders import check_dataset, create_dataloader
except:
    from utils.general import check_dataset
    from utils.dataloaders import create_dataloader

# yaml -> dict
data_dict = check_dataset(check_yaml(DATA_YAML))

# 풀와이드 슬래시/중복 슬래시 방지
def norm_path(p):
    s = str(p).replace("／", "/").replace("\\", "/")
    while '//' in s:
        s = s.replace('//', '/')
    return s

for k in ("train","val","test"):
    if k in data_dict and data_dict[k]:
        data_dict[k] = norm_path(data_dict[k])

# 필요하면 강제 절대경로 덮어쓰기 (coco128처럼 path+relative 섞여 망가질 때)
data_dict["train"] = ABS_TRAIN
data_dict["val"]   = ABS_VAL

from torch import tensor
stride = int(getattr(model, 'stride', tensor([32])).max())
IMGSZ, BATCH, WORKERS = 640, 16, 4

dataloader = create_dataloader(
    data_dict["val"], IMGSZ, BATCH, stride,
    single_cls=False, pad=0.5, rect=True, workers=WORKERS, prefix="val: "
)[0]
print("val images:", len(dataloader.dataset))


val: Scanning /home/yyb02274/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]


val images: 128


In [38]:
from val import run as val_run
import pandas as pd

DEVICE = "0" if torch.cuda.is_available() else "cpu"

results, maps, _ = val_run(
    data=data_dict,
    dataloader=dataloader,   # 우리가 만든 로더
    imgsz=IMGSZ,
    batch_size=BATCH,
    model=model,             # ✅ WS 가중치 주입된 모델
    device=DEVICE,
    iou_thres=0.6,
    single_cls=False,
    verbose=True,
    plots=False
)

# YOLOv5 val.run 결과 포맷: (P, R, mAP50, mAP50_95, val_box, val_obj, val_cls)
P, R, mAP50, mAP5095, val_box, val_obj, val_cls = results
print(f"[WS] P={P:.4f}  R={R:.4f}  mAP@0.5={mAP50:.4f}  mAP@0.5:.95={mAP5095:.4f}")
print(f"[WS] val/box_loss={val_box:.5f}  val/obj_loss={val_obj:.5f}  val/cls_loss={val_cls:.5f}")

row = {
    "stage": "pruned+WS",
    "metrics/precision": P,
    "metrics/recall": R,
    "metrics/mAP_0.5": mAP50,
    "metrics/mAP_0.5:0.95": mAP5095,
    "val/box_loss": val_box,
    "val/obj_loss": val_obj,
    "val/cls_loss": val_cls,
    "imgsz": IMGSZ, "batch": BATCH
}
pd.DataFrame([row]).to_csv("ws_eval_metrics_ratio[0.9].csv", index=False)
print("Saved ws_eval_metrics_ratio[0.9].csv")


                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100%|██████████| 8/8 [00:00<00:00,  8.41it/s]
                   all        128        929   0.000595    0.00507   0.000387   0.000178
                person        128        254    0.00114     0.0394   0.000722   0.000345
               bicycle        128          6          0          0          0          0
                   car        128         46          0          0          0          0
            motorcycle        128          5          0          0          0          0
              airplane        128          6     0.0263      0.167     0.0167    0.00668
                   bus        128          7          0          0          0          0
                 train        128          3          0          0          0          0
                 truck        128         12          0          0          0          0
                  boat        128          6          0        

[WS] P=0.0006  R=0.0051  mAP@0.5=0.0004  mAP@0.5:.95=0.0002
[WS] val/box_loss=0.00000  val/obj_loss=0.00000  val/cls_loss=0.00000
Saved ws_eval_metrics_ratio[0.9].csv
