In [4]:
!pip -q install torch torchvision torchaudio pytorch-lightning plotly --upgrade

import os, time, random, numpy as np, torch
import torch.nn.functional as F
import torch.nn as nn
import torchvision, torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.models import resnet18, ResNet18_Weights, mobilenet_v3_small
from torch.nn.utils import prune
import plotly.express as px
import pandas as pd

# Deterministiklik
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
os.makedirs("assets", exist_ok=True)
print("✅ Setup ready | Device:", DEVICE)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/832.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m832.4/832.4 kB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/9.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m7.1/9.8 MB[0m [31m213.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m9.8/9.8 MB[0m [31m218.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m130.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/983.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m56.6 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Setup ready | Device

In [5]:
IMG_SIZE = 224
BATCH    = 128

train_tf = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.6, 1.0)),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]) # ImageNet mean/std
])

test_tf = T.Compose([
    T.Resize(IMG_SIZE),
    T.CenterCrop(IMG_SIZE),
    T.ToTensor(),
    T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

trainset = torchvision.datasets.CIFAR10(root="./data", train=True,  download=True, transform=train_tf)
testset  = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=test_tf)

trainloader = DataLoader(trainset, batch_size=BATCH, shuffle=True,  num_workers=2, pin_memory=True)
testloader  = DataLoader(testset,  batch_size=BATCH, shuffle=False, num_workers=2, pin_memory=True)

len(trainset), len(testset)


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


(50000, 10000)

In [6]:
NUM_CLASSES = 10

# Teacher: ImageNet ön-eğitimli → son katmanı CIFAR-10'a uydur
teacher = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
teacher.fc = nn.Linear(teacher.fc.in_features, NUM_CLASSES)
teacher = teacher.to(DEVICE)

# Student: sıfırdan MobileNetV3-Small
student = mobilenet_v3_small(weights=None, num_classes=NUM_CLASSES).to(DEVICE)

def count_params(m):
    return sum(p.numel() for p in m.parameters())

print(f"Teacher params: {count_params(teacher):,}")
print(f"Student params: {count_params(student):,}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 213MB/s]


Teacher params: 11,181,642
Student params: 1,528,106


In [7]:
@torch.no_grad()
def evaluate(model, loader, device=DEVICE):
    model.eval()
    correct, total = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        pred = logits.argmax(1)
        correct += (pred == y).sum().item()
        total += y.numel()
    return correct / total

def measure_latency(model, device=DEVICE, input_shape=(1,3,IMG_SIZE,IMG_SIZE), runs=30, warmup=5):
    model.eval()
    x = torch.randn(*input_shape, device=device)
    # warmup
    for _ in range(warmup):
        _ = model(x)
    if device == "cuda": torch.cuda.synchronize()
    ts=[]
    for _ in range(runs):
        t0 = time.time()
        _  = model(x)
        if device == "cuda": torch.cuda.synchronize()
        ts.append((time.time()-t0)*1000)
    return float(np.mean(ts)), float(np.std(ts))

def size_mb(model):
    total_params = sum(p.numel() for p in model.parameters())
    return total_params * 4 / 1e6  # fp32 ~4 byte/param


In [8]:
LR = 1e-3
TEACHER_EPOCHS = 3

opt_t = torch.optim.AdamW(teacher.parameters(), lr=LR)

for ep in range(1, TEACHER_EPOCHS+1):
    teacher.train()
    for x, y in trainloader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        logits = teacher(x)
        loss = F.cross_entropy(logits, y)
        opt_t.zero_grad(); loss.backward(); opt_t.step()
    acc = evaluate(teacher, testloader)
    print(f"[Teacher] Epoch {ep}/{TEACHER_EPOCHS}  Acc={acc:.4f}")

teacher_base_acc = evaluate(teacher, testloader)
print("Teacher final Acc:", teacher_base_acc)


[Teacher] Epoch 1/3  Acc=0.8538
[Teacher] Epoch 2/3  Acc=0.8781
[Teacher] Epoch 3/3  Acc=0.8991
Teacher final Acc: 0.8991


In [9]:
STUDENT_EPOCHS = 6
T_temp = 4.0   # distillation sıcaklığı
ALPHA  = 0.7   # soft (teacher) vs hard (etiket) ağırlığı

def kd_loss(student_logits, teacher_logits, hard_targets, T=T_temp, alpha=ALPHA):
    # Teacher'ın yumuşak hedeflerini al
    soft_teacher = F.log_softmax(teacher_logits / T, dim=1)
    soft_student = F.log_softmax(student_logits / T, dim=1)
    kd = F.kl_div(soft_student, soft_teacher, reduction="batchmean", log_target=True) * (T*T)
    ce = F.cross_entropy(student_logits, hard_targets)
    return alpha*kd + (1-alpha)*ce

opt_s = torch.optim.AdamW(student.parameters(), lr=LR)

for ep in range(1, STUDENT_EPOCHS+1):
    student.train(); teacher.eval()
    for x, y in trainloader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        with torch.no_grad():
            tlog = teacher(x)     # öğretmenin logits'i
        slog = student(x)
        loss = kd_loss(slog, tlog, y)
        opt_s.zero_grad(); loss.backward(); opt_s.step()
    acc = evaluate(student, testloader)
    print(f"[Student-KD] Epoch {ep}/{STUDENT_EPOCHS}  Acc={acc:.4f}")

student_kd_acc = evaluate(student, testloader)
print("Student (KD) Acc:", student_kd_acc)


[Student-KD] Epoch 1/6  Acc=0.1001
[Student-KD] Epoch 2/6  Acc=0.6942
[Student-KD] Epoch 3/6  Acc=0.7348
[Student-KD] Epoch 4/6  Acc=0.7692
[Student-KD] Epoch 5/6  Acc=0.8041
[Student-KD] Epoch 6/6  Acc=0.7952
Student (KD) Acc: 0.7952


In [10]:
modules_to_prune = []
for m in student.features.modules():
    if isinstance(m, nn.Conv2d):
        modules_to_prune.append((m, 'weight'))

for (m, name) in modules_to_prune:
    prune.l1_unstructured(m, name='weight', amount=0.5)

pruned_acc = evaluate(student, testloader)
print("Student (KD + Prune 50%) Acc:", pruned_acc)

# Maskeleri kalıcı hale getir (isteğe bağlı, rapor için genelde iyi olur)
for (m, name) in modules_to_prune:
    prune.remove(m, 'weight')


Student (KD + Prune 50%) Acc: 0.2144


In [11]:
print("teacher:", 'teacher' in globals())
print("student:", 'student' in globals())
print("testloader:", 'testloader' in globals())


teacher: True
student: True
testloader: True


In [12]:
# Güvenlik: gerekli objeler var mı?
needed = ["teacher", "student", "testloader", "evaluate", "measure_latency", "size_mb", "DEVICE"]
missing = [n for n in needed if n not in globals()]
assert not missing, f"Eksik değişken/func: {missing}. Lütfen 1–7. adımları tekrar çalıştır."

import pandas as pd
import plotly.express as px
import torch

def snapshot(model, name):
    # Hepsi GPU'da ölçülecek
    acc = evaluate(model, testloader, device=DEVICE)
    lat_m, lat_s = measure_latency(model.to(DEVICE), device=DEVICE, runs=30, warmup=5)
    mb = size_mb(model)
    return dict(Variant=name, Acc=acc, LatencyMs=lat_m, SizeMB=mb, Device=DEVICE)


In [13]:
rows = []
rows.append(snapshot(teacher, "Teacher-ResNet18 (GPU)"))
rows.append(snapshot(student, "Student-KD-Pruned (GPU)"))

df = pd.DataFrame(rows)
display(df)

fig1 = px.bar(df, x="Variant", y="Acc", title="Accuracy (GPU)")
fig2 = px.bar(df, x="Variant", y="LatencyMs", title="Latency (ms, batch=1, GPU)")
fig3 = px.bar(df, x="Variant", y="SizeMB", title="Model Size (MB ~ params*4B)")
fig1.show(); fig2.show(); fig3.show()

import os
os.makedirs("assets", exist_ok=True)
df.to_csv("assets/results_gpu_only.csv", index=False)
print("📦 Kaydedildi: assets/results_gpu_only.csv")


Unnamed: 0,Variant,Acc,LatencyMs,SizeMB,Device
0,Teacher-ResNet18 (GPU),0.8991,2.901284,44.726568,cuda
1,Student-KD-Pruned (GPU),0.2144,7.893038,6.112424,cuda


📦 Kaydedildi: assets/results_gpu_only.csv


In [14]:
try:
    print("Student (KD, prune ÖNCESİ) acc:", student_kd_acc)
except NameError:
    # Değişken yoksa şimdi ölçelim (pruning sonrası öğrenci üzerinde fikir verir)
    print("student_kd_acc değişkeni yoktu, mevcut öğrenci (pruned) acc:", evaluate(student, testloader))


Student (KD, prune ÖNCESİ) acc: 0.7952


In [15]:
# ✅ KD'yi kısaca yeniden koş (4 epoch) ve 'assets/student_kd.pt' olarak kaydet
import torch, torch.nn as nn, torch.nn.functional as F
from torchvision.models import mobilenet_v3_small

LR = 3e-4
EPOCHS_KD = 4
ALPHA = 0.9
T_temp = 4.0
NUM_CLASSES = 10

# fresh student
student = mobilenet_v3_small(weights=None, num_classes=NUM_CLASSES).to(DEVICE)

def kd_loss(slog, tlog, y, T=T_temp, a=ALPHA):
    st = F.log_softmax(slog/T, dim=1)
    tt = F.log_softmax(tlog/T, dim=1)
    kd = F.kl_div(st, tt, reduction="batchmean", log_target=True)*(T*T)
    ce = F.cross_entropy(slog, y, label_smoothing=0.05)
    return a*kd + (1-a)*ce

opt = torch.optim.AdamW(student.parameters(), lr=LR, weight_decay=1e-4)
for ep in range(1, EPOCHS_KD+1):
    student.train(); teacher.eval()
    for x,y in trainloader:
        x,y=x.to(DEVICE), y.to(DEVICE)
        with torch.no_grad(): tlog = teacher(x)
        slog = student(x)
        loss = kd_loss(slog, tlog, y)
        opt.zero_grad(); loss.backward(); opt.step()
    acc = evaluate(student, testloader)
    print(f"[KD-rebuild] ep{ep}: acc={acc:.4f}")

import os
os.makedirs("assets", exist_ok=True)
torch.save(student.state_dict(), "assets/student_kd.pt")
print("✅ Kaydedildi: assets/student_kd.pt  | KD acc:", evaluate(student, testloader))


[KD-rebuild] ep1: acc=0.1000
[KD-rebuild] ep2: acc=0.5535
[KD-rebuild] ep3: acc=0.6394
[KD-rebuild] ep4: acc=0.6676
✅ Kaydedildi: assets/student_kd.pt  | KD acc: 0.6676


In [16]:
# 🔪 %20 unstructured pruning + kısa finetune + remove + save
import torch, torch.nn.functional as F
import torch.nn as nn
from torch.nn.utils import prune

# KD checkpoint'ten başla
student.load_state_dict(torch.load("assets/student_kd.pt", map_location=DEVICE))
student.to(DEVICE).train()

# Conv2d ağırlıklarını buda
modules_to_prune = []
for m in student.features.modules():
    if isinstance(m, nn.Conv2d):
        modules_to_prune.append((m, 'weight'))

amount = 0.2  # 0.3 de deneyebilirsin, ama önce 0.2 güvenli
for (m, name) in modules_to_prune:
    prune.l1_unstructured(m, name='weight', amount=amount)

print("Prune sonrası (hemen) acc:", evaluate(student.eval(), testloader)); student.train()

# Kısa finetune (2 epoch)
opt = torch.optim.AdamW(student.parameters(), lr=3e-4, weight_decay=1e-4)
for ep in range(1, 3):
    for x,y in trainloader:
        x,y = x.to(DEVICE), y.to(DEVICE)
        out  = student(x)
        loss = F.cross_entropy(out, y, label_smoothing=0.05)
        opt.zero_grad(); loss.backward(); opt.step()
    print(f"[Post-prune finetune ep{ep}] acc={evaluate(student.eval(), testloader):.4f}")

# Maskeleri kalıcılaştır (parametreleri gerçekten sıfırla)
for (m, name) in modules_to_prune:
    prune.remove(m, 'weight')

import os, torch
os.makedirs("assets", exist_ok=True)
torch.save(student.state_dict(), "assets/student_kd_pruned.pt")
print("✅ Kaydedildi: assets/student_kd_pruned.pt  | Final acc:", evaluate(student.eval(), testloader))


Prune sonrası (hemen) acc: 0.6131
[Post-prune finetune ep1] acc=0.7134
[Post-prune finetune ep2] acc=0.7285
✅ Kaydedildi: assets/student_kd_pruned.pt  | Final acc: 0.7285


In [17]:
import plotly.express as px
import pandas as pd

# Variant sırasını sabitle (öğretmen -> öğrenci)
order = ["Teacher-ResNet18 (GPU)", "Student-KD-Pruned (GPU)"]
df["Variant"] = pd.Categorical(df["Variant"], categories=order, ordered=True)

# 1) Accuracy – çizgi + noktalar
fig_acc = px.line(df, x="Variant", y="Acc", markers=True,
                  title="Accuracy (GPU) — Teacher vs Student",
                  line_shape="linear")
fig_acc.update_traces(mode="lines+markers")
fig_acc.update_layout(xaxis_title="", yaxis_title="Accuracy")
fig_acc.show()

# 2) Latency – çizgi + noktalar
fig_lat = px.line(df, x="Variant", y="LatencyMs", markers=True,
                  title="Latency (ms, batch=1, GPU) — Teacher vs Student")
fig_lat.update_traces(mode="lines+markers")
fig_lat.update_layout(xaxis_title="", yaxis_title="Latency (ms)")
fig_lat.show()

# 3) Model Size – çizgi + noktalar
fig_size = px.line(df, x="Variant", y="SizeMB", markers=True,
                   title="Model Size (MB) — Teacher vs Student")
fig_size.update_traces(mode="lines+markers")
fig_size.update_layout(xaxis_title="", yaxis_title="Size (MB)")
fig_size.show()


In [18]:
metrics_long = df.melt(id_vars=["Variant"], value_vars=["Acc","LatencyMs","SizeMB"],
                       var_name="Metric", value_name="Value")
metrics_long["Variant"] = pd.Categorical(metrics_long["Variant"], categories=order, ordered=True)

fig_all = px.line(metrics_long, x="Variant", y="Value", color="Metric",
                  markers=True, facet_row="Metric",
                  title="Teacher vs Student — Karşılaştırmalı Çizgi Grafikleri")
fig_all.update_traces(mode="lines+markers")
fig_all.update_layout(xaxis_title="", yaxis_title="")
fig_all.show()


In [19]:
base = df.iloc[0]
rel = pd.DataFrame({
    "Variant": df["Variant"],
    "Acc_rel": df["Acc"] / base["Acc"],
    "Latency_rel": df["LatencyMs"] / base["LatencyMs"],
    "Size_rel": df["SizeMB"] / base["SizeMB"],
})
rel_long = rel.melt(id_vars=["Variant"], var_name="Metric", value_name="RelValue")

fig_rel = px.line(rel_long, x="Variant", y="RelValue", color="Metric", markers=True,
                  title="Göreli Karşılaştırma (Teacher=1.0)")
fig_rel.update_layout(yaxis_title="Göreli Değer (Teacher=1.0)", xaxis_title="")
fig_rel.show()
