In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os
import time
import copy
import random
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score, recall_score, accuracy_score
import wandb

In [None]:
# TODO & tips:
# - hp tuning - wg instrukcji
# - zrobić kwantyzacje post training
# - pomiary czasu inferencji (testu) na baseline, pruned i quantized
# - pomiary wielkości modelu (pliku/wymaganej pamięci)
# - ustawiać nazwę swojego GPU (przy każdym wandb init)
# - można wyłączać logowanie
# - prezentacja, dokumentacja
# - uwaga bo pliki modeli się nadpisują

In [11]:
class WandbLogger:
    def __init__(self, disable_logging) -> None:
        self.disable_logging = disable_logging

    def initialize(self, config, name, gpu):
        if not self.disable_logging:
            config["gpu"] = gpu

            wandb.init(
                project="ososn-project",
                name=name,
                config=config
            )

    def log_loss(self, train_loss, val_loss, step):
        if not self.disable_logging:
            wandb.log({"train_loss": train_loss, "val_loss": val_loss}, step=step)

    def log_sparsity(self, sparsity, step):
        if not self.disable_logging:
            wandb.log({"sparsity": sparsity}, step=step)

    def log_time(self, time):
        if not self.disable_logging:
            wandb.log({"train_time": round(time, 3)})

    def log_accuracy(self, accuracy, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} accuracy": accuracy}, step=step)

    def log_f1_score(self, f1_score, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} f1_score": f1_score}, step=step)

    def log_recall(self, recall, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} recall": recall}, step=step)

    def log_precision(self, precision, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} precision": precision}, step=step)
    
    def finish(self):
        if not self.disable_logging:
            wandb.finish()

class Report:
    @staticmethod
    def report_results(y_true, y_pred, tag, wandb_logger, step=None):
        acc = accuracy_score(y_true, y_pred)
        wandb_logger.log_accuracy(acc, tag, step)

        f1 = f1_score(y_true, y_pred, average='weighted')
        recall = recall_score(y_true, y_pred, average='weighted')
        precision = precision_score(y_true, y_pred, average='weighted')

        if tag == "test":
            wandb_logger.log_f1_score(f1, tag, step)
            wandb_logger.log_recall(recall, tag, step)
            wandb_logger.log_precision(precision, tag, step)
        
        return acc, f1, recall, precision

# Etap 2 - Wybór danych i modelu

In [None]:
hp_baseline = {
    "num_epochs": 5,
    "learning_rate": 1e-3,
    "batch_size": 64,
    "model_name": "resnet18",
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
os.makedirs("models", exist_ok=True)
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

# === TRANSFORMACJE ===
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


NUM_CLASSES = 100

if not os.path.exists('./data/cifar-100-python'):
    download = True
else:
    download = False

train_set_full = torchvision.datasets.CIFAR100(root='./data', train=True, download=download, transform=transform_train)
test_set_full = torchvision.datasets.CIFAR100(root='./data', train=False, download=download, transform=transform_test)


from torch.utils.data import random_split

train_len = int(0.9 * len(train_set_full))
val_len = len(train_set_full) - train_len
train_subset, val_subset = random_split(train_set_full, [train_len, val_len], generator=torch.Generator().manual_seed(SEED))


train_subset.dataset.transform = transform_train
val_subset.dataset.transform = transform_test

train_loader = torch.utils.data.DataLoader(train_subset, batch_size=hp_baseline["batch_size"], shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=hp_baseline["batch_size"], shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set_full, batch_size=hp_baseline["batch_size"], shuffle=False)

wandb_logger = WandbLogger(disable_logging=True)
wandb_logger.initialize(
    config=hp_baseline, 
    name=f"{hp_baseline["model_name"]}-baseline-bs{hp_baseline["batch_size"]}",
    gpu="NVIDIA GeForce GTX 1060 6GB"
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_baseline["model_name"]}, Parameters: {model_size / 1e6:.2f}M")
        
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_baseline["learning_rate"])

    def train_model(model, dataloader, optimizer, criterion, num_epochs):
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f"Epoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_train_loss = running_loss / len(train_loader.dataset)
            epoch_train_acc = running_corrects.double() / len(train_loader.dataset)
            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch+1)

            # === Walidacja ===
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)

            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch+1)
            val_acc, val_f1, val_recall, val_precision = Report.report_results(val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch+1)

            print(f"Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}, Val loss: {epoch_val_loss:.4f}, Val acc {val_acc:.4f}")

            if val_acc > best_acc:
                best_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                # torch.save(model.state_dict(), f"models/best_model_{hp_baseline["model_name"]}.pth")

        total_time = time.time() - since
        wandb_logger.log_time(total_time)

        print(f"Training complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
        model.load_state_dict(best_model_wts)
        return model

    # === EWALUACJA ===
    def evaluate_model(model, dataloader, wandb_logger):
        model.eval()
        all_preds = []
        all_labels = []
        start = time.time()

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        duration = time.time() - start

        test_acc, test_f1, test_recall, test_precision = Report.report_results(all_labels, all_preds, tag="test", wandb_logger=wandb_logger)
        print(f"Test acc: {test_acc:.4f}, Test f1: {test_f1:.4f}, Test recall: {test_recall:.4f}, Test precision {test_precision:.4f}")
        print(f"Inference time: {duration:.2f}s")


    base_model = train_model(model, train_loader, optimizer, criterion, hp_baseline["num_epochs"])
    evaluate_model(base_model, test_loader, wandb_logger)
    # torch.save(base_model.state_dict(), f"models/final_model_{hp_baseline["model_name"]}.pth")
finally:
    wandb_logger.finish()

cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:07<00:00, 23840739.46it/s]


Extracting ./data\cifar-100-python.tar.gz to ./data
Files already downloaded and verified




Model: resnet18, Parameters: 11.23M
Epoch 1/5


KeyboardInterrupt: 

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
os.makedirs("models", exist_ok=True)
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

cuda


In [8]:
hp_baseline = {
        "num_epochs": 10,
        "learning_rate": 1e-3,
        "weight_decay": 1e-4,
        "batch_size": 64,
        "model_name": "resnet18",
        "noise_level": 0.05
}

In [9]:
# === TRANSFORMACJE ===
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
])

# === MAPOWANIE KLAS ===
class_mapping = {
    0: [4, 30, 55, 72, 95],
    1: [1, 32, 67, 73, 91],
    2: [54, 62, 70, 82, 92],
    3: [9, 10, 16, 28, 61],
    4: [0, 51, 53, 57, 83],
    5: [22, 39, 40, 86, 87],
    6: [5, 20, 25, 84, 94],
    7: [6, 7, 14, 18, 24],
    8: [3, 42, 43, 88, 97],
    9: [12, 17, 37, 68, 76],
    10: [23, 33, 49, 60, 71],
    11: [15, 19, 21, 31, 38],
    12: [34, 63, 64, 66, 75],
    13: [26, 45, 77, 79, 99],
    14: [2, 11, 35, 46, 98],
    15: [27, 29, 44, 78, 93],
    16: [36, 50, 65, 74, 80],
    17: [47, 52, 56, 59, 96],
    18: [8, 13, 48, 58, 90],
    19: [41, 69, 81, 85, 89]
}

custom_class_names = [
    'aquatic mammals', 'fish', 'flowers', 'food containers', 'fruit and vegetables',
    'household electrical device', 'household furniture', 'insects', 'large carnivores',
    'large man-made outdoor things', 'large natural outdoor scenes', 'large omnivores and herbivores',
    'medium-sized mammals', 'non-insect invertebrates', 'people', 'reptiles',
    'small mammals', 'trees', 'vehicles 1', 'vehicles 2'
]

label_remap = {orig: new for new, orig_list in class_mapping.items() for orig in orig_list}
NUM_CLASSES = len(class_mapping)

# === PRZYGOTOWANIE DANYCH ===
if not os.path.exists('./data/cifar-100-python'):
    download = True
else:
    download = False

train_set_full = torchvision.datasets.CIFAR100(root='./data', train=True, download=download, transform=transform_train)
test_set_full = torchvision.datasets.CIFAR100(root='./data', train=False, download=download, transform=transform_test)


def remap_dataset(dataset):
    images, labels = [], []
    for img, label in zip(dataset.data, dataset.targets):
        if label in label_remap:
            images.append(img)
            labels.append(label_remap[label])
    dataset.data = images
    dataset.targets = labels
    dataset.classes = custom_class_names
    return dataset

train_set = remap_dataset(train_set_full)
test_set = remap_dataset(test_set_full)

from torch.utils.data import random_split

train_len = int(0.9 * len(train_set))
val_len = len(train_set) - train_len
train_subset, val_subset = random_split(train_set, [train_len, val_len], generator=torch.Generator().manual_seed(SEED))

# Szumienie etykiet (10%)
def add_label_noise(dataset, noise_level=0.1):
    if isinstance(dataset, torch.utils.data.Subset):
        for i in range(len(dataset)):
            if random.random() < noise_level:
                true_index = dataset.indices[i]
                dataset.dataset.targets[true_index] = random.randint(0, NUM_CLASSES - 1)
    else:
        for i in range(len(dataset.targets)):
            if random.random() < noise_level:
                dataset.targets[i] = random.randint(0, NUM_CLASSES - 1)
    return dataset

train_subset.dataset.transform = transform_train
val_subset.dataset.transform = transform_test

train_set = add_label_noise(train_set, noise_level=hp_baseline["noise_level"])

# Dataloader
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=hp_baseline["batch_size"], shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=hp_baseline["batch_size"], shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=hp_baseline["batch_size"], shuffle=False)

In [40]:
wandb_logger = WandbLogger(disable_logging=False)
wandb_logger.initialize(
    config=hp_baseline, 
    name=f"{hp_baseline["model_name"]}-baseline-bs{hp_baseline["batch_size"]}-wd-noise{hp_baseline["noise_level"]}",
    gpu="NVIDIA GeForce GTX 1060 6GB"
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_baseline["model_name"]}, Parameters: {model_size / 1e6:.2f}M")
        
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_baseline["learning_rate"])

    def train_model(model, dataloader, optimizer, criterion, num_epochs):
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f"Epoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_train_loss = running_loss / len(train_loader.dataset)
            epoch_train_acc = running_corrects.double() / len(train_loader.dataset)
            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch+1)

            # === Walidacja ===
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)

            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch+1)
            val_acc, val_f1, val_recall, val_precision = Report.report_results(val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch+1)

            print(f"Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}, Val loss: {epoch_val_loss:.4f}, Val acc {val_acc:.4f}")

            if val_acc > best_acc:
                best_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), f"models/best_model_{hp_baseline["model_name"]}.pth")

        total_time = time.time() - since
        wandb_logger.log_time(total_time)

        print(f"Training complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
        model.load_state_dict(best_model_wts)
        return model

    # === EWALUACJA ===
    def evaluate_model(model, dataloader, wandb_logger):
        model.eval()
        all_preds = []
        all_labels = []
        start = time.time()

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        duration = time.time() - start

        test_acc, test_f1, test_recall, test_precision = Report.report_results(all_labels, all_preds, tag="test", wandb_logger=wandb_logger)
        print(f"Test acc: {test_acc:.4f}, Test f1: {test_f1:.4f}, Test recall: {test_recall:.4f}, Test precision {test_precision:.4f}")
        print(f"Inference time: {duration:.2f}s")


    base_model = train_model(model, train_loader, optimizer, criterion, hp_baseline["num_epochs"])
    evaluate_model(base_model, test_loader, wandb_logger)
    torch.save(base_model.state_dict(), f"models/final_model_{hp_baseline["model_name"]}.pth")
finally:
    wandb_logger.finish()




Model: resnet18, Parameters: 11.19M
Epoch 1/10
Train loss: 2.0039, Train acc: 0.4047, Val loss: 1.8609, Val acc 0.4564
Epoch 2/10
Train loss: 1.5913, Train acc: 0.5346, Val loss: 1.5716, Val acc 0.5316
Epoch 3/10
Train loss: 1.3705, Train acc: 0.6005, Val loss: 1.5150, Val acc 0.5656
Epoch 4/10
Train loss: 1.2731, Train acc: 0.6322, Val loss: 1.9334, Val acc 0.4990
Epoch 5/10
Train loss: 1.0862, Train acc: 0.6898, Val loss: 1.4793, Val acc 0.5818
Epoch 6/10
Train loss: 0.9213, Train acc: 0.7389, Val loss: 1.5599, Val acc 0.5732
Epoch 7/10
Train loss: 0.8297, Train acc: 0.7646, Val loss: 1.7781, Val acc 0.5354
Epoch 8/10
Train loss: 0.7114, Train acc: 0.7973, Val loss: 1.6468, Val acc 0.5816
Epoch 9/10
Train loss: 0.5642, Train acc: 0.8414, Val loss: 1.7847, Val acc 0.5524
Epoch 10/10
Train loss: 0.6082, Train acc: 0.8261, Val loss: 1.8811, Val acc 0.5728
Training complete in 7m 12s
Test acc: 0.6147, Test f1: 0.6096, Test recall: 0.6147, Test precision 0.6172
Inference time: 4.10s


0,1
test accuracy,▁
test f1_score,▁
test precision,▁
test recall,▁
train accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▄▃▂▂▁▁
train_time,▁
val accuracy,▁▅▇▃██▅█▆▇
val_loss,▇▂▂█▁▂▆▄▆▇

0,1
test accuracy,0.6147
test f1_score,0.60965
test precision,0.61717
test recall,0.6147
train accuracy,0.82607
train_loss,0.60823
train_time,431.54
val accuracy,0.5728
val_loss,1.8811


In [10]:
def get_model_size_mb(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    size_all_mb = (param_size + buffer_size) / 1024**2
    return size_all_mb

get_model_size_mb(base_model)


NameError: name 'base_model' is not defined

In [None]:
import torch.nn.utils.prune as prune

def apply_pruning(model, current_sparsity):
    # Przerzedzanie konwolucyjnych wag, procent current_sparsity np. 0.1 (10%)
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=current_sparsity)

def remove_pruning(model):
    # Usuwa maski, zachowuje sparsity na stałe
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            if prune.is_pruned(module):
                prune.remove(module, 'weight')

def count_nonzero_params(model):
    nonzero = total = 0
    for p in model.parameters():
        total += p.numel()
        nonzero += p.nonzero().size(0)
    return nonzero, total

In [None]:
import math

hp_pruning = {
    "num_epochs": 10,
    "learning_rate": 1e-3,
    # "weight_decay": 1e-4,
    "batch_size": 64,
    "model_name": "resnet18",
    "noise_level": 0.05,
    "max_sparsity": 0.75,
    "schedule": "log"
}
wandb_logger = WandbLogger(disable_logging=False)
wandb_logger.initialize(
    config=hp_pruning,
    name=f"{hp_pruning['model_name']}-pruned-bs{hp_pruning['batch_size']}-noise{hp_pruning['noise_level']}-sparsity{hp_pruning['max_sparsity']}",
    gpu="NVIDIA GeForce GTX 1060 6GB"
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_pruning['model_name']}, Parameters: {model_size / 1e6:.2f}M")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_pruning["learning_rate"], weight_decay=hp_pruning["weight_decay"])

    def train_model_with_pruning(model, train_loader, val_loader, optimizer, criterion, num_epochs, max_sparsity):
        since = time.time()
        best_acc = 0.0
        best_model_wts = None

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            # Pruning schedule
            # current_sparsity = max_sparsity * (epoch / (num_epochs - 1))
            # logarithmic schedule
            current_sparsity = max_sparsity * math.log(epoch + 1) / math.log(num_epochs)

            wandb_logger.log_sparsity(current_sparsity, epoch+1)
            # Remove old masks (if any)
            for name, module in model.named_modules():
                if isinstance(module, torch.nn.Conv2d):
                    try:
                        prune.remove(module, 'weight')
                    except ValueError:
                        pass

            # Apply new sparsity
            apply_pruning(model, current_sparsity)
            print(f"Applied pruning: sparsity = {current_sparsity:.2f}")

            # === Train ===
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_train_loss = running_loss / len(train_loader.dataset)
            epoch_train_acc = running_corrects.double() / len(train_loader.dataset)
            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch + 1)

            # === Val ===
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)
            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch + 1)

            val_acc, val_f1, val_recall, val_precision = Report.report_results(
                val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch + 1
            )

            print(f"Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}, Val loss: {epoch_val_loss:.4f}, Val acc {val_acc:.4f}")

            nonzero, total = count_nonzero_params(model)
            print(f"Nonzero params: {nonzero} / {total} ({100 * nonzero / total:.2f}%)")

            # Save best model
            if val_acc > best_acc:
                best_acc = val_acc
                remove_pruning(model)
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, f"models/best_model_pruned_{hp_pruning['model_name']}.pth")
                print("Saved new best model.")

        total_time = time.time() - since
        print(f"Training complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
        wandb_logger.log_time(total_time)

        if best_model_wts:
            remove_pruning(model)
            model.load_state_dict(best_model_wts)

        return model

    # === Ewaluacja ===
    def evaluate_model(model, dataloader, wandb_logger):
        model.eval()
        all_preds = []
        all_labels = []
        start = time.time()

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        duration = time.time() - start

        test_acc, test_f1, test_recall, test_precision = Report.report_results(
            all_labels, all_preds, tag="test", wandb_logger=wandb_logger
        )
        print(f"Test acc: {test_acc:.4f}, Test f1: {test_f1:.4f}, Test recall: {test_recall:.4f}, Test precision {test_precision:.4f}")
        print(f"Inference time: {duration:.2f}s")

    # === Trening + Ewaluacja ===
    model = train_model_with_pruning(
        model, train_loader, val_loader, optimizer, criterion,
        num_epochs=hp_pruning["num_epochs"],
        max_sparsity=hp_pruning["max_sparsity"]
    )
    evaluate_model(model, test_loader, wandb_logger)
    torch.save(model.state_dict(), f"models/final_model_pruned_{hp_pruning['model_name']}.pth")

finally:
    wandb_logger.finish()



Model: resnet18, Parameters: 11.19M

Epoch 1/10
Applied pruning: sparsity = 0.00
Train loss: 1.9835, Train acc: 0.4143, Val loss: 2.0057, Val acc 0.4450
Nonzero params: 11186772 / 11186772 (100.00%)
Saved new best model.

Epoch 2/10
Applied pruning: sparsity = 0.23
Train loss: 1.5233, Train acc: 0.5528, Val loss: 2.1859, Val acc 0.4420
Nonzero params: 11186772 / 11186772 (100.00%)

Epoch 3/10
Applied pruning: sparsity = 0.36
Train loss: 1.3266, Train acc: 0.6150, Val loss: 1.5564, Val acc 0.5616
Nonzero params: 9533967 / 11186772 (85.23%)
Saved new best model.

Epoch 4/10
Applied pruning: sparsity = 0.45
Train loss: 1.1006, Train acc: 0.6840, Val loss: 1.4696, Val acc 0.5902
Nonzero params: 8613562 / 11186772 (77.00%)
Saved new best model.

Epoch 5/10
Applied pruning: sparsity = 0.52
Train loss: 0.9682, Train acc: 0.7236, Val loss: 1.6664, Val acc 0.5606
Nonzero params: 7969008 / 11186772 (71.24%)

Epoch 6/10
Applied pruning: sparsity = 0.58
Train loss: 0.7660, Train acc: 0.7842, Val l

0,1
sparsity,▁▃▄▅▆▆▇▇██
test accuracy,▁
test f1_score,▁
test precision,▁
test recall,▁
train accuracy,▁▃▄▅▆▇▇███
train_loss,█▆▅▄▄▃▂▁▁▁
train_time,▁
val accuracy,▁▁▇█▇█▇██▇
val_loss,▆█▂▁▃▂▃▄▅▇

0,1
sparsity,0.75
test accuracy,0.6259
test f1_score,0.62417
test precision,0.63463
test recall,0.6259
train accuracy,0.88318
train_loss,0.40119
train_time,383.903
val accuracy,0.578
val_loss,2.06171


In [None]:
import os
import gzip
import shutil

def save_model_to_file(model, path):
    torch.save(model.state_dict(), path)

def get_gzipped_model_size(original_path):
    # Kompresuje .pth do .gz i zwraca rozmiar w bajtach
    gzipped_path = original_path + ".gz"
    with open(original_path, 'rb') as f_in:
        with gzip.open(gzipped_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    size = os.path.getsize(gzipped_path)
    return size

base_path="models/final_model_resnet18.pth"
pruned_path ="models/best_model_pruned_resnet18.pth"
# save_model_to_file(model, base_path)
save_model_to_file(model, pruned_path)

# Kompresja
base_size = get_gzipped_model_size(base_path)
pruned_size = get_gzipped_model_size(pruned_path)

print(f"Rozmiar modelu przed pruningiem (gzip): {base_size / 1024:.2f} KB")
print(f"Rozmiar modelu po pruningiem (gzip): {pruned_size / 1024:.2f} KB")
print(f"Stopień kompresji: {base_size / pruned_size:.2f}x")
print(f"Procent kompresji: {(1 - pruned_size / base_size) * 100:.2f}%")

# linear 0.5
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 32393.43 KB
# Stopień kompresji: 1.26x
# Procent kompresji: 20.33%


# log 0.75
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 17150.66 KB
# Stopień kompresji: 2.37x
# Procent kompresji: 57.82%

# log 0.75 + weight_decay
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 25955.67 KB
# Stopień kompresji: 1.57x
# Procent kompresji: 36.16%

# log 0.9
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 19072.71 KB
# Stopień kompresji: 2.13x
# Procent kompresji: 53.09%

Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
Rozmiar modelu po pruningiem (gzip): 25955.67 KB
Stopień kompresji: 1.57x
Procent kompresji: 36.16%


In [None]:
### przykład kwantyzacji
# import torch
# import torch.nn as nn
# import torchvision
# import torchvision.transforms as transforms
# import os
# import copy

# # === SETUP ===
# device = torch.device('cpu')  # Quantization works only on CPU
# model = torchvision.models.resnet18(pretrained=True)
# model.eval()

# # === FUSE RESNET18 ===
# # Fuse conv, bn, relu layers in basic blocks
# def fuse_resnet(model):
#     # Fuse initial layers
#     torch.quantization.fuse_modules(model, ['conv1', 'bn1', 'relu'], inplace=True)

#     # Fuse each BasicBlock
#     for module_name, module in model.named_children():
#         if isinstance(module, torchvision.models.resnet.BasicBlock):
#             torch.quantization.fuse_modules(module, ['conv1', 'bn1', 'relu'], inplace=True)
#             torch.quantization.fuse_modules(module, ['conv2', 'bn2'], inplace=True)
#         elif isinstance(module, nn.Sequential):
#             for block_name, block in module.named_children():
#                 if isinstance(block, torchvision.models.resnet.BasicBlock):
#                     torch.quantization.fuse_modules(block, ['conv1', 'bn1', 'relu'], inplace=True)
#                     torch.quantization.fuse_modules(block, ['conv2', 'bn2'], inplace=True)
#     return model

# model = fuse_resnet(model)

# # === QUANTIZATION CONFIG ===
# model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# # === PREPARE ===
# torch.quantization.prepare(model, inplace=True)

# # === CALIBRATE === (use some training data)
# transform = transforms.Compose([
#     transforms.Resize(224),
#     transforms.ToTensor(),
# ])
# dataset = torchvision.datasets.FakeData(size=64, image_size=(3, 224, 224), num_classes=1000, transform=transform)
# loader = torch.utils.data.DataLoader(dataset, batch_size=8)

# with torch.no_grad():
#     for i, (inputs, _) in enumerate(loader):
#         model(inputs)
#         if i > 5:  # few batches are enough
#             break

# # === CONVERT ===
# quantized_model = torch.quantization.convert(model, inplace=True)

# # === MODEL SIZE ===
# def get_model_size(model, filename='temp.pth'):
#     torch.save(model.state_dict(), filename)
#     size_mb = os.path.getsize(filename) / 1e6
#     os.remove(filename)
#     return size_mb

# print(f"ResNet18 quantized size: {get_model_size(quantized_model):.2f} MB")