In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os
import time
import copy
import random
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score, recall_score, accuracy_score
import wandb

In [None]:
# TODO & tips:
# - hp tuning - wg instrukcji
# - zrobić kwantyzacje post training
# - pomiary czasu inferencji (testu) na baseline, pruned i quantized
# - pomiary wielkości modelu (pliku/wymaganej pamięci)
# - ustawiać nazwę swojego GPU (przy każdym wandb init)
# - można wyłączać logowanie
# - prezentacja, dokumentacja
# - uwaga bo pliki modeli się nadpisują

# ETAP DODATKOWY - Inicjalizacja logowania

In [14]:
GPU_1 = "NVIDIA GeForce GTX 1060 6GB"
GPU_2 = "NVIDIA GeForce RTX 3060 12GB"

PICKED_GPU = GPU_2

In [15]:
class WandbLogger:
    def __init__(self, disable_logging) -> None:
        self.disable_logging = disable_logging

    def initialize(self, config, name, gpu):
        if not self.disable_logging:
            config["gpu"] = gpu

            wandb.init(
                project="ososn-project",
                name=name,
                config=config
            )

    def log_loss(self, train_loss, val_loss, step):
        if not self.disable_logging:
            wandb.log({"train_loss": train_loss, "val_loss": val_loss}, step=step)

    def log_sparsity(self, sparsity, step):
        if not self.disable_logging:
            wandb.log({"sparsity": sparsity}, step=step)

    def log_time(self, time):
        if not self.disable_logging:
            wandb.log({"train_time": round(time, 3)})

    def log_accuracy(self, accuracy, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} accuracy": accuracy}, step=step)

    def log_f1_score(self, f1_score, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} f1_score": f1_score}, step=step)

    def log_recall(self, recall, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} recall": recall}, step=step)

    def log_precision(self, precision, tag, step):
        if not self.disable_logging:
            wandb.log({f"{tag} precision": precision}, step=step)
    
    def finish(self):
        if not self.disable_logging:
            wandb.finish()

class Report:
    @staticmethod
    def report_results(y_true, y_pred, tag, wandb_logger, step=None):
        acc = accuracy_score(y_true, y_pred)
        wandb_logger.log_accuracy(acc, tag, step)

        f1 = f1_score(y_true, y_pred, average='weighted')
        recall = recall_score(y_true, y_pred, average='weighted')
        precision = precision_score(y_true, y_pred, average='weighted')

        if tag == "test":
            wandb_logger.log_f1_score(f1, tag, step)
            wandb_logger.log_recall(recall, tag, step)
            wandb_logger.log_precision(precision, tag, step)
        
        return acc, f1, recall, precision

# Etap 2 - Wybór danych i modelu

In [17]:
import os
import random
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np

hp_sanity = {
    "num_epochs": 1,
    "learning_rate": 1e-3,
    "batch_size": 64,
    "model_name": "resnet18",
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
os.makedirs("models", exist_ok=True)

SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

# === TRANSFORMACJE ===
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

NUM_CLASSES = 100

if not os.path.exists('./data/cifar-100-python'):
    download = True
else:
    download = False

train_set_full = torchvision.datasets.CIFAR100(root='./data', train=True, download=download, transform=transform_train)
test_set_full = torchvision.datasets.CIFAR100(root='./data', train=False, download=download, transform=transform_test)

from torch.utils.data import random_split

train_len = int(0.9 * len(train_set_full))
val_len = len(train_set_full) - train_len
train_subset, val_subset = random_split(train_set_full, [train_len, val_len], generator=torch.Generator().manual_seed(SEED))

train_subset.dataset.transform = transform_train
val_subset.dataset.transform = transform_test

train_loader = torch.utils.data.DataLoader(train_subset, batch_size=hp_sanity["batch_size"], shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=hp_sanity["batch_size"], shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set_full, batch_size=hp_sanity["batch_size"], shuffle=False)

wandb_logger = WandbLogger(disable_logging=True)
wandb_logger.initialize(
    config=hp_sanity,
    name=f"{hp_sanity['model_name']}-baseline-bs{hp_sanity['batch_size']}",
    gpu=PICKED_GPU,
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)

    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_sanity['model_name']}, Parameters: {model_size:,} ({model_size / 1e6:.2f}M)")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_sanity["learning_rate"])

    def train_model(model, dataloader, optimizer, criterion, num_epochs):
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0
        torch.cuda.reset_peak_memory_stats(device)

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            start_epoch = time.time()
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_time = time.time() - start_epoch
            epoch_train_loss = running_loss / len(dataloader.dataset)
            epoch_train_acc = running_corrects.double() / len(dataloader.dataset)

            mem_allocated = torch.cuda.max_memory_allocated(device) / (1024 ** 2)

            print(f"Train Loss: {epoch_train_loss:.4f}, Acc: {epoch_train_acc:.4f}")
            print(f"Epoch training time: {epoch_time:.2f} sec")
            print(f"Peak memory usage (training): {mem_allocated:.2f} MB")

            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch + 1)
            wandb_logger.log_loss(epoch_train_loss, 0.0, epoch + 1)  # dummy val loss here
            wandb_logger.log_time(epoch_time)

            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)
            val_acc, val_f1, val_recall, val_precision = Report.report_results(val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch + 1)

            print(f"Val Loss: {epoch_val_loss:.4f}, Acc: {val_acc:.4f}")
            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch + 1)

            if val_acc > best_acc:
                best_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print("Training finished.")
        model.load_state_dict(best_model_wts)
        return model

    def evaluate_model(model, dataloader, wandb_logger):
        model.eval()
        all_preds = []
        all_labels = []

        start_time = time.time()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        inference_time = time.time() - start_time

        test_acc, test_f1, test_recall, test_precision = Report.report_results(all_labels, all_preds, tag="test", wandb_logger=wandb_logger)
        print(f"\nTest Acc: {test_acc:.4f}, F1: {test_f1:.4f}, Recall: {test_recall:.4f}, Precision: {test_precision:.4f}")
        print(f"Inference Time (total): {inference_time:.2f} sec")

    trained_model = train_model(model, train_loader, optimizer, criterion, hp_sanity["num_epochs"])
    evaluate_model(trained_model, test_loader, wandb_logger)

finally:
    wandb_logger.finish()


Using device: cuda




Model: resnet18, Parameters: 11,227,812 (11.23M)

Epoch 1/1
Train Loss: 2.0291, Acc: 0.4532
Epoch training time: 143.89 sec
Peak memory usage (training): 2022.98 MB


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Val Loss: 2.0424, Acc: 0.4624
Training finished.

Test Acc: 0.4610, F1: 0.4526, Recall: 0.4610, Precision: 0.5835
Inference Time (total): 16.62 sec


# ETAP TRZECI - Określenie zadania docelowego i przygotowanie danych

In [18]:
hp_baseline = {
        "num_epochs": 5,
        "learning_rate": 1e-3,
        # "weight_decay": 1e-4,
        "batch_size": 64,
        "model_name": "resnet18",
        "noise_level": 0.05
}

In [26]:
# === MAPOWANIE KLAS ===
class_mapping = {
    0: [4, 30, 55, 72, 95],
    1: [1, 32, 67, 73, 91],
    2: [54, 62, 70, 82, 92],
    3: [9, 10, 16, 28, 61],
    4: [0, 51, 53, 57, 83],
    5: [22, 39, 40, 86, 87],
    6: [5, 20, 25, 84, 94],
    7: [6, 7, 14, 18, 24],
    8: [3, 42, 43, 88, 97],
    9: [12, 17, 37, 68, 76],
    10: [23, 33, 49, 60, 71],
    11: [15, 19, 21, 31, 38],
    12: [34, 63, 64, 66, 75],
    13: [26, 45, 77, 79, 99],
    14: [2, 11, 35, 46, 98],
    15: [27, 29, 44, 78, 93],
    16: [36, 50, 65, 74, 80],
    17: [47, 52, 56, 59, 96],
    18: [8, 13, 48, 58, 90],
    19: [41, 69, 81, 85, 89]
}

custom_class_names = [
    'aquatic mammals', 'fish', 'flowers', 'food containers', 'fruit and vegetables',
    'household electrical device', 'household furniture', 'insects', 'large carnivores',
    'large man-made outdoor things', 'large natural outdoor scenes', 'large omnivores and herbivores',
    'medium-sized mammals', 'non-insect invertebrates', 'people', 'reptiles',
    'small mammals', 'trees', 'vehicles 1', 'vehicles 2'
]

label_remap = {orig: new for new, orig_list in class_mapping.items() for orig in orig_list}
NUM_CLASSES = len(class_mapping)

# === PRZYGOTOWANIE DANYCH ===
if not os.path.exists('./data/cifar-100-python'):
    download = True
else:
    download = False

train_set_full = torchvision.datasets.CIFAR100(root='./data', train=True, download=download, transform=transform_train)
test_set_full = torchvision.datasets.CIFAR100(root='./data', train=False, download=download, transform=transform_test)


# === TRANSFORMACJE ===
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5071, 0.4865, 0.4409],
                         std=[0.2673, 0.2564, 0.2761]),
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5071, 0.4865, 0.4409],
                         std=[0.2673, 0.2564, 0.2761]),
])


def remap_dataset(dataset):
    images, labels = [], []
    for img, label in zip(dataset.data, dataset.targets):
        if label in label_remap:
            images.append(img)
            labels.append(label_remap[label])
    dataset.data = images
    dataset.targets = labels
    dataset.classes = custom_class_names
    return dataset

train_set = remap_dataset(train_set_full)
test_set = remap_dataset(test_set_full)

from torch.utils.data import random_split

train_len = int(0.9 * len(train_set))
val_len = len(train_set) - train_len
train_subset, val_subset = random_split(train_set, [train_len, val_len], generator=torch.Generator().manual_seed(SEED))

# Szumienie etykiet (10%)
def add_label_noise(dataset, noise_level=0.1):
    if isinstance(dataset, torch.utils.data.Subset):
        for i in range(len(dataset)):
            if random.random() < noise_level:
                true_index = dataset.indices[i]
                dataset.dataset.targets[true_index] = random.randint(0, NUM_CLASSES - 1)
    else:
        for i in range(len(dataset.targets)):
            if random.random() < noise_level:
                dataset.targets[i] = random.randint(0, NUM_CLASSES - 1)
    return dataset

train_subset.dataset.transform = transform_train
val_subset.dataset.transform = transform_test

train_set = add_label_noise(train_set, noise_level=hp_baseline["noise_level"])

# Dataloader
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=hp_baseline["batch_size"], shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=hp_baseline["batch_size"], shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=hp_baseline["batch_size"], shuffle=False)

# ETAP 4 - Douczanie modelu odniesienia

In [None]:
wandb_logger = WandbLogger(disable_logging=False)
wandb_logger.initialize(
    config=hp_baseline,
    name=f"{hp_baseline["model_name"]}-baseline-bs{hp_baseline["batch_size"]}-noise{hp_baseline["noise_level"]}",
    gpu=PICKED_GPU,
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_baseline["model_name"]}, Parameters: {model_size / 1e6:.2f}M")
        
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_baseline["learning_rate"])

    def train_model(model, dataloader, optimizer, criterion, num_epochs):
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f"Epoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_train_loss = running_loss / len(train_loader.dataset)
            epoch_train_acc = running_corrects.double() / len(train_loader.dataset)
            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch+1)

            # === Walidacja ===
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)

            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch+1)
            val_acc, val_f1, val_recall, val_precision = Report.report_results(val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch+1)

            print(f"Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}, Val loss: {epoch_val_loss:.4f}, Val acc {val_acc:.4f}")

            if val_acc > best_acc:
                best_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), f"models/best_model_{hp_baseline["model_name"]}.pth")

        total_time = time.time() - since
        wandb_logger.log_time(total_time)

        print(f"Training complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
        model.load_state_dict(best_model_wts)
        return model

    # === EWALUACJA ===
    def evaluate_model(model, dataloader, wandb_logger, device="cuda"):
        model.eval()
        all_preds = []
        all_labels = []
        start = time.time()

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        duration = time.time() - start

        test_acc, test_f1, test_recall, test_precision = Report.report_results(all_labels, all_preds, tag="test", wandb_logger=wandb_logger)
        print(f"Test acc: {test_acc:.4f}, Test f1: {test_f1:.4f}, Test recall: {test_recall:.4f}, Test precision {test_precision:.4f}")
        print(f"Inference time: {duration:.2f}s")


    base_model = train_model(model, train_loader, optimizer, criterion, hp_baseline["num_epochs"])
    evaluate_model(base_model, test_loader, wandb_logger)
    torch.save(base_model.state_dict(), f"models/final_model_{hp_baseline["model_name"]}.pth")
finally:
    wandb_logger.finish()




Model: resnet18, Parameters: 11.19M
Epoch 1/5
Train loss: 1.5134, Train acc: 0.5583, Val loss: 1.2706, Val acc 0.6374
Epoch 2/5
Train loss: 1.0947, Train acc: 0.6953, Val loss: 1.2433, Val acc 0.6496
Epoch 3/5
Train loss: 0.8805, Train acc: 0.7612, Val loss: 1.1015, Val acc 0.7062
Epoch 4/5
Train loss: 0.6731, Train acc: 0.8203, Val loss: 1.1597, Val acc 0.7020
Epoch 5/5
Train loss: 0.4844, Train acc: 0.8685, Val loss: 1.3400, Val acc 0.6770
Training complete in 13m 46s


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Test acc: 0.7216, Test f1: 0.7202, Test recall: 0.7216, Test precision 0.7493
Inference time: 18.53s


0,1
test accuracy,▁
test f1_score,▁
test precision,▁
test recall,▁
train accuracy,▁▄▆▇█
train_loss,█▅▄▂▁
train_time,▁
val accuracy,▁▂██▅
val_loss,▆▅▁▃█

0,1
test accuracy,0.7216
test f1_score,0.72023
test precision,0.74934
test recall,0.7216
train accuracy,0.86853
train_loss,0.48441
train_time,826.279
val accuracy,0.677
val_loss,1.34005


In [None]:
import torch
import torchvision.models as models
from fvcore.nn import FlopCountAnalysis
import torch.nn as nn

model.load_state_dict(torch.load("models/best_model_resnet18_base.pth", map_location=device))
best_base_model = model.to(device)
best_base_model.eval()

input_tensor = torch.randn(1, 3, 224, 224).to(device)

flops = FlopCountAnalysis(best_base_model, input_tensor)
print(f"FLOPS: {flops.total():,}")


Unsupported operator aten::max_pool2d encountered 1 time(s)
Unsupported operator aten::add_ encountered 8 time(s)


FLOPS: 1,818,564,096


In [43]:
def get_model_size_mb(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    size_all_mb = (param_size + buffer_size) / 1024**2
    return size_all_mb

get_model_size_mb(best_base_model)


42.71092224121094

# ETAP 5 - Optymalizacja modelu

In [36]:
import torch.nn.utils.prune as prune

def apply_pruning(model, current_sparsity):
    # Przerzedzanie konwolucyjnych wag, procent current_sparsity np. 0.1 (10%)
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=current_sparsity)

def remove_pruning(model):
    # Usuwa maski, zachowuje sparsity na stałe
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            if prune.is_pruned(module):
                prune.remove(module, 'weight')

def count_nonzero_params(model):
    nonzero = total = 0
    for p in model.parameters():
        total += p.numel()
        nonzero += p.nonzero().size(0)
    return nonzero, total

In [39]:
import math

hp_pruning = {
    "num_epochs": 5,
    "learning_rate": 1e-3,
    # "weight_decay": 1e-4,
    "batch_size": 64,
    "model_name": "resnet18",
    "noise_level": 0.05,
    "max_sparsity": 0.75,
    "schedule": "log"
}
wandb_logger = WandbLogger(disable_logging=False)
wandb_logger.initialize(
    config=hp_pruning,
    name=f"{hp_pruning['model_name']}-pruned-bs{hp_pruning['batch_size']}-noise{hp_pruning['noise_level']}-sparsity{hp_pruning['max_sparsity']}",
    gpu=PICKED_GPU
)

try:
    model = torchvision.models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, NUM_CLASSES)
    model = model.to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model: {hp_pruning['model_name']}, Parameters: {model_size / 1e6:.2f}M")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=hp_pruning["learning_rate"])

    def train_model_with_pruning(model, train_loader, val_loader, optimizer, criterion, num_epochs, max_sparsity):
        since = time.time()
        best_acc = 0.0
        best_model_wts = None

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            model.train()
            running_loss = 0.0
            running_corrects = 0

            # Pruning schedule
            # current_sparsity = max_sparsity * (epoch / (num_epochs - 1))
            # logarithmic schedule
            current_sparsity = max_sparsity * math.log(epoch + 1) / math.log(num_epochs)

            wandb_logger.log_sparsity(current_sparsity, epoch+1)
            # Remove old masks (if any)
            for name, module in model.named_modules():
                if isinstance(module, torch.nn.Conv2d):
                    try:
                        prune.remove(module, 'weight')
                    except ValueError:
                        pass

            # Apply new sparsity
            apply_pruning(model, current_sparsity)
            print(f"Applied pruning: sparsity = {current_sparsity:.2f}")

            # === Train ===
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_train_loss = running_loss / len(train_loader.dataset)
            epoch_train_acc = running_corrects.double() / len(train_loader.dataset)
            wandb_logger.log_accuracy(epoch_train_acc, "train", epoch + 1)

            # === Val ===
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    val_loss += loss.item() * inputs.size(0)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            epoch_val_loss = val_loss / len(val_loader.dataset)
            wandb_logger.log_loss(epoch_train_loss, epoch_val_loss, epoch + 1)

            val_acc, val_f1, val_recall, val_precision = Report.report_results(
                val_labels, val_preds, tag="val", wandb_logger=wandb_logger, step=epoch + 1
            )

            print(f"Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}, Val loss: {epoch_val_loss:.4f}, Val acc {val_acc:.4f}")

            nonzero, total = count_nonzero_params(model)
            print(f"Nonzero params: {nonzero} / {total} ({100 * nonzero / total:.2f}%)")

            # Save best model
            if val_acc > best_acc:
                best_acc = val_acc
                remove_pruning(model)
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, f"models/best_model_pruned_{hp_pruning['model_name']}.pth")
                print("Saved new best model.")

        total_time = time.time() - since
        print(f"Training complete in {total_time // 60:.0f}m {total_time % 60:.0f}s")
        wandb_logger.log_time(total_time)

        if best_model_wts:
            remove_pruning(model)
            model.load_state_dict(best_model_wts)

        return model

    # === Ewaluacja ===
    def evaluate_model(model, dataloader, wandb_logger):
        model.eval()
        all_preds = []
        all_labels = []
        start = time.time()

        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        duration = time.time() - start

        test_acc, test_f1, test_recall, test_precision = Report.report_results(
            all_labels, all_preds, tag="test", wandb_logger=wandb_logger
        )
        print(f"Test acc: {test_acc:.4f}, Test f1: {test_f1:.4f}, Test recall: {test_recall:.4f}, Test precision {test_precision:.4f}")
        print(f"Inference time: {duration:.2f}s")

    # === Trening + Ewaluacja ===
    model = train_model_with_pruning(
        model, train_loader, val_loader, optimizer, criterion,
        num_epochs=hp_pruning["num_epochs"],
        max_sparsity=hp_pruning["max_sparsity"]
    )
    evaluate_model(model, test_loader, wandb_logger)
    torch.save(model.state_dict(), f"models/final_model_pruned_{hp_pruning['model_name']}.pth")

finally:
    wandb_logger.finish()



Model: resnet18, Parameters: 11.19M

Epoch 1/5
Applied pruning: sparsity = 0.00
Train loss: 1.5324, Train acc: 0.5510, Val loss: 1.2708, Val acc 0.6354
Nonzero params: 11186772 / 11186772 (100.00%)
Saved new best model.

Epoch 2/5
Applied pruning: sparsity = 0.32
Train loss: 1.0173, Train acc: 0.7201, Val loss: 1.0790, Val acc 0.7124
Nonzero params: 11186772 / 11186772 (100.00%)
Saved new best model.

Epoch 3/5
Applied pruning: sparsity = 0.51
Train loss: 0.7260, Train acc: 0.8103, Val loss: 1.0635, Val acc 0.7242
Nonzero params: 11180988 / 11186772 (99.95%)
Saved new best model.

Epoch 4/5
Applied pruning: sparsity = 0.65
Train loss: 0.4609, Train acc: 0.8772, Val loss: 1.2726, Val acc 0.7056
Nonzero params: 11180988 / 11186772 (99.95%)

Epoch 5/5
Applied pruning: sparsity = 0.75
Train loss: 0.2591, Train acc: 0.9273, Val loss: 1.4005, Val acc 0.7194
Nonzero params: 11180988 / 11186772 (99.95%)
Training complete in 14m 51s


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Test acc: 0.7532, Test f1: 0.7500, Test recall: 0.7532, Test precision 0.7590
Inference time: 21.26s


0,1
sparsity,▁▄▆▇█
test accuracy,▁
test f1_score,▁
test precision,▁
test recall,▁
train accuracy,▁▄▆▇█
train_loss,█▅▄▂▁
train_time,▁
val accuracy,▁▇█▇█
val_loss,▅▁▁▅█

0,1
sparsity,0.75
test accuracy,0.7532
test f1_score,0.74996
test precision,0.75901
test recall,0.7532
train accuracy,0.92727
train_loss,0.25914
train_time,891.154
val accuracy,0.7194
val_loss,1.40046


In [42]:
import os
import gzip
import shutil

def save_model_to_file(model, path):
    torch.save(model.state_dict(), path)

def get_gzipped_model_size(original_path):
    # Kompresuje .pth do .gz i zwraca rozmiar w bajtach
    gzipped_path = original_path + ".gz"
    with open(original_path, 'rb') as f_in:
        with gzip.open(gzipped_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    size = os.path.getsize(gzipped_path)
    return size

base_path="models/best_model_resnet18_base.pth"
pruned_path ="models/best_model_pruned_resnet18.pth"
# save_model_to_file(model, base_path)
save_model_to_file(model, pruned_path)

# Kompresja
base_size = get_gzipped_model_size(base_path)
pruned_size = get_gzipped_model_size(pruned_path)

print(f"Rozmiar modelu przed pruningiem (gzip): {base_size / 1024:.2f} KB")
print(f"Rozmiar modelu po pruningiem (gzip): {pruned_size / 1024:.2f} KB")
print(f"Stopień kompresji: {base_size / pruned_size:.2f}x")
print(f"Procent kompresji: {(1 - pruned_size / base_size) * 100:.2f}%")

# linear 0.5
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 32393.43 KB
# Stopień kompresji: 1.26x
# Procent kompresji: 20.33%


# log 0.75
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 17150.66 KB
# Stopień kompresji: 2.37x
# Procent kompresji: 57.82%

# log 0.75 + weight_decay
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 25955.67 KB
# Stopień kompresji: 1.57x
# Procent kompresji: 36.16%

# log 0.9
# Rozmiar modelu przed pruningiem (gzip): 40660.24 KB
# Rozmiar modelu po pruningiem (gzip): 19072.71 KB
# Stopień kompresji: 2.13x
# Procent kompresji: 53.09%

Rozmiar modelu przed pruningiem (gzip): 40611.58 KB
Rozmiar modelu po pruningiem (gzip): 23839.02 KB
Stopień kompresji: 1.70x
Procent kompresji: 41.30%


In [46]:
import torch
import torchvision.models as models
from fvcore.nn import FlopCountAnalysis
import torch.nn as nn

model.load_state_dict(torch.load("models/best_model_pruned_resnet18.pth", map_location=device))
best_base_model_pruned = model.to(device)
best_base_model_pruned.eval()

input_tensor = torch.randn(1, 3, 224, 224).to(device)

flops = FlopCountAnalysis(best_base_model_pruned, input_tensor)
print(f"FLOPS: {flops.total():,}")

Unsupported operator aten::max_pool2d encountered 1 time(s)
Unsupported operator aten::add_ encountered 8 time(s)


FLOPS: 1,818,564,096


# ETAP 6 - Dodakowe optymalizacje

In [None]:
## przykład kwantyzacji
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import os
import copy

# === SETUP ===
device = torch.device('cpu')  # Quantization works only on CPU
model = torchvision.models.resnet18(pretrained=True)
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, NUM_CLASSES)
model = model.to(device)
model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Model: {hp_pruning['model_name']}, Parameters: {model_size / 1e6:.2f}M")
model.load_state_dict(torch.load("models/best_model_pruned_resnet18.pth", map_location=device))
print(f"ResNet18 before quantization: {get_model_size(model):.2f} MB")
model.eval()

# === FUSE RESNET18 ===
def fuse_resnet(model):
    torch.quantization.fuse_modules(model, ['conv1', 'bn1', 'relu'], inplace=True)
    for module_name, module in model.named_children():
        if isinstance(module, torchvision.models.resnet.BasicBlock):
            torch.quantization.fuse_modules(module, ['conv1', 'bn1', 'relu'], inplace=True)
            torch.quantization.fuse_modules(module, ['conv2', 'bn2'], inplace=True)
        elif isinstance(module, nn.Sequential):
            for block_name, block in module.named_children():
                if isinstance(block, torchvision.models.resnet.BasicBlock):
                    torch.quantization.fuse_modules(block, ['conv1', 'bn1', 'relu'], inplace=True)
                    torch.quantization.fuse_modules(block, ['conv2', 'bn2'], inplace=True)
    return model

model = fuse_resnet(model)

# === QUANTIZATION CONFIG ===
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# === PREPARE ===
torch.quantization.prepare(model, inplace=True)

# === CALIBRATE === (use some training data)
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])

# train_set_full = torchvision.datasets.CIFAR100(root='./data', train=True, download=download, transform=transform_train)
# test_set_full = torchvision.datasets.CIFAR100(root='./data', train=False, download=download, transform=transform_test)

dataset = torchvision.datasets.FakeData(size=64, image_size=(3, 224, 224), num_classes=1000, transform=transform)
loader = torch.utils.data.DataLoader(dataset, batch_size=8)

with torch.no_grad():
    for i, (inputs, _) in enumerate(loader):
        model(inputs)
        if i > 5:
            break

# === CONVERT ===
quantized_model = torch.quantization.convert(model, inplace=True)

# === MODEL SIZE ===
def get_model_size(model, filename='temp.pth'):
    torch.save(model.state_dict(), filename)
    size_mb = os.path.getsize(filename) / 1e6
    os.remove(filename)
    return size_mb

print(f"ResNet18 quantized size: {get_model_size(quantized_model):.2f} MB")

Model: resnet18, Parameters: 11.19M
ResNet18 before quantization: 44.82 MB
ResNet18 quantized size: 11.32 MB


In [55]:
quantized_model = quantized_model.to(device)
evaluate_model(quantized_model, loader, wandb_logger)

NotImplementedError: Could not run 'quantized::conv2d_relu.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d_relu.new' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMTIA, AutogradMeta, Tracer, AutocastCPU, AutocastMTIA, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Meta: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\quantized\cpu\qconv.cpp:2044 [kernel]
QuantizedCUDA: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\quantized\cudnn\Conv.cpp:386 [kernel]
BackendSelect: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:194 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\DynamicLayer.cpp:479 [backend fallback]
Functionalize: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:100 [backend fallback]
AutogradOther: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMPS: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:91 [backend fallback]
AutogradXPU: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:104 [backend fallback]
AutogradLazy: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:87 [backend fallback]
AutogradMTIA: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMeta: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:95 [backend fallback]
Tracer: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\autograd\TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\autocast_mode.cpp:322 [backend fallback]
AutocastMTIA: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\autocast_mode.cpp:466 [backend fallback]
AutocastXPU: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\autocast_mode.cpp:504 [backend fallback]
AutocastMPS: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\TensorWrapper.cpp:208 [backend fallback]
PythonTLSSnapshot: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:202 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\functorch\DynamicLayer.cpp:475 [backend fallback]
PreDispatch: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:206 [backend fallback]
PythonDispatcher: registered at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:198 [backend fallback]
