In [1]:
import os
import sys
project_root = os.path.abspath('../src')
if project_root not in sys.path:
    sys.path.insert(0, project_root)
import wandb
key = None # Fill in your own API key
if key is not None:
    wandb.login(key=key)
    os.environ['WANDB_API_KEY'] = key
os.environ["WANDB_NOTEBOOK_NAME"] = "cifar10.ipynb"

In [2]:
!nvidia-smi

Thu May 22 21:28:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.127.08             Driver Version: 550.127.08     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-32GB           Off |   00000000:06:00.0 Off |                    0 |
| N/A   37C    P0             72W /  300W |   14914MiB /  32768MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# train_set = SubsetSC("training")
# test_set = SubsetSC("testing")
# val_set = SubsetSC("validation")
from torchvision import transforms
import torchvision.datasets as datasets
from torch.utils.data import random_split
import torch
#Load cifar10 dataset
# In the dataset loading cell, replace with:

CIFAR10_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR10_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)
transform_train= transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_TRAIN_MEAN, CIFAR10_TRAIN_STD)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_TRAIN_MEAN, CIFAR10_TRAIN_STD)
])
# Note: In CIFAR100, the test_set and val_set are using the same data
# This is because CIFAR100 only comes with train and test splits
# For proper evaluation, we should create a validation set from the training data

# Create a validation split from the training data
train_set_full = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)

total_size = len(train_set_full)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

val_dataset_full = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_test)

# Now split both datasets with the same indices
generator = torch.Generator().manual_seed(42)  # for reproducibility
train_set, _ = torch.utils.data.random_split(train_set_full, [train_size, val_size], generator=generator)
generator = torch.Generator().manual_seed(42) 
_, val_set = torch.utils.data.random_split(val_dataset_full, [train_size, val_size], generator=generator)
test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)



Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
print(len(train_set), len(test_set), len(val_set))

40000 10000 10000


In [None]:

from run_experiment import run_expriment
import random
import string



# activation = 'relu'
# enable_boundary_loss = False
# disable_batchnorm_grad_clip_exclusion = True
# lambda_penalty = -1
# learnable_coeffs = False
# gradient_clip_val = None


activation = 'poly'
enable_boundary_loss = True
disable_batchnorm_grad_clip_exclusion = False
lambda_penalty = 10
gradient_clip_val = 1.0
learnable_coeffs = True


pol_degree = 8

pol_degree_map = {
    2:{"B": 12, "penalty_B": 12},
    4:{"B": 20, "penalty_B": 20 * 0.75},
    8:{"B": 35, "penalty_B": 35 * 0.5},
    16:{"B": 40, "penalty_B": 40 * 0.5},
    22:{"B": 47, "penalty_B": 47 * 0.5},
}

max_epoch = 30
dropout = 0.0


project_name = "test"
num_classes = 10
data_workers = 4
model = "resnet18"
dataset = {"train": train_set, "val": val_set, "test": test_set}

run_id = "test"
custom_tag = "cifar10"

ori_activaiton = "relu"
samp_size = 100
learning_rate = 0.001

B = pol_degree_map[pol_degree]["B"]
penalty_B = pol_degree_map[pol_degree]["penalty_B"]
boundary_loss_params = {'type': 'exp', 'penalty_B':  penalty_B, 'acc_norm': 'sum'}
input_size = (3, 32, 32)



optimizer_params = {
        'type': 'adamw',
        'lr': learning_rate,
        'params': {
        }
}
scheduler_params = {'type': 'reduce_on_plateau',
                        'params': {
                                'mode': 'min',
                                'factor': 0.1,
                                'patience': 5,
                                'threshold': 0.1,
                                'verbose': True
                        },
                        'monitor': 'val_acc_epoch'
}

actvation_params =  {
        "ori_activation": ori_activaiton,
        'B': B,
        'samp_size': samp_size,
        'pol_degree': pol_degree,
        'learnable_coeffs': learnable_coeffs,
        'initialization': "least_square",
        'boundary_loss_params': boundary_loss_params

    }
model_params = {
    "use_singleton_activation": False,
    "bn_before_act": False,
    "activation": activation,
    "dropout": dropout,
    "num_classes":num_classes,
    "actvation_params": actvation_params,
    "model":model,
    "input_size": input_size
}

training_params = {
    "enable_boundary_loss": enable_boundary_loss,
    "gradient_clip_val": gradient_clip_val,
    "max_epoch": max_epoch,
    "lambda_penalty": lambda_penalty,
    "disable_batchnorm_grad_clip_exclusion": disable_batchnorm_grad_clip_exclusion,
    'optimizer_params': optimizer_params,
    'scheduler_params': scheduler_params

}

dataset_params = {
    "data_workers": data_workers,
    "dataset": dataset,
    "batch_size": 128
}

project_params = {"run_id": run_id,
                  "project_name": project_name,
                  "custom_tag": custom_tag
                  }


run_expriment(project_params=project_params, dataset_params=dataset_params,
               model_params=model_params, training_params=training_params)


#
#       \


B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8


B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8
B 35 samp_size 100 initialization least_square pol_degree 8


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params | Mode 
------------------------------------------------------------
0 | model          | ResNet18         | 11.2 M | train
1 | base_criterion | CrossEntropyLoss | 0      | train
2 | criterion      | CustomPolyLoss   | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.696    Total estimated model params size (MB)
133       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.
Restoring states from the checkpoint path at /code/Polynomial-NN/notebooks/lightning_logs/version_34/checkpoints/best-acc-epoch=29-val_acc=0.00.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /code/Polynomial-NN/notebooks/lightning_logs/version_34/checkpoints/best-acc-epoch=29-val_acc=0.00.ckpt


Training completed!


Validation: |          | 0/? [00:00<?, ?it/s]

Restoring states from the checkpoint path at /code/Polynomial-NN/notebooks/lightning_logs/version_34/checkpoints/best-acc-epoch=29-val_acc=0.00.ckpt


────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Validate metric               DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       val_acc_epoch             0.9082000255584717
  val_boundary_loss_epoch       0.001312352018430829
val_cross_entropy_loss_epoch     0.2775616943836212
       val_loss_epoch            0.2775616943836212
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /code/Polynomial-NN/notebooks/lightning_logs/version_34/checkpoints/best-acc-epoch=29-val_acc=0.00.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Validate metric               DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       val_acc_epoch             0.9032999873161316
  val_boundary_loss_epoch       0.02821333333849907
val_cross_entropy_loss_epoch     0.572454571723938
       val_loss_epoch            0.572454571723938
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
