In [1]:
!rm -rf self-expanding-nets
!git clone https://github.com/CTLab-ITMO/self-expanding-nets
%pip uninstall senmodel
%pip install -U -e ./self-expanding-nets/

Cloning into 'self-expanding-nets'...
remote: Enumerating objects: 1061, done.[K
remote: Counting objects: 100% (193/193), done.[K
remote: Compressing objects: 100% (131/131), done.[K
remote: Total 1061 (delta 118), reused 117 (delta 54), pack-reused 868 (from 1)[K
Receiving objects: 100% (1061/1061), 2.41 MiB | 5.00 MiB/s, done.
Resolving deltas: 100% (638/638), done.
[0mObtaining file:///content/self-expanding-nets
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->senmodel==1.0.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->senmodel==1.0.0)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014

## Imports

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split, TensorDataset
from torchvision import datasets, transforms
import os
import random
import numpy as np

SEED = 0
torch.manual_seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
g = torch.Generator()
g.manual_seed(0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'  # or ':16:8'


################################
#     RESTART     RUNTIME      #
################################
from senmodel.model.utils import *
from senmodel.metrics.nonlinearity_metrics import *
from senmodel.metrics.edge_finder import *
from senmodel.metrics.train_metrics import *
from senmodel.train.train import *

## Data

In [3]:
BATCH_SIZE = 128

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))
])

train_dataset = datasets.FashionMNIST(root='./data', train=True,
                                      download=True, transform=transform)
val_dataset = datasets.FashionMNIST(root='./data', train=False,
                                    download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

100%|██████████| 26.4M/26.4M [00:02<00:00, 12.7MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 201kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.72MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 9.89MB/s]


## Model

In [4]:
class SimpleFCN(nn.Module):
    def __init__(self, input_size=28 * 28, hidden_size=16, output_size=10):
        super(SimpleFCN, self).__init__()
        self.fc0 = nn.Linear(input_size, output_size)

    def forward(self, x):
        x = self.fc0(x)
        return x

In [5]:
model = SimpleFCN()
sparse_model = convert_dense_to_sparse_network(model, layers=[model.fc0], device=device)

## Train

In [6]:
hyperparams = {
    'num_epochs': 64,
    'metric': AbsGradientEdgeMetric(nn.CrossEntropyLoss()),
    'aggregation_mode': 'mean',
    'choose_thresholds': {'fc0': 0.6},
    'replace_layers': ['fc0'],
    'threshold': 0.05,
    'min_delta_epoch_replace': 8,
    'window_size': 5,
    'lr': 0.00012,
    'delete_after': 2,
    'task_type': 'classification',
    'max_to_replace': None,
    'choose_thresholds_del': {'fc0': 0.02},
    'fully_connected': False
}

name = ", ".join(
    f"{key}: {value.__class__.__name__ if key == 'metric' else value}"
    for key, value in hyperparams.items()
)

name

"num_epochs: 64, metric: AbsGradientEdgeMetric, aggregation_mode: mean, choose_thresholds: {'fc0': 0.6}, replace_layers: ['fc0'], threshold: 0.05, min_delta_epoch_replace: 8, window_size: 5, lr: 0.00012, delete_after: 2, task_type: classification, max_to_replace: None, choose_thresholds_del: {'fc0': 0.02}, fully_connected: False"

In [7]:
import wandb

wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdown-shift[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [8]:
wandb.finish()
run = wandb.init(
    project="self-expanding-nets",
    name=f"{name}",
)

In [None]:
criterion = nn.CrossEntropyLoss()
train_sparse_recursive(sparse_model, train_loader, train_loader, val_loader, criterion, hyperparams, device)

100%|██████████| 469/469 [00:08<00:00, 52.33it/s]


Epoch 1/64, Train Loss: 1.3163, Val Loss: 0.9454, Val Accuracy: 0.7051


100%|██████████| 469/469 [00:08<00:00, 58.03it/s]


Epoch 2/64, Train Loss: 0.8291, Val Loss: 0.7658, Val Accuracy: 0.7487


100%|██████████| 469/469 [00:08<00:00, 56.57it/s]


Epoch 3/64, Train Loss: 0.7091, Val Loss: 0.6879, Val Accuracy: 0.7696


100%|██████████| 469/469 [00:07<00:00, 65.66it/s]


Epoch 4/64, Train Loss: 0.6467, Val Loss: 0.6398, Val Accuracy: 0.7866


100%|██████████| 469/469 [00:08<00:00, 58.19it/s]


Epoch 5/64, Train Loss: 0.6062, Val Loss: 0.6082, Val Accuracy: 0.7985


100%|██████████| 469/469 [00:07<00:00, 58.90it/s]


Epoch 6/64, Train Loss: 0.5775, Val Loss: 0.5853, Val Accuracy: 0.8051


100%|██████████| 469/469 [00:07<00:00, 64.36it/s]


Epoch 7/64, Train Loss: 0.5557, Val Loss: 0.5687, Val Accuracy: 0.8089


100%|██████████| 469/469 [00:07<00:00, 63.86it/s]


Epoch 8/64, Train Loss: 0.5386, Val Loss: 0.5527, Val Accuracy: 0.8144


100%|██████████| 469/469 [00:07<00:00, 59.20it/s]


Epoch 9/64, Train Loss: 0.5245, Val Loss: 0.5437, Val Accuracy: 0.8142


100%|██████████| 469/469 [00:07<00:00, 59.28it/s]


Epoch 10/64, Train Loss: 0.5129, Val Loss: 0.5315, Val Accuracy: 0.8204
Chosen edges: tensor([[  0,   0,   0,  ...,   6,   6,   6],
        [ 39,  40,  41,  ..., 748, 749, 750]], device='cuda:0') 1681


100%|██████████| 469/469 [00:12<00:00, 37.97it/s]


Epoch 11/64, Train Loss: 0.4941, Val Loss: 0.5009, Val Accuracy: 0.8273


100%|██████████| 469/469 [00:12<00:00, 38.05it/s]


Epoch 12/64, Train Loss: 0.4574, Val Loss: 0.4812, Val Accuracy: 0.8328
torch.Size([806880]) torch.Size([22969])
combined_metrics torch.Size([829849])
mask torch.Size([829849])
tensor(239221, device='cuda:0')
num_emb_edges 806880
tensor(589257, device='cuda:0') tensor(38, device='cuda:0')
Chosen edges to del emb: tensor([[   0,    0,    0,  ..., 1680, 1680, 1680],
        [  37,   38,   39,  ...,  745,  746,  747]], device='cuda:0',
       dtype=torch.int32) 589257
Chosen edges to del exp: tensor([[   2,    2,    2,    2,    2,    2,    6,    2,    6,    2,    6,    6,
            6,    6,    2,    2,    2,    2,    6,    2,    2,    2,    2,    2,
            2,    2,    2,    6,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2],
        [ 786,  838,  850,  864,  876,  984,  984,  996,  996, 1008, 1008, 1096,
         1391, 1431, 1564, 1565, 1567, 1623, 1623, 1624, 1639, 1673, 1690, 1708,
         1725, 1726, 1743, 1953, 1995, 2123, 2141, 2196, 2211, 2230, 2249, 225

100%|██████████| 469/469 [00:10<00:00, 44.24it/s]


Epoch 13/64, Train Loss: 0.5042, Val Loss: 0.4917, Val Accuracy: 0.8296


100%|██████████| 469/469 [00:10<00:00, 44.61it/s]


Epoch 14/64, Train Loss: 0.4475, Val Loss: 0.4610, Val Accuracy: 0.8407


100%|██████████| 469/469 [00:10<00:00, 46.19it/s]


Epoch 15/64, Train Loss: 0.4198, Val Loss: 0.4428, Val Accuracy: 0.8457


100%|██████████| 469/469 [00:10<00:00, 43.71it/s]


Epoch 16/64, Train Loss: 0.4008, Val Loss: 0.4275, Val Accuracy: 0.8509


100%|██████████| 469/469 [00:10<00:00, 44.48it/s]


Epoch 17/64, Train Loss: 0.3865, Val Loss: 0.4157, Val Accuracy: 0.8519


100%|██████████| 469/469 [00:10<00:00, 44.36it/s]


Epoch 18/64, Train Loss: 0.3754, Val Loss: 0.4055, Val Accuracy: 0.8552


100%|██████████| 469/469 [00:10<00:00, 44.55it/s]


Epoch 19/64, Train Loss: 0.3650, Val Loss: 0.4005, Val Accuracy: 0.8583
Chosen edges: tensor([[   4,    6,    2,    0,    4,    2,    4,    6,    2],
        [ 786,  786, 1607, 1623, 1623, 2017, 2017, 2017, 2068]],
       device='cuda:0') 9


 10%|▉         | 45/469 [00:01<00:09, 44.37it/s]