In [1]:
!pip install wandb -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.6/175.6 KB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sentry-sdk (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [2]:
import torch
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [None]:
from torch.utils.data import Dataset
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Lambda, ToTensor


class MNISTDataset(Dataset):
    """MNIST dataset.

    Feature images are automatically flattened.

    Parameters
    ----------
    root : str
        Directory where the actual data is located (or downloaded to).

    train : bool
        If True the training set is returned (60_000 samples). Otherwise
        the validation set is returned (10_000 samples).

    Attributes
    ----------
    tv_dataset : MNIST
        Instance of the torchvision `MNIST` dataset class.
    """

    def __init__(self, root, train=True, download=True):
        transform = Compose(
            [
                ToTensor(),
                Lambda(lambda x: x.ravel()),
            ]
        )

        self.tv_dataset = MNIST(
            root,
            train=train,
            download=download,
            transform=transform,
        )

    def __len__(self):
        """Get the length of the dataset."""
        return len(self.tv_dataset)

    def __getitem__(self, ix):
        """Get a selected sample.

        Parameters
        ----------
        ix : int
            Index of the sample to get.

        Returns
        -------
        x : torch.Tensor
            Flattened feature tensor of shape `(784,)`.

        y : torch.Tensor
            Scalar representing the ground truth label. Number between 0 and 9.
        """
        return self.tv_dataset[ix]


In [None]:
import math

import torch
import torch.nn as nn
from torch.nn.utils.prune import l1_unstructured, random_unstructured


class MLP(nn.Module):
    """Multilayer perceptron.

    The bias is included in all linear layers.

    Parameters
    ----------
    n_features : int
        Number of input features (pixels inside of MNIST images).

    hidden_layer_sizes : tuple
        Tuple of ints representing sizes of the hidden layers.

    n_targets : int
        Number of target classes (10 for MNIST).

    Attributes
    ----------
    module_list : nn.ModuleList
        List holding all the linear layers in the right order.
    """

    def __init__(self, n_features, hidden_layer_sizes, n_targets):
        super().__init__()

        layer_sizes = (n_features,) + hidden_layer_sizes + (n_targets,)
        layer_list = []

        for i in range(len(layer_sizes) - 1):
            layer_list.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))

        self.module_list = nn.ModuleList(layer_list)

    def forward(self, x):
        """Run the forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Batch of features of shape `(batch_size, n_features)`.

        Returns
        -------
        torch.Tensor
            Batch of predictions (logits) of shape `(batch_size, n_targets)`.
        """
        n_layers = len(self.module_list)

        for i, layer in enumerate(self.module_list):
            x = layer(x)

            if i < n_layers - 1:
                x = nn.functional.relu(x)

        return x


def prune_linear(linear, prune_ratio=0.3, method="l1"):
    """Prune a linear layer.

    Modifies the module in-place. We make an assumption that the bias
    is included.

    Parameters
    ----------
    linear : nn.Linear
        Linear module containing a bias.

    prune_ratio : float
        Number between 0 and 1 representing the percentage of weights
        to prune.

    method : str, {"l1", "random"}
        Pruning method to use.
    """
    if method == "l1":
        prune_func = l1_unstructured
    elif method == "random":
        prune_func = random_unstructured
    else:
        raise ValueError

    prune_func(linear, "weight", prune_ratio)
    prune_func(linear, "bias", prune_ratio)


def prune_mlp(mlp, prune_ratio=0.3, method="l1"):
    """Prune each layer of the multilayer perceptron.

    Modifies the module in-place. We make an assumption that each
    linear layer has the bias included.

    Parameters
    ----------
    mlp : MLP
        Multilayer perceptron instance.

    prune_ratio : float or list
        Number between 0 and 1 representing the percentage of weights
        to prune. If `list` then different ratio for each
        layer.

    method : str, {"l1", "random"}
        Pruning method to use.
    """
    if isinstance(prune_ratio, float):
        prune_ratios = [prune_ratio] * len(mlp.module_list)
    elif isinstance(prune_ratio, list):
        if len(prune_ratio) != len(mlp.module_list):
            raise ValueError("Incompatible number of prune ratios provided")

        prune_ratios = prune_ratio
    else:
        raise TypeError

    for prune_ratio, linear in zip(prune_ratios, mlp.module_list):
        prune_linear(linear, prune_ratio=prune_ratio, method=method)


def check_pruned_linear(linear):
    """Check if a Linear module was pruned.

    We require both the bias and the weight to be pruned.

    Parameters
    ----------
    linear : nn.Linear
        Linear module containing a bias.

    Returns
    -------
    bool
        True if the model has been pruned.
    """
    params = {param_name for param_name, _ in linear.named_parameters()}
    expected_params = {"weight_orig", "bias_orig"}

    return params == expected_params


def reinit_linear(linear):
    """Reinitialize a linear layer.

    This is an in-place operation.
    If the module has some pruning logic we are not going to remove it
    and we only initialize the underlying tensors - `weight_orig` and
    `bias_orig`.

    Parameters
    ----------
    linear : nn.Linear
        Linear model containing a bias.
    """
    is_pruned = check_pruned_linear(linear)

    # Get parameters of interest
    if is_pruned:
        weight = linear.weight_orig
        bias = linear.bias_orig
    else:
        weight = linear.weight
        bias = linear.bias

    # Initialize weight
    nn.init.kaiming_uniform_(weight, a=math.sqrt(5))

    # Initialize bias
    fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weight)
    bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
    nn.init.uniform_(bias, -bound, bound)


def reinit_mlp(mlp):
    """Reinitialize all layers of the MLP.

    Parameters
    ----------
    mlp : MLP
        Multi-layer perceptron.
    """
    for linear in mlp.module_list:
        reinit_linear(linear)


def copy_weights_linear(linear_unpruned, linear_pruned):
    """Copy weights from an unpruned model to a pruned model.

    Modifies `linear_pruned` in place.

    Parameters
    ----------
    linear_unpruned : nn.Linear
        Linear model with a bias that was not pruned.

    linear_pruned : nn.Linear
        Linear model with a bias that was pruned.
    """
    assert check_pruned_linear(linear_pruned)
    assert not check_pruned_linear(linear_unpruned)

    with torch.no_grad():
        linear_pruned.weight_orig.copy_(linear_unpruned.weight)
        linear_pruned.bias_orig.copy_(linear_unpruned.bias)


def copy_weights_mlp(mlp_unpruned, mlp_pruned):
    """Copy weights of an unpruned network to a pruned network.

    Modifies `mlp_pruned` in place.

    Parameters
    ----------
    mlp_unpruned : MLP
        MLP model that was not pruned.

    mlp_pruned : MLP
        MLP model that was pruned.
    """
    zipped = zip(mlp_unpruned.module_list, mlp_pruned.module_list)

    for linear_unpruned, linear_pruned in zipped:
        copy_weights_linear(linear_unpruned, linear_pruned)


def compute_stats(mlp):
    """Compute important statistics related to pruning.

    Parameters
    ----------
    mlp : MLP
        Multilayer perceptron.

    Returns
    -------
    dict
        Statistics.
    """
    stats = {}
    total_params = 0
    total_pruned_params = 0

    for layer_ix, linear in enumerate(mlp.module_list):
        assert check_pruned_linear(linear)

        weight_mask = linear.weight_mask
        bias_mask = linear.bias_mask

        params = weight_mask.numel() + bias_mask.numel()
        pruned_params = (weight_mask == 0).sum() + (bias_mask == 0).sum()

        total_params += params
        total_pruned_params += pruned_params

        stats[f"layer{layer_ix}_total_params"] = params
        stats[f"layer{layer_ix}_pruned_params"] = pruned_params
        stats[f"layer{layer_ix}_actual_prune_ratio"] = pruned_params / params

    stats["total_params"] = total_params
    stats["total_pruned_params"] = total_pruned_params
    stats["actual_prune_ratio"] = total_pruned_params / total_params

    return stats


In [None]:
import torch
import torch.nn as nn
import tqdm
from torch.utils.data import DataLoader

import wandb
# from data import MNISTDataset
# from utils import MLP, compute_stats, copy_weights_mlp, prune_mlp, reinit_mlp


def loop_dataloader(dataloader):
    """Loop infinitely over a dataloader.

    Parameters
    ----------
    dataloader : DataLoader
        DataLoader streaming batches of samples.

    Yields
    ------
    X_batch : torch.Tensor
        Batch of features.

    y_batch : torch.Tensor
        Batch of predictions.
    """
    while True:
        for x in iter(dataloader):
            yield x


def train(model, dataloader_train, loss_inst, optimizer, max_iter=10_000,
          dataloader_val=None, val_freq=500):
    """Run the training loop.

    Parameters
    ----------
    model : nn.Module
        Neural network (in our case MLP).

    dataloader_train : DataLoader
        Dataloader yielding training samples.

    loss_inst : callable
        Computes the loss when called.

    optimizer : torch.optim.Optimizer
        Instance of an optimizer.

    max_iter : int
        The number of iterations we run the training for
        (= number of graident descent steps).

    dataloader_val : None or DataLoader
        Dataloader yielding validation samples. If provided it will
        also single to us that we want to track metrics.

    val_freq : int
        How often evaluation run.
    """
    global device
    iterable = loop_dataloader(dataloader_train)
    iterable = tqdm.tqdm(iterable, total=max_iter)
    it = 0
    for X_batch, y_batch in iterable:
        if it == max_iter:
            break
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        logit_batch = model(X_batch)

        loss = loss_inst(logit_batch, y_batch)
        if dataloader_val is not None:
            wandb.log({"loss": loss}, step=it)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if it % val_freq == 0 and dataloader_val is not None:
            is_equal = []

            for X_batch_val, y_batch_val in dataloader_val:
                X_batch_val = X_batch_val.to(device)
                y_batch_val = y_batch_val.to(device)
                is_equal.append(
                    model(X_batch_val).argmax(dim=-1) == y_batch_val
                )

            is_equal_t = torch.cat(is_equal)
            acc = is_equal_t.sum() / len(is_equal_t)
            wandb.log({"accuracy_val": acc}, step=it)

        it += 1



def experiment(MAX_ITERS, PRUNE_ITERS, PRUNE_METHODS, PRUNE_RATIOS, REINITIALIZES , RANDOM_STATES, project_name, start_from):
    args = {"max-iter" : 50000, "batch-size" : 64, "prune-iter" : 1,
          "prune-method" : "l1", "prune-ratio" : 0.2, "val-freq" : 250,
          "reinitialize" : "false", "random-state" : 1}
    i = 0;
    for max_iter in MAX_ITERS:
      for prune_iter in PRUNE_ITERS:
        for prune_method in PRUNE_METHODS:
          for prune_ratio in PRUNE_RATIOS:
            for reinit in REINITIALIZES:
              for random_state in RANDOM_STATES:
                if i < start_from:
                  i += 1
                  continue
                args["max-iter"] = max_iter
                args["prune-iter"] = prune_iter
                args["prune-method"] = prune_method
                args["prune-ratio"] = prune_ratio
                args["reinitialisalize"] = reinit
                args["random-state"] = random_state

                wandb.init(
                    project=project_name,
                    entity="bspanfilov",
                    config=args,
                )
                wandb.define_metric("accuracy_val", summary="max")

                dataset_train = MNISTDataset(
                    "data",
                    train=True,
                    download=True,
                )
                dataset_val = MNISTDataset(
                    "data",
                    train=False,
                    download=True,
                )

                if args["random-state"] is not None:
                    torch.manual_seed(args["random-state"])

                dataloader_train = DataLoader(
                    dataset_train, batch_size=args["batch-size"], shuffle=True
                )
                dataloader_val = DataLoader(
                    dataset_val, batch_size=args["batch-size"], shuffle=True
                )

                kwargs = dict(
                    n_features=28 * 28,
                    hidden_layer_sizes=(300, 100),
                    n_targets=10,
                )

                mlp = MLP(**kwargs).to(device)

                mlp_copy = MLP(**kwargs).to(device)
                mlp_copy.load_state_dict(mlp.state_dict())

                loss_inst = nn.CrossEntropyLoss()
                optimizer = torch.optim.Adam(mlp.parameters(), lr=1.2 * 1e-3)

                # Train and prune loop
                if args["prune-ratio"] > 0:
                    per_round_prune_ratio = 1 - (1 - args["prune-ratio"]) ** (1 / args["prune-iter"])

                    per_round_prune_ratios = [per_round_prune_ratio] * len(mlp.module_list)
                    per_round_prune_ratios[-1] /= 2

                    per_round_max_iter = int(args["max-iter"] / args["prune-iter"])

                    for prune_it in range(args["prune-iter"]):
                        train(
                            mlp,
                            dataloader_train,
                            loss_inst,
                            optimizer,
                            max_iter=per_round_max_iter,
                        )
                        prune_mlp(mlp, per_round_prune_ratios, method=args["prune-method"])

                        copy_weights_mlp(mlp_copy, mlp)
                        # надо будет добавить эксперементов без копирования, вдруг тоже что-то получиться
                        # а пока что фигачу кучу моделей на mnist

                        stats = compute_stats(mlp)
                        for name, stat in stats.items():
                            summary_name = f"{name}_pruneiter={prune_it}"
                            wandb.run.summary[summary_name] = stat

                if args["reinitialize"] == "true":
                    reinit_mlp(mlp)
                # Run actual training with a final pruned network
                train(
                    mlp,
                    dataloader_train,
                    loss_inst,
                    optimizer,
                    max_iter=args["max-iter"],
                    dataloader_val=dataloader_val,
                    val_freq=args["val-freq"],
                )




In [None]:
# тут просто настройки, с которыми я как раз запускал эксперименты
MAX_ITERS=[13000]
PRUNE_ITERS=[1, 5, 8]
PRUNE_METHODS=['l1', 'random']
PRUNE_RATIOS=[0.97]
REINITIALIZES=['false', 'true']
RANDOM_STATES=[1, 3]

experiment(MAX_ITERS, PRUNE_ITERS, PRUNE_METHODS, PRUNE_RATIOS, REINITIALIZES, RANDOM_STATES, "experiment from the title", 0)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



100%|██████████| 13000/13000 [02:20<00:00, 92.55it/s] 
100%|██████████| 13000/13000 [03:52<00:00, 56.02it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▆▇▇████████████████████████████████████
loss,█▃▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▂▁▂▁▂▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:15<00:00, 95.91it/s] 
100%|██████████| 13000/13000 [03:54<00:00, 55.37it/s]


0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▃▁▂▃▂▂▂▁▁▂▁▂▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▃▁▁▁▂▂▂▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:17<00:00, 94.54it/s]
100%|██████████| 13000/13000 [03:54<00:00, 55.38it/s]


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.081503…

0,1
accuracy_val,▁▆▇▇████████████████████████████████████
loss,█▃▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▂▁▂▁▂▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:20<00:00, 92.25it/s]
100%|██████████| 13000/13000 [03:55<00:00, 55.30it/s]


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.081495…

0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▃▁▂▃▂▂▂▁▁▂▁▂▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▃▁▁▁▂▂▂▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:18<00:00, 93.73it/s]
100%|██████████| 13000/13000 [03:51<00:00, 56.13it/s]


0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▃▃▄▂▂▂▂▂▂▃▃▂▂▂▂▂▁▃▂▄▁▂▄▁▂▁▃▁▂▁▂▁▁▂▂▂▁▁▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:20<00:00, 92.29it/s] 
100%|██████████| 13000/13000 [03:53<00:00, 55.67it/s]


0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▄▂▄▅▂▃▄▂▁▃▂▂▃▁▂▂▂▂▃▂▁▂▂▃▁▂▂▂▃▁▂▃▁▁▂▂▂▂▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:18<00:00, 93.99it/s] 
100%|██████████| 13000/13000 [03:51<00:00, 56.12it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▃▃▄▂▂▂▂▂▂▃▃▂▂▂▂▂▁▃▂▄▁▂▄▁▂▁▃▁▂▁▂▁▁▂▂▂▁▁▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 13000/13000 [02:21<00:00, 91.82it/s]
100%|██████████| 13000/13000 [03:57<00:00, 54.81it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▇▇█████████████████████████████████████
loss,█▄▂▄▅▂▃▄▂▁▃▂▂▃▁▂▂▂▂▃▂▁▂▂▃▁▂▂▂▃▁▂▃▁▁▂▂▂▂▂

0,1
actual_prune_ratio_pruneiter=0,0.96816
layer0_actual_prune_ratio_pruneiter=0,0.97
layer0_pruned_params_pruneiter=0,228435.0
layer0_total_params_pruneiter=0,235500.0
layer1_actual_prune_ratio_pruneiter=0,0.97
layer1_pruned_params_pruneiter=0,29197.0
layer1_total_params_pruneiter=0,30100.0
layer2_actual_prune_ratio_pruneiter=0,0.48515
layer2_pruned_params_pruneiter=0,490.0
layer2_total_params_pruneiter=0,1010.0


100%|██████████| 2600/2600 [00:28<00:00, 89.91it/s]
100%|██████████| 2600/2600 [00:30<00:00, 86.00it/s]
100%|██████████| 2600/2600 [00:29<00:00, 88.41it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.52it/s] 
100%|██████████| 2600/2600 [00:28<00:00, 90.86it/s]
100%|██████████| 13000/13000 [04:03<00:00, 53.29it/s]


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.093121…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:28<00:00, 92.11it/s] 
100%|██████████| 2600/2600 [00:28<00:00, 91.10it/s] 
100%|██████████| 2600/2600 [00:28<00:00, 89.99it/s]
100%|██████████| 2600/2600 [00:30<00:00, 85.01it/s]
100%|██████████| 2600/2600 [00:29<00:00, 88.65it/s]
100%|██████████| 13000/13000 [04:03<00:00, 53.29it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:28<00:00, 91.45it/s]
100%|██████████| 2600/2600 [00:30<00:00, 86.48it/s]
100%|██████████| 2600/2600 [00:29<00:00, 89.19it/s]
100%|██████████| 2600/2600 [00:28<00:00, 89.69it/s]
100%|██████████| 2600/2600 [00:29<00:00, 88.29it/s]
100%|██████████| 13000/13000 [04:06<00:00, 52.71it/s]


VBox(children=(Label(value='0.001 MB of 0.012 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.065220…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669748033336873, max=1.0…

100%|██████████| 2600/2600 [00:28<00:00, 89.66it/s]
100%|██████████| 2600/2600 [00:30<00:00, 85.81it/s]
100%|██████████| 2600/2600 [00:29<00:00, 88.27it/s]
100%|██████████| 2600/2600 [00:29<00:00, 87.99it/s]
100%|██████████| 2600/2600 [00:29<00:00, 89.37it/s]
100%|██████████| 13000/13000 [04:04<00:00, 53.24it/s]


VBox(children=(Label(value='0.001 MB of 0.012 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.065132…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:28<00:00, 90.76it/s]
100%|██████████| 2600/2600 [00:29<00:00, 87.37it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.22it/s]
100%|██████████| 2600/2600 [00:28<00:00, 89.76it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.34it/s]
100%|██████████| 13000/13000 [04:01<00:00, 53.87it/s]


0,1
accuracy_val,▁▂▆▇▇███████████████████████████████████
loss,█▆▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▂▁▁▂▁▁▁▁▂▁▂▁▁▁▁▁▁▂▂▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:27<00:00, 93.23it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.73it/s]
100%|██████████| 2600/2600 [00:29<00:00, 87.13it/s]
100%|██████████| 2600/2600 [00:28<00:00, 89.80it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.25it/s] 
100%|██████████| 13000/13000 [04:00<00:00, 54.14it/s]


0,1
accuracy_val,▁▁▅▇▇▇██████████████████████████████████
loss,█▇▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:27<00:00, 93.67it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.66it/s]
100%|██████████| 2600/2600 [00:29<00:00, 86.82it/s]
100%|██████████| 2600/2600 [00:28<00:00, 91.37it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.78it/s] 
100%|██████████| 13000/13000 [04:01<00:00, 53.73it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▂▆▇▇███████████████████████████████████
loss,█▆▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▂▁▁▂▁▁▁▁▂▁▂▁▁▁▁▁▁▂▂▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 2600/2600 [00:27<00:00, 93.60it/s] 
100%|██████████| 2600/2600 [00:29<00:00, 88.08it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.15it/s]
100%|██████████| 2600/2600 [00:28<00:00, 90.92it/s]
100%|██████████| 2600/2600 [00:29<00:00, 86.84it/s] 
100%|██████████| 13000/13000 [03:59<00:00, 54.27it/s]


0,1
accuracy_val,▁▁▅▇▇▇██████████████████████████████████
loss,█▇▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂

0,1
actual_prune_ratio_pruneiter=0,0.50311
actual_prune_ratio_pruneiter=1,0.75286
actual_prune_ratio_pruneiter=2,0.8769
actual_prune_ratio_pruneiter=3,0.93855
actual_prune_ratio_pruneiter=4,0.96923
layer0_actual_prune_ratio_pruneiter=0,0.50406
layer0_actual_prune_ratio_pruneiter=1,0.75405
layer0_actual_prune_ratio_pruneiter=2,0.87802
layer0_actual_prune_ratio_pruneiter=3,0.93951
layer0_actual_prune_ratio_pruneiter=4,0.97


100%|██████████| 1625/1625 [00:19<00:00, 83.68it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.51it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.27it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.08it/s]
100%|██████████| 1625/1625 [00:17<00:00, 90.99it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.51it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.83it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.24it/s]
100%|██████████| 13000/13000 [04:04<00:00, 53.27it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


100%|██████████| 1625/1625 [00:17<00:00, 92.34it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.61it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.93it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.81it/s]
100%|██████████| 1625/1625 [00:17<00:00, 90.59it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.88it/s]
100%|██████████| 1625/1625 [00:18<00:00, 87.96it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.53it/s]
100%|██████████| 13000/13000 [04:05<00:00, 52.90it/s]


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.083507…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


100%|██████████| 1625/1625 [00:17<00:00, 92.68it/s]
100%|██████████| 1625/1625 [00:18<00:00, 85.60it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.21it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.56it/s]
100%|██████████| 1625/1625 [00:19<00:00, 84.68it/s]
100%|██████████| 1625/1625 [00:17<00:00, 92.02it/s]
100%|██████████| 1625/1625 [00:19<00:00, 85.51it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.99it/s]
100%|██████████| 13000/13000 [04:07<00:00, 52.52it/s]


VBox(children=(Label(value='0.001 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.056746…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01667030083335703, max=1.0)…

100%|██████████| 1625/1625 [00:17<00:00, 92.80it/s]
100%|██████████| 1625/1625 [00:18<00:00, 88.76it/s]
100%|██████████| 1625/1625 [00:18<00:00, 88.06it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.82it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.16it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.18it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.64it/s]
100%|██████████| 1625/1625 [00:18<00:00, 85.77it/s]
100%|██████████| 13000/13000 [04:07<00:00, 52.53it/s]


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.500596…

0,1
accuracy_val,▁▇██████████████████████████████████████
loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


100%|██████████| 1625/1625 [00:17<00:00, 90.42it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.36it/s]
100%|██████████| 1625/1625 [00:18<00:00, 87.28it/s] 
100%|██████████| 1625/1625 [00:17<00:00, 91.84it/s]
100%|██████████| 1625/1625 [00:18<00:00, 88.67it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.40it/s]
100%|██████████| 1625/1625 [00:17<00:00, 91.84it/s]
100%|██████████| 1625/1625 [00:18<00:00, 87.18it/s]
100%|██████████| 13000/13000 [04:07<00:00, 52.60it/s]


0,1
accuracy_val,▁▂▅▇▇▇██████████████████████████████████
loss,█▆▃▃▂▂▂▂▂▂▂▂▂▂▁▃▁▂▂▂▁▁▁▁▂▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666875529999743, max=1.0)…

100%|██████████| 1625/1625 [00:18<00:00, 88.28it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.86it/s]
100%|██████████| 1625/1625 [00:19<00:00, 85.37it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.99it/s]
100%|██████████| 1625/1625 [00:18<00:00, 88.55it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.28it/s]
100%|██████████| 1625/1625 [00:17<00:00, 90.74it/s]
100%|██████████| 1625/1625 [00:20<00:00, 79.62it/s]
100%|██████████| 13000/13000 [04:11<00:00, 51.75it/s]


0,1
accuracy_val,▁▁▃▇▇▇██████████████████████████████████
loss,██▃▂▂▃▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▁▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▂▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666874500000025, max=1.0)…

100%|██████████| 1625/1625 [00:17<00:00, 90.92it/s] 
100%|██████████| 1625/1625 [00:19<00:00, 85.40it/s]
100%|██████████| 1625/1625 [00:19<00:00, 84.11it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.54it/s]
100%|██████████| 1625/1625 [00:19<00:00, 84.26it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.44it/s]
100%|██████████| 1625/1625 [00:19<00:00, 84.96it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.46it/s]
100%|██████████| 13000/13000 [04:11<00:00, 51.59it/s]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy_val,▁▂▅▇▇▇██████████████████████████████████
loss,█▆▃▃▂▂▂▂▂▂▂▂▂▂▁▃▁▂▂▂▁▁▁▁▂▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁

0,1
actual_prune_ratio_pruneiter=0,0.3542
actual_prune_ratio_pruneiter=1,0.58283
actual_prune_ratio_pruneiter=2,0.73042
actual_prune_ratio_pruneiter=3,0.82572
actual_prune_ratio_pruneiter=4,0.88725
actual_prune_ratio_pruneiter=5,0.92702
actual_prune_ratio_pruneiter=6,0.95271
actual_prune_ratio_pruneiter=7,0.96932
layer0_actual_prune_ratio_pruneiter=0,0.35488
layer0_actual_prune_ratio_pruneiter=1,0.58382


100%|██████████| 1625/1625 [00:18<00:00, 90.26it/s]
100%|██████████| 1625/1625 [00:19<00:00, 83.79it/s]
100%|██████████| 1625/1625 [00:18<00:00, 88.51it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.79it/s]
100%|██████████| 1625/1625 [00:18<00:00, 86.99it/s]
100%|██████████| 1625/1625 [00:18<00:00, 89.58it/s]
100%|██████████| 1625/1625 [00:20<00:00, 78.86it/s]
100%|██████████| 1625/1625 [00:18<00:00, 90.10it/s]
100%|██████████| 13000/13000 [04:14<00:00, 51.09it/s]
