In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code (e.g. `tddl`) are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
from pathlib import Path

import torch

# Directory holding the artefacts of one trained baseline run.
# NOTE(review): absolute, machine-specific path; the notebook only runs where it exists.
baseline_path = Path("/bigdata/cifar10/logs/garipov/baselines/1647358615/gar_18_dNone_128_sgd_l0.1_g0.1_w0.0_sTrue")

# Restore the checkpoint stored as "cnn_best.pth" in that directory.
baseline_model = torch.load(baseline_path / "cnn_best.pth")

# Read the metrics stored next to the checkpoint and show them as the cell output.
results_file = baseline_path / 'results.json'
with results_file.open() as json_file:
    baseline_result = json.load(json_file)
baseline_result



{'best_epoch': 38,
 'best_train_acc': 0.9749333333333333,
 'best_valid_acc': 0.9078,
 'best_valid_loss': 0.003568341651931405,
 'test_acc': 0.891,
 'test_loss': 0.004007616302371025,
 'n_param': 557642,
 'model_name': 'gar_18_dNone_128_sgd_l0.1_g0.1_w0.0_sTrue'}

In [3]:
# Display the loaded model's repr (its layer-by-layer structure) as the cell output.
baseline_model

GaripovNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=128, out_features=1

In [5]:
from tddl.factorizations import number_layers, listify_numbered_layers, get_weights

# Number the modules of the baseline model. A later cell in this notebook shows
# the result of number_layers() as a dict of name -> (number, module).
numbered_layers = number_layers(baseline_model)
# Layer numbers selected for factorization. In the numbering displayed later for
# the factorized copy, 2, 4 and 6 are conv2, conv3 and conv4; 8 and 10 are
# presumably conv5 and conv6 (TODO confirm).
gar_layers = [2,4,6,8,10]
# One entry per selected layer. Other cells read index 1 of an entry as the
# layer number and index 2 as the module handed to the factorization helpers.
layers = listify_numbered_layers(numbered_layers, layer_nrs=gar_layers)

In [6]:
# Fetch the weights of the selected layers from the baseline model.
# NOTE(review): `weights` is not read by any cell visible in this notebook.
weights = get_weights(baseline_model, gar_layers)

In [14]:
from tddl.dbs import find_rank_given_error

# Try the helper on the first selected layer only (entry 0, module at index 2)
# with desired_error=0.5. The recorded result is a 3-tuple: a FactorizedConv,
# the value 0.203125 (presumably the rank setting that was found) and a tensor
# of about 0.5003 (presumably the error actually reached) -- confirm in tddl.dbs.
find_rank_given_error(layers[0][2], desired_error = 0.5)



(FactorizedConv(
   in_channels=64, out_channels=64, kernel_size=(3, 3), rank=(23, 23, 3, 3), order=2, padding=[1, 1], 
   (weight): TuckerTensor(shape=(64, 64, 3, 3), rank=(23, 23, 3, 3))
 ),
 0.203125,
 tensor(0.5003, device='cuda:0'))

In [49]:
from tddl.dbs import compress_layers_with_desired_error

# Apply the same desired_error=0.5 to every selected layer. The recorded result
# is a pair: a single number (about 0.826) and a list with one value per layer.
# The first list value, 0.203125, equals the value found for layers[0] by
# find_rank_given_error, so the list presumably holds per-layer rank settings
# (TODO confirm what the leading number represents).
compress_layers_with_desired_error(layers, desired_error=0.5)

(0.8260236135181975, [0.203125, 0.28125, 0.4375, 0.375, 0.02734375])

In [7]:
from tddl.dbs import find_error_given_c
from tddl.utils.model_stats import count_parameters

# Parameter count of the unfactorized baseline model, handed to the search below.
baseline_count = count_parameters(baseline_model)
# Search for per-layer ranks with desired_c=0.5; `ranks` is reused later to build
# the factorized model. desired_c is presumably the target ratio of factorized to
# baseline parameter count (the ratio computed later in this notebook is about
# 0.51) -- TODO confirm. The call prints a trace while it runs; in the recorded
# output it stops once the second printed value is about 0.501 and the flag
# printed last turns False.
ranks, c, error = find_error_given_c(layers, desired_c = 0.5, baseline_count=baseline_count)

0.5




0.2735034305163528
0.22649656948364721
True
----------
0.25
0.6774041410080303
-0.17740414100803026
True
----------
0.375
0.4269280290939348
0.07307197090606521
True
----------
0.3125
0.5415122964195667
-0.04151229641956666
True
----------
0.34375
0.48262146681921375
0.01737853318078625
True
----------
0.328125
0.5128182597437065
-0.01281825974370654
True
----------
0.3359375
0.5011727954494102
-0.00117279544941018
False


In [8]:
# Display the per-layer ranks returned by find_error_given_c (one value per selected layer).
ranks

[0.4375, 0.5625, 0.75, 0.65625, 0.09375]

In [43]:
import numpy as np


# No visible cell imports `factorize_layer`, so on a fresh kernel this cell
# failed with NameError. NOTE(review): it is imported here from the module that
# provides the other factorization helpers used in this notebook -- confirm
# that this is where it lives.
from tddl.factorizations import factorize_layer

# Rank setting passed unchanged to factorize_layer for every selected layer.
rank = 0.5

# Error reported by factorize_layer (return_error=True) for each selected layer.
# Values are gathered in a list and converted once, instead of re-allocating
# the array with np.append on every iteration.
layer_errors = []
for layer in layers:
    with torch.no_grad():
        fact_layer, error = factorize_layer(layer[2], 'tucker', rank, return_error=True)
    # .item() yields the Python float directly, wherever the tensor is stored.
    layer_errors.append(error.item())

# float64 array, as before; downstream cells call .mean()/.max()/.min() on it.
errors = np.array(layer_errors, dtype=float)

In [46]:
# Mean, largest and smallest of the per-layer errors collected in `errors`.
# Note that `error` is rebound here from the last loop value to the mean.
error, max_error, min_error = errors.mean(), errors.max(), errors.min()

In [47]:
# Repeat the desired_c=0.5 search, this time passing the mean / max / min of the
# errors collected in `errors` as error, max_error and min_error.
# NOTE(review): unlike the earlier call in this notebook, no baseline_count is
# passed and the return value is not stored. In the recorded output the last two
# printed values stop changing (0.0680... / 0.4319...) while the first keeps
# creeping up, and the run ends with a manual KeyboardInterrupt -- the search
# did not terminate with these arguments. Confirm the intended arguments and
# stopping rule in tddl.dbs.find_error_given_c before relying on this cell.
find_error_given_c(layers, desired_c = 0.5, error=error, max_error=max_error, min_error=min_error)

0.35240626633167266
-0.41636698440207964
0.9163669844020796
True
----------
0.4085878893733025
-0.1684683708838821
0.6684683708838821
True
----------
0.43667870089411737
-0.07165294627383023
0.5716529462738302
True
----------
0.4507241066545248
0.0006878249566724115
0.4993121750433276
True
----------
0.4577468095347285
0.035934792027729645
0.46406520797227035
True
----------
0.4612581609748304
0.043224653379549394
0.4567753466204506
True
----------
0.46301383669488133
0.06800259965337951
0.4319974003466205
True
----------
0.46389167455490676
0.06800259965337951
0.4319974003466205
True
----------
0.4643305934849195
0.06800259965337951
0.4319974003466205
True
----------
0.4645500529499259
0.06800259965337951
0.4319974003466205
True
----------
0.4646597826824291
0.06800259965337951
0.4319974003466205
True
----------
0.46471464754868064
0.06800259965337951
0.4319974003466205
True
----------
0.46474207998180644
0.06800259965337951
0.4319974003466205
True
----------
0.4647557961983694
0.0680

KeyboardInterrupt: 

In [9]:
import copy
from tddl.factorizations import factorize_network, factorize_network_with_ranks

# Decomposition type handed to the factorization helper.
factorization = 'tucker'

# Layer number of every selected layer (index 1 of each entry in `layers`).
layer_nrs = [entry[1] for entry in layers]

# Factorize a deep copy so `baseline_model` itself is left untouched. The
# recorded output lists each layer number followed by the rank applied to it,
# i.e. `layer_nrs` and `ranks` are consumed position by position.
fact_model = factorize_network_with_ranks(
    copy.deepcopy(baseline_model),
    layer_nrs,
    ranks,
    factorization=factorization,
)

2
0.4375
4
0.5625
6
0.75
8
0.65625
10
0.09375


In [10]:
# Display the numbered modules of the factorized model; in the recorded output
# the selected layers (2, 4, 6, ...) appear as FactorizedConv with Tucker weights.
number_layers(fact_model)

{'conv1': (0,
  Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))),
 'conv1_bn': (1,
  BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'conv2': (2,
  FactorizedConv(
    in_channels=64, out_channels=64, kernel_size=(3, 3), rank=(36, 36, 3, 3), order=2, padding=[1, 1], 
    (weight): TuckerTensor(shape=(64, 64, 3, 3), rank=(36, 36, 3, 3))
  )),
 'conv2_bn': (3,
  BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'conv3': (4,
  FactorizedConv(
    in_channels=64, out_channels=128, kernel_size=(3, 3), rank=(80, 40, 3, 3), order=2, padding=[1, 1], 
    (weight): TuckerTensor(shape=(128, 64, 3, 3), rank=(80, 40, 3, 3))
  )),
 'conv3_bn': (5,
  BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 'conv4': (6,
  FactorizedConv(
    in_channels=128, out_channels=128, kernel_size=(3, 3), rank=(98, 98, 3, 3), order=2, padding=[1, 1], 
    (weight): TuckerTensor(shape=(128, 128, 3, 3)

In [98]:
# Parameter count of the factorized model (283581 in the recorded output,
# against 'n_param': 557642 in the baseline's results.json).
count_parameters(fact_model)

283581

In [99]:
# Parameter count of the baseline model. NOTE(review): this repeats the
# assignment already made in the find_error_given_c cell.
baseline_count = count_parameters(baseline_model)

In [100]:
# Fraction of the baseline's parameters that remain after factorization
# (about 0.51 in the recorded output, close to the requested desired_c of 0.5).
fact_count = count_parameters(fact_model)
param_ratio = fact_count / count_parameters(baseline_model)
param_ratio

0.5085359424146675

In [95]:
# Display `ranks` once more; the recorded value is the same as shown right after the search.
ranks

[0.4375, 0.5625, 0.75, 0.65625, 0.09375]

In [None]:
# NOTE(review): unexecuted duplicate of the preceding `ranks` display cell.
ranks