In [1]:
import copy
from time import time
from pathlib import Path

import torch
import tltorch
from torchsummary import summary
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from tddl.trainer import Trainer

In [2]:
# IPython magics: enable the autoreload extension in mode 2, so imported
# modules are re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [3]:
# Load the previously trained network from disk.
# NOTE: torch.load unpickles the file and the result is used directly as the
# model object, so only point this at checkpoints you trust.
model_path = Path("/home/jetzeschuurman/gitProjects/phd/tddl/artifacts/1625154185/model_52")
model = torch.load(model_path)


In [4]:
# Show the layer structure of the loaded model (module repr).
print(model)

ModifiedVGG16Model(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, paddin

In [5]:
# Per-layer output shapes and parameter counts for an input of shape (3, 224, 224).
input_shape = (3, 224, 224)
summary(model, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [6]:
# Fine-tune a copy of `model` in which a single conv layer of `model.features`
# has been replaced by a factorized convolution (tltorch.FactorizedConv).
#
# Requires `model` to be loaded already. Writes TensorBoard logs and model
# checkpoints below a fresh, timestamped directory inside `logdir`.

# --- Experiment configuration -------------------------------------------------
layer_nr = 19                       # index into model.features of the conv layer to factorize
rank = 0.5                          # forwarded as `rank=` to FactorizedConv.from_conv
factorization = 'tucker'
decompose_weights = True            # forwarded as `decompose_weights=` to from_conv
with_init = not decompose_weights   # only re-draw the weights when they are not decomposed

epochs = 10
lr = 1e-3

train_path = "/bigdata/dogs-vs-cats/train/"
valid_path = "/bigdata/dogs-vs-cats/valid/"

# --- Build the factorized model -----------------------------------------------
conv = model.features[layer_nr]
fact_conv = tltorch.FactorizedConv.from_conv(
    conv,
    rank=rank,
    decompose_weights=decompose_weights,
    factorization=factorization,
)

if with_init:
    # Random re-initialisation of the factorized weight: mean 0, std 0.02.
    fact_conv.weight.normal_(0, 0.02)

# Work on a deep copy so the loaded `model` itself stays unmodified.
fact_model = copy.deepcopy(model)
fact_model.features[layer_nr] = fact_conv

# --- Output locations -----------------------------------------------------------
logdir = "/home/jetzeschuurman/gitProjects/phd/tddl/artifacts"

# One sub-directory per run, named after the current Unix time in seconds.
t = round(time())
logdir = Path(logdir).joinpath(str(t))
save = {
    "save_every_epoch": 1,
    "save_location": str(logdir),
    "save_best": True,
    "save_final": True,
    "save_model_name": f"fact_model_conv_{layer_nr}_" + factorization + f"_{lr}_" + str(with_init),
}

# --- Evaluate, fine-tune, and always release the TensorBoard writer -------------
writer = SummaryWriter(log_dir=logdir.joinpath('runs'))
try:
    optimizer = optim.SGD(fact_model.parameters(), lr=lr, momentum=0.99)
    trainer = Trainer(train_path, valid_path, fact_model, optimizer, writer, save=save, batch_size=32)

    # Accuracy of the factorized model before any fine-tuning.
    train_acc = trainer.test(loader="train")
    writer.add_scalar("Accuracy/before_finetuning/train", train_acc)

    valid_acc = trainer.test()
    writer.add_scalar("Accuracy/before_finetuning/valid", valid_acc)

    trainer.train(epochs=epochs)
finally:
    # Close the writer even if evaluation or training raises (e.g. CUDA OOM),
    # so pending events are flushed and the event file is not left open.
    writer.close()

100%|██████████| 625/625 [00:34<00:00, 18.14it/s]
100%|██████████| 63/63 [00:03<00:00, 17.20it/s]
625it [01:48,  5.77it/s, loss=0.315]
100%|██████████| 625/625 [00:34<00:00, 18.09it/s]
100%|██████████| 63/63 [00:03<00:00, 17.10it/s]
625it [01:48,  5.77it/s, loss=0.122]
100%|██████████| 625/625 [00:34<00:00, 18.01it/s]
100%|██████████| 63/63 [00:03<00:00, 17.23it/s]
625it [01:48,  5.78it/s, loss=0.29]
100%|██████████| 625/625 [00:34<00:00, 18.07it/s]
100%|██████████| 63/63 [00:03<00:00, 17.14it/s]
625it [01:48,  5.78it/s, loss=0.134]
100%|██████████| 625/625 [00:34<00:00, 18.04it/s]
100%|██████████| 63/63 [00:03<00:00, 17.21it/s]
625it [01:48,  5.77it/s, loss=0.222]
100%|██████████| 625/625 [00:34<00:00, 18.03it/s]
100%|██████████| 63/63 [00:03<00:00, 17.26it/s]
625it [01:48,  5.78it/s, loss=0.168]
100%|██████████| 625/625 [00:34<00:00, 18.12it/s]
100%|██████████| 63/63 [00:03<00:00, 17.02it/s]
625it [01:48,  5.77it/s, loss=0.262]
100%|██████████| 625/625 [00:34<00:00, 18.07it/s]
100%|█

In [3]:
# Grab the module at index 1 of the classifier head and show its configuration.
lin = model.classifier[1]
print(lin)


Linear(in_features=25088, out_features=4096, bias=True)


In [9]:
from tddl.utils.prime_factors import get_prime_factors

# Replace `model.classifier[1]` by a factorized linear layer
# (tltorch.FactorizedLinear) and fine-tune the classifier parameters.
#
# Requires `model` and `lin` (the layer currently at model.classifier[1]) to be
# defined by earlier cells. NOTE: this cell modifies `model` in place.

# --- Experiment configuration -------------------------------------------------
layer = 'lin_32'              # label used in the checkpoint file name
factorization = 'tucker'
decompose_weights = False     # False -> the factorized weight is drawn at random below

epochs = 10
lr = 1e-3

train_path = "/bigdata/dogs-vs-cats/train/"
valid_path = "/bigdata/dogs-vs-cats/valid/"

# --- Output locations -----------------------------------------------------------
logdir = "/home/jetzeschuurman/gitProjects/phd/tddl/artifacts"
# One sub-directory per run, named after the current Unix time in seconds.
t = round(time())
logdir = Path(logdir).joinpath(str(t))

save = {
    "save_every_epoch": 1,
    "save_location": str(logdir),
    "save_best": True,
    "save_final": True,
    "save_model_name": "fact_model_" + layer + "_" + factorization,
}

writer = SummaryWriter(log_dir=logdir.joinpath('runs'))
try:
    writer.add_scalar("Hyperparameters/init/decompose_weights", int(decompose_weights))
    writer.add_scalar("Hyperparameters/lr", lr)

    # Alternative tensorization (kept for reference): split both feature
    # dimensions into their prime factors instead of using a single mode each.
    # fact_lin = tltorch.FactorizedLinear(
    #     in_tensorized_features=get_prime_factors(lin.in_features),
    #     out_tensorized_features=get_prime_factors(lin.out_features),
    #     factorization=factorization,
    #     rank=0.5,
    # )
    fact_lin = tltorch.FactorizedLinear(
        in_tensorized_features=(lin.in_features,),
        out_tensorized_features=(lin.out_features,),
        factorization=factorization,
        rank=0.5,
    )

    if not decompose_weights:
        # Random initialisation of the factorized weight; log its parameters.
        mean = 0
        std = 0.02
        fact_lin.weight.normal_(mean, std)
        writer.add_scalar("Hyperparameters/init/mean", mean)
        writer.add_scalar("Hyperparameters/init/std", std)

    model.classifier[1] = fact_lin

    # Only the classifier parameters of `model` are handed to the optimizer.
    optimizer = optim.SGD(model.classifier.parameters(), lr=lr, momentum=0.99)

    # Train the SAME model the factorized layer was inserted into and whose
    # parameters the optimizer holds. Passing `fact_model` here (a different
    # object created by an earlier cell) would evaluate and train a network
    # that does not contain `fact_lin`, while the optimizer would step on
    # parameters that never receive gradients.
    trainer = Trainer(train_path, valid_path, model, optimizer, writer, save=save)

    # Accuracy of the factorized model before any fine-tuning.
    train_acc = trainer.test(loader="train")
    writer.add_scalar("Accuracy/before_finetuning/train", train_acc)

    valid_acc = trainer.test()
    writer.add_scalar("Accuracy/before_finetuning/valid", valid_acc)

    # NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
    # error while allocating 392 MiB, which is exactly the size of a dense
    # float32 25088 x 4096 matrix -- possibly the full weight being
    # reconstructed from its factors; confirm before re-running.
    trainer.train(epochs=epochs)
finally:
    # Close the writer even if evaluation or training raises, so pending
    # events are flushed and the event file is not left open.
    writer.close()

RuntimeError: CUDA out of memory. Tried to allocate 392.00 MiB (GPU 0; 11.78 GiB total capacity; 9.75 GiB already allocated; 370.94 MiB free; 10.01 GiB reserved in total by PyTorch)

In [11]:
# Input / output widths of the Linear layer printed earlier in this notebook.
in_features, out_features = 25088, 4096

In [9]:
# Scratch inspection: `.parameters()` returns a generator, so the cell output is
# only the generator object, not the parameter tensors themselves.
fact_model.features.parameters()

<generator object Module.parameters at 0x7fa3539a87b0>

In [10]:
# Independent deep copy of `model`; later changes to m2 leave `model` untouched.
m2 = copy.deepcopy(model)

In [11]:
# Freeze m2: switch off gradient tracking on every one of its parameters.
for frozen_weight in m2.parameters():
    frozen_weight.requires_grad_(False)

In [13]:
# Scratch expression: lazily select the parameters of m2 that still require
# gradients. `filter` returns an iterator; nothing is evaluated until it is consumed.
filter(lambda p: p.requires_grad, m2.parameters())

StopIteration: 

In [19]:
# SGD over only those parameters of m2 that are still trainable.
# NOTE(review): every parameter of m2 was frozen in an earlier cell, so this
# list is empty until a trainable layer has been inserted into m2 (the cell
# that assigns m2.features[layer_nr] was executed before this one, judging by
# the execution counts). SGD presumably rejects an empty parameter list --
# verify the cell order before running the notebook top to bottom.
trainable_params = [p for p in m2.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=lr, momentum=0.99)

In [18]:
# Swap one conv layer of the frozen copy `m2` for a factorized convolution
# built from the corresponding layer of `model`.
# `factorization` and `decompose_weights` come from earlier cells.
layer_nr, rank = 19, 0.5

conv = model.features[layer_nr]
fact_conv = tltorch.FactorizedConv.from_conv(
    conv,
    factorization=factorization,
    decompose_weights=decompose_weights,
    rank=rank,
)
m2.features[layer_nr] = fact_conv



In [20]:
# Scratch check: substring membership test on two string literals (evaluates to True).
"feat" in "features"

True