In [1]:
# mlflow ui --port 6010 --backend-store-uri file:/share/lazy/will/ConstrastiveLoss/Logs
# watch -n 0.5 nvidia-smi

In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from torchvision import datasets, transforms, utils

from VQVAE import VQVAE_Encoder as small_model
from VQVAE import VQVAE as big_model
from VQVAE import Decoder, Encoder, Quantize

from train import knowledge_distillation
from train import train
from utilities import start_mlflow_experiment, Params, save_to_mlflow, count_parameters, load_full_state, select_gpu

from tqdm import tqdm
import mlflow

In [3]:
# %%writefile architecture.txt

class VQVAE_Encoder(nn.Module):
    '''
    Encoder/quantiser part of a two-level (bottom + top) VQ-VAE.

    Only the two encoders, the two quantisers and the top-level decoder are
    built here; the final image decoder is not, so forward() returns the
    summed quantiser diff instead of a reconstruction.

    params: in_channel=3, channel=128, n_res_block=2, n_res_channel=32, embed_dim=64, n_embed=512, decay=0.99

    NOTE(review): n_res_block, n_res_channel and decay are accepted but never
    read anywhere in this class -- confirm whether they should be forwarded
    to Encoder / Decoder / Quantize.
    '''
    def __init__(
        self,
        in_channel=3,
        channel=128,
        n_res_block=2,
        n_res_channel=32,
        embed_dim=64,
        n_embed=512,
        decay=0.99
    ):
        '''
        Build the sub-modules.

        params: in_channel=3, channel=128, n_res_block=2, n_res_channel=32, embed_dim=64, n_embed=512, decay=0.99
        '''
        super().__init__()

        # Bottom encoder consumes the raw input, top encoder consumes the
        # bottom features. Both are created with downsample='Once'; the
        # bottom one additionally uses stride=2 with a 5x5 kernel.
        self.enc_b = Encoder(
            in_channel=in_channel,
            channel=channel,
            extra_layers=1,
            stride=2,
            kernel_size=5,
            residual=False,
            extra_residual_blocks=1,
            downsample='Once',
        )
        self.enc_t = Encoder(
            in_channel=channel,
            channel=channel,
            extra_layers=1,
            stride=1,
            kernel_size=3,
            residual=False,
            extra_residual_blocks=1,
            downsample='Once',
        )

        # Top level: 1x1 projection to embed_dim, followed by its quantiser.
        self.quantize_conv_t = nn.Conv2d(channel, embed_dim, 1)
        self.quantize_t = Quantize(embed_dim, n_embed)

        # The quantised top code is decoded and concatenated with the bottom
        # features, hence the (embed_dim + channel) input width below.
        self.dec_t = Decoder(embed_dim, embed_dim, channel, extra_residual_blocks=1, upsample='Once')
        self.quantize_conv_b = nn.Conv2d(embed_dim + channel, embed_dim, 1)
        self.quantize_b = Quantize(embed_dim, n_embed)
        # The top-code upsampler and the final image decoder are deliberately
        # not constructed in this encoder-only variant.

    def forward(self, input):
        '''Run encode() and return only its third element (diff_t + diff_b).'''
        return self.encode(input)[2]

    def _quantize(self, projection, quantizer, features):
        '''
        Project `features`, quantise them, and restore the axis order.

        Axis 1 is moved to the last position before calling `quantizer` and
        moved back afterwards; the diff returned by the quantiser gets a new
        leading axis. Returns (quantised, diff, ids).
        '''
        channels_last = projection(features).permute(0, 2, 3, 1)
        quantised, diff, ids = quantizer(channels_last)
        return quantised.permute(0, 3, 1, 2), diff.unsqueeze(0), ids

    def encode(self, input):
        '''
        Encode `input` into top and bottom quantised codes.

        Returns (quant_t, quant_b, diff_t + diff_b, id_t, id_b).
        '''
        bottom_features = self.enc_b(input)
        top_features = self.enc_t(bottom_features)

        quant_t, diff_t, id_t = self._quantize(self.quantize_conv_t, self.quantize_t, top_features)

        # Condition the bottom level on the decoded top code by concatenating
        # along axis 1.
        conditioned = torch.cat([self.dec_t(quant_t), bottom_features], 1)

        quant_b, diff_b, id_b = self._quantize(self.quantize_conv_b, self.quantize_b, conditioned)

        return quant_t, quant_b, diff_t + diff_b, id_t, id_b

In [4]:
# device = select_gpu(1)
# NOTE(review): Params is called positionally. Judging by the attributes read
# below (batch_size, epoch, lr, size, device) the order is presumably
# (batch_size, epoch, lr, size, device) -- confirm against utilities.Params.
args = Params(16, 10, 4e-4, 256, 'cuda:0')

start_mlflow_experiment('VQVAE2 Knowledge distillation', 'lane-finder')

# Resize and centre-crop to args.size, convert to a tensor, then normalise
# every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(args.size),
    transforms.CenterCrop(args.size),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

dataset = datasets.ImageFolder('/share/lazy/will/ConstrastiveLoss/Imgs/color_images/train/', transform=transform)
loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, pin_memory=True)

# Teacher set-up, disabled for this (non-distillation) run. Re-enable together
# with the knowledge_distillation call in the epoch loop below.
# teacher_model = big_model(channel=128).to(args.device)

# optimizer declaration does nothing
# optimizer = optim.Adam(teacher_model.parameters(), lr=args.lr)
# load_full_state(teacher_model, optimizer, '/share/lazy/will/ConstrastiveLoss/Logs/0/64a43ca191944cba89536145c4422027/artifacts/run_stats.pyt', freeze_weights=False)

# The bare name `VQVAE` is never bound in this notebook: the class is imported
# only under the alias `big_model`. Calling `VQVAE()` therefore raises
# NameError on a fresh kernel, so build the model through the alias.
student_model = big_model().to(args.device)

optimizer = optim.Adam(student_model.parameters(), lr=args.lr)

run_name = 'NOT a knowledge distillation run!'

with mlflow.start_run(run_name=run_name) as run:
    # Attach the architecture description once per run, before training, so
    # a missing file fails immediately instead of after a full epoch.
    # architecture.txt is produced by the `%%writefile architecture.txt`
    # magic at the top of the class-definition cell (currently commented out).
    mlflow.log_artifact('architecture.txt')

    for epoch in range(args.epoch):
        # results = knowledge_distillation(epoch, loader, teacher_model, student_model, optimizer, args.device)
        results = train(epoch, loader, student_model, optimizer, args.device)
        for epoch_stats in results:
            save_to_mlflow(epoch_stats, args)

  result,i = ctx.saved_variables
epoch: 1; mse: 0.00483; latent: 0.011; avg mse: 0.01018; lr: 0.00040:  75%|███████▌  | 2740/3642 [42:10<14:29,  1.04it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

epoch: 2; mse: 0.00197; latent: 0.006; avg mse: 0.00277; lr: 0.00040: 100%|██████████| 3642/3642 [59:28<00:00,  1.02it/s]
epoch: 3; mse: 0.00163; latent: 0.004; avg mse: 0.00198; lr: 0.00040:  80%|███████▉  | 2900/3642 [52:09<13:20,  1.08s/it] 


FileNotFoundError: [Errno 2] No such file or directory: 'samples/00003_02900.jpg'

In [None]:
# Collect the buffers of the student's bottom-level quantiser into a list for inspection.
buffers_s = list(student_model.quantize_b.buffers())

In [None]:
# Display the first buffer of the student's bottom-level quantiser.
buffers_s[0]

In [None]:
# Collect the buffers of the teacher's bottom-level quantiser into a list for inspection.
# NOTE(review): teacher_model is only created in commented-out code in the
# training cell, so this raises NameError unless that set-up is re-enabled.
buffers_t = list(teacher_model.quantize_b.buffers())

In [None]:
# Display the first buffer of the teacher's bottom-level quantiser.
buffers_t[0]