In [1]:
from argparse import Namespace
from model.DeepVSLNet_cbkd import TeacherVSLNetCBDK
from utils.cbkd_helpers import prune_block4, prune_block3, prune_block2
from utils.cbkd_config import CBKDConfig

from copy import deepcopy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def count_parameters(teacher, student):
    """Print the trainable-parameter totals of two models and the relative reduction.

    Args:
        teacher: Object exposing ``parameters()``; its total is the baseline.
        student: Object exposing ``parameters()``; compared against ``teacher``.

    Returns:
        None. The three summary lines are written to stdout.

    Only parameters whose ``requires_grad`` flag is truthy are counted. If the
    teacher has no such parameters the percentage is undefined, so a
    placeholder line is printed instead of dividing by zero.
    """
    def num_params(model):
        # Frozen parameters (requires_grad False) are deliberately skipped.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    teacher_total = num_params(teacher)
    student_total = num_params(student)

    print(f"Teacher total parameters: {teacher_total:,}")
    print(f"Student total parameters: {student_total:,}")

    if teacher_total == 0:
        # Avoid ZeroDivisionError: a relative reduction has no meaning here.
        print("Parameter reduction: n/a (teacher has no trainable parameters)")
        return

    reduction = 100 * (1 - student_total / teacher_total)
    print(f"Parameter reduction: {reduction:.2f}%")

In [3]:
# Model hyper-parameters: gathered in a dict, then exposed as attributes.
configs = Namespace(**{
    "video_feature_dim": 256,
    "dim": 256,
    "film_mode": "inside_encoder:multi",
    "drop_rate": 0,
    "word_size": 300,
    "char_size": 1000,
    "word_dim": 300,
    "char_dim": 50,
    "word_vectors": None,
    "num_heads": 8,
    "max_pos_len": 128,
    "predictor": "glove",
})

# CBKD settings; the pruning cells read their keep_ratio_* fields from here.
cbkd_config = CBKDConfig()

# No word vectors are supplied: the value comes from the namespace, which holds None.
model = TeacherVSLNetCBDK(configs=configs, word_vectors=configs.word_vectors)

In [4]:
# Work on a deep copy of the teacher's block 2 rather than on `model.block2` itself.
student_2 = deepcopy(model.block2)

# NOTE(review): only `student_2.feature_encoder` is handed to the pruner, while the
# teacher count below covers all of `model.block2` - confirm the two are comparable.
pruned_block2 = prune_block2(
    teacher_featenc=student_2.feature_encoder,
    keep_ratio_ds=cbkd_config.keep_ratio_block2_ds,
    keep_ratio_attn=cbkd_config.keep_ratio_block2_attn,
)

# Report trainable-parameter totals for the original block and the pruned result.
count_parameters(teacher=model.block2, student=pruned_block2)

Teacher total parameters: 964,096
Student total parameters: 723,564
Parameter reduction: 24.95%


In [5]:
# Work on a deep copy of the teacher's block 3 rather than on `model.block3` itself.
student_3 = deepcopy(model.block3)

pruned_block3 = prune_block3(
    teacher_block3=student_3,
    keep_ratio_cqa=cbkd_config.keep_ratio_block3_cqa,
    keep_ratio_concat=cbkd_config.keep_ratio_block3_concat,
)

# Report trainable-parameter totals for the original block and the pruned result.
count_parameters(teacher=model.block3, student=pruned_block3)

Teacher total parameters: 395,009
Student total parameters: 198,613
Parameter reduction: 49.72%


In [6]:
# Work on a deep copy of the teacher's block 4 rather than on `model.block4` itself.
student_4 = deepcopy(model.block4)

pruned_block4 = prune_block4(
    teacher_block4=student_4,
    keep_ratio_enc=cbkd_config.keep_ratio_block4_enc,
    keep_ratio_pred=cbkd_config.keep_ratio_block4_pred,
)

# Report trainable-parameter totals for the original block and the pruned result.
count_parameters(teacher=model.block4, student=pruned_block4)

Teacher total parameters: 1,228,290
Student total parameters: 436,041
Parameter reduction: 64.50%


In [7]:
from utils.cbkd_helpers import run_cbkd_stage

# Stage-by-stage distillation: stages are visited from `total_blocks` down to 1.
cbkd_config = CBKDConfig()
distilled_blocks = {}  # stage index -> block returned by run_cbkd_stage for that stage
total_blocks = 4
teacher = deepcopy(model)  # independent copy, so `model` is not the object passed on
student_i = None  # overwritten each stage with the student returned by that stage

for stage_idx in range(total_blocks, 0, -1):
    print(stage_idx)
    pruned_block_i, student_i = run_cbkd_stage(
        teacher=teacher,
        distilled_blocks=distilled_blocks,
        stage_idx=stage_idx,
        configs=configs,
        cbkd_cfg=cbkd_config,
        train_loader=None,  # no data loader is supplied in this notebook run
        total_blocks=total_blocks,
        device="cpu",
    )
    # Store this stage's block; the same dict is passed to every following stage.
    distilled_blocks[stage_idx] = pruned_block_i

4
1
2
3
NON E' questo
3
1
2
NON E' questo
2
1
NON E' questo
1
NON E' questo
