In [1]:
from datasets import *
from train import *
from torchvision import datasets, transforms, models

training

In [2]:
# teacher: EfficientNet-B0 built with its default weights, placed on the GPU in eval mode
teacher = models.efficientnet_b0(weights='DEFAULT')
teacher = teacher.to('cuda').eval()

# student: ShuffleNetV2 x0.5 built with its default weights, same device and mode as the teacher
student = models.shufflenet_v2_x0_5(weights='DEFAULT')
student = student.to('cuda').eval()

In [3]:
# Give the student a head that emits teacher-sized features and classifies them
# with a frozen copy of the teacher's final linear layer.
teacher_head = teacher.classifier[1]
feature_dim = teacher_head.in_features

with torch.no_grad():
    # replace the conv/bn pair in front of the classifier with freshly initialised
    # layers whose output width equals the teacher feature dimension
    student.conv5[0] = torch.nn.Conv2d(
        student.conv5[0].in_channels,
        feature_dim,
        kernel_size=(1, 1),
        stride=(1, 1),
        bias=False,
    )
    student.conv5[1] = torch.nn.BatchNorm2d(
        feature_dim,
        eps=1e-05,
        momentum=0.1,
        affine=True,
        track_running_stats=True,
    )

    # new fc layer shaped like the teacher's, then overwritten with the teacher's parameters
    student.fc = torch.nn.Linear(feature_dim, teacher_head.out_features)
    student.fc.weight.copy_(teacher_head.weight)
    student.fc.bias.copy_(teacher_head.bias)

    # the copied classifier stays fixed: no gradients for its weight or bias
    student.fc.requires_grad_(False)

    # the replacement layers were created on the default device; move everything to the GPU
    student.to('cuda')

In [4]:
# build the train/validation loaders
# (arguments are presumably batch size and seed — TODO confirm against load_imagenet)
train_loader, val_loader = load_imagenet(64, 12345)

# train the modified student on the GPU; the run below was stopped by hand (KeyboardInterrupt)
train(
    student,
    train_loader,
    val_loader,
    'cuda',
    lr=0.004,
    epochs=100,
    log_name="sn_frozen_headval",
)

2023-04-27 13:33:35.626753: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-27 13:33:35.759191: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




100%|██████████| 782/782 [01:10<00:00, 11.05it/s]

epoch: 0, val accuracy: 0.40392







100%|██████████| 782/782 [01:10<00:00, 11.03it/s]

epoch: 1, val accuracy: 0.43348







100%|██████████| 782/782 [01:10<00:00, 11.06it/s]

epoch: 2, val accuracy: 0.47742







100%|██████████| 782/782 [01:11<00:00, 11.01it/s]

epoch: 3, val accuracy: 0.49422







100%|██████████| 782/782 [01:10<00:00, 11.03it/s]

epoch: 4, val accuracy: 0.50168







100%|██████████| 782/782 [01:11<00:00, 10.90it/s]

epoch: 5, val accuracy: 0.50692







100%|██████████| 782/782 [01:10<00:00, 11.03it/s]

epoch: 6, val accuracy: 0.50882







100%|██████████| 782/782 [01:10<00:00, 11.04it/s]

epoch: 7, val accuracy: 0.51142







100%|██████████| 782/782 [01:12<00:00, 10.86it/s]








KeyboardInterrupt: 

testing

In [13]:
# loader used for the final evaluation; unlike the training call, the result is bound to a single name
# (arguments are presumably batch size, seed and a train/test switch — TODO confirm against load_imagenet)
test_loader = load_imagenet(128,1234,False)

In [12]:
# show the learning-rate entry stored in the best checkpoint; map_location='cpu' keeps the
# tensors saved in the file off the GPU, since only the 'lr' entry is read here
torch.load("best_batch_i288264sn_frozen_head_long1682578354.1065493.pth", map_location='cpu')['lr']

[5.142319898206824e-06]

In [11]:
# Rebuild the student with the same modified head that was used for training, restore the
# best checkpoint into it, and put it in inference mode for the test run that follows.
student = models.shufflenet_v2_x0_5(weights='DEFAULT')

# first reinitialize the layer before classification to match the teacher feature dimensions
student.conv5[0] = torch.nn.Conv2d(student.conv5[0].in_channels,teacher.classifier[1].in_features,kernel_size=(1, 1), stride=(1, 1), bias=False)
student.conv5[1] = torch.nn.BatchNorm2d(teacher.classifier[1].in_features,eps=1e-05,momentum=0.1,affine=True,track_running_stats=True)

# next create a new fc layer to match the teacher dimension
student.fc = torch.nn.Linear(teacher.classifier[1].in_features,teacher.classifier[1].out_features)

# freeze the classification layer
for param in student.fc.parameters():
    param.requires_grad = False

# read the checkpoint once, onto the CPU; load_state_dict copies the values into the
# student's own tensors, so a second GPU-resident copy of the weights is not needed
best_checkpoint = torch.load("best_batch_i288264sn_frozen_head_long1682578354.1065493.pth", map_location='cpu')
student.load_state_dict(best_checkpoint['model_state_dict'])
student.to('cuda')

# inference mode for testing: BatchNorm must use its stored running statistics rather than
# per-batch statistics (and must not update them while the test set is evaluated).
# NOTE(review): validate() may set the mode itself — not visible from this notebook.
student.eval()

ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): InvertedResidual(
      (branch1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU(inplace=True)
      )
      (branch2): Sequential(
        (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_

In [14]:
# evaluate the restored student on the test loader using the GPU; the result is displayed in the next cell
acc = validate(student,test_loader,'cuda')

100%|██████████| 391/391 [04:39<00:00,  1.40it/s]


In [15]:
# display what validate() returned: a 3-tuple in the recorded output
# (presumably top-1 accuracy, loss, top-5 accuracy — TODO confirm against validate)
acc

(0.50772, 2.187045480436681, 0.75248)