In [149]:
import torch
from transformers import EfficientNetImageProcessor, EfficientNetForImageClassification
from PIL import Image
import math
from torch import nn


from distracted.dataset_loader import dataset_loader

image = Image.open('data/imgs/train/c0/img_34.jpg')

preprocessor = EfficientNetImageProcessor.from_pretrained("google/efficientnet-b7")
model = EfficientNetForImageClassification.from_pretrained("google/efficientnet-b7")

inputs = preprocessor(image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# model predicts one of the 1000 ImageNet classes
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label]),
# device = torch.device("cuda")
device = torch.device('cpu')
print(device)


seat belt, seatbelt
cpu


In [114]:
type(model(**inputs))

transformers.modeling_outputs.ImageClassifierOutputWithNoAttention

In [None]:
dataset = dataset_loader()

[Link to guide for adding additional layers to pretrained model](https://medium.com/analytics-vidhya/how-to-add-additional-layers-in-a-pre-trained-model-using-pytorch-5627002c75a5)

In [132]:
class EfficientNet_Adapter_Finetuning(EfficientNetForImageClassification):

    def __init__(self):
        super(self).__init__()
        self.model = EfficientNetForImageClassification.from_pretrained("google/efficientnet-b7")

    def forward(self, pixel_values: torch.FloatTensor = None,): # Add input stuff
        # Get output for EfficientNetModel then do EfficientNetForImageClassification part of forward()
        # Should have self.embeddings from EfficientNetModel which will probably not need to be retrained
        embedding_output = self.embeddings(pixel_values)

        # self.encoder is likely what needs to be finetuned
        # Encoder is EfficentNetEncoder
        # Forward pass of Encoder loops through blocks and updates hidden_states
        # Endcoder with potential edits looks like following:

        # for block,adapter in (self.blocks,self.adapters):
        #     hidden_states = block(hidden_states)
        #     hidden_states = adapter(hidden_states)

        # hidden_states = self.top_conv(hidden_states)
        # hidden_states = self.top_bn(hidden_states)
        # hidden_states = self.top_activation(hidden_states)
        
        # Idea is to add adapter layer after each loop 

        # Maybe Make EfficinetNet_Adapter_Encoder model then change self.model.something.encoder to this new one
        # Can initialize it with pretrained so self.blocks is the same


        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=self.config.output_hidden_states,
            )

        # EfficientNetForImageClassification forward takes output of EfficientNetModel and does the following:
        # outputs = self.efficientnet(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        # pooled_output = outputs.pooler_output if return_dict else outputs[1]
        # pooled_output = self.dropout(pooled_output)
        # logits = self.classifier(pooled_output)
        # logits = self.classifier_act(logits)

        pass



    



In [133]:
encoder_instance = model.efficientnet.encoder
encoder_class = encoder_instance.__class__
# config = model.config

# block_input = torch.randn(1,64,300,300)
# encoder_block_output_class = encoder_instance.blocks[0](block_input).__class__
# print(encoder_block_output_class)

# Following two functions taken from modeling_efficientnet.py
def round_repeats(repeats,depth_coefficient):
            # Round number of block repeats based on depth multiplier.
            return int(math.ceil(depth_coefficient * repeats))
def round_filters(config, num_channels: int):
    r"""
    Round number of filters based on depth multiplier.
    """
    divisor = config.depth_divisor
    num_channels *= config.width_coefficient
    new_dim = max(divisor, int(num_channels + divisor / 2) // divisor * divisor)

    # Make sure that round down does not go down by more than 10%.
    if new_dim < 0.9 * num_channels:
        new_dim += divisor

    return int(new_dim)



class EfficientNetAdapterEncoding(encoder_class):
    def __init__(self, model):
        encoder_instance = model.efficientnet.encoder
        config = model.config
        super().__init__(config)
    
        self.blocks = encoder_instance.blocks
        self.top_conv = encoder_instance.top_conv
        self.top_bn = encoder_instance.top_bn
        self.top_activation = encoder_instance.top_activation
        self.adapters = []

        

        num_base_blocks = len(config.in_channels)
        adapter_dimensions = []
        block_dimensions = []
        for i in range(num_base_blocks):
            block_out_dim = round_filters(config,config.out_channels[i])
            block_in_dim = round_filters(config,config.in_channels[i]) 
            for _ in range(round_repeats(config.num_block_repeats[i],config.depth_coefficient)):
                block_dimensions.append((block_in_dim,block_out_dim))
        for j in range(len(block_dimensions)-1):
             adapter_dimension_input = block_dimensions[j][1] # output of previous block
             adapter_dimension_output = block_dimensions[j+1][0] # input of next block
             adapter_dimensions.append((adapter_dimension_input,adapter_dimension_output))
        last_adapter_dimension = block_dimensions[-1][1]
        adapter_dimensions.append((last_adapter_dimension,last_adapter_dimension))

        for adapter_dimension in adapter_dimensions:
             self.adapters.append(nn.Linear(*adapter_dimension))
        # Wrong dimensions
        # Also wayyyyyyyyyyyyyyyy too many parameters if right dimensions.....
        # Don't use Linear


    def forward(self,
                hidden_states,
                output_hidden_states = False,
                return_dict = True):
        
        for block,adapter in zip(self.blocks,self.adapters):
            hidden_states = block(hidden_states)
            hidden_states = adapter(hidden_states)
        
        hidden_states = self.top_conv(hidden_states)
        hidden_states = self.top_bn(hidden_states)
        hidden_states = self.top_activation(hidden_states)
            
        return (hidden_states,None) # Should work as forward pass just takes encoder_output[0]




In [129]:
adapter_encoding = EfficientNetAdapterEncoding(model)

In [121]:
# encoder_instance.blocks[0]

In [162]:
block_input = torch.randn(1,64,300,300)
encoder_block_output = encoder_instance.blocks[0](block_input)
print(encoder_block_output.size())
print(adapter_encoding.adapters[0])
adapter_encoding.adapters[0](encoder_block_output)

torch.Size([1, 32, 300, 300])
Linear(in_features=32, out_features=64, bias=True)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (9600x300 and 32x64)

In [152]:
encoder_instance = model.efficientnet.encoder
encoder_instance.to(device)
# adapter_encoding.to("cpu")
from torchsummary import summary
# use this later for checking trainable params etc
# block 0 has 64, 300, 300
block = encoder_instance.blocks[-1]
block.to(device)
summary(block,input_size=(640,300,300))

OutOfMemoryError: CUDA out of memory. Tried to allocate 440.00 MiB (GPU 0; 8.00 GiB total capacity; 6.93 GiB already allocated; 0 bytes free; 7.09 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [92]:
model.efficientnet.embeddings(**inputs).shape

torch.Size([1, 64, 300, 300])

In [69]:
# Loop through named_parameters and set adapters to True and everything else to False?
# for name,para in model.named_parameters():
#     print(name, para.requires_grad)

In [123]:
# model.config

in_dim 64 out_dim 32
in_dim 64 out_dim 32
in_dim 64 out_dim 32
in_dim 64 out_dim 32
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 32 out_dim 48
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 48 out_dim 80
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 80 out_dim 160
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 160 out_dim 224
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 out_dim 384
in_dim 224 