In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the project
# path configured further down points inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import os

# Root of the project on the mounted Drive; modify this path to match your setup.
PATH = "/content/drive/MyDrive/HGR/gesture_face_features"

# Make the project's `code` directory importable. The membership check keeps
# this cell idempotent: re-running it does not pile up duplicate sys.path entries.
CODE_DIR = os.path.join(PATH, "code")
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

%load_ext autoreload
%autoreload 2

In [3]:
# data_path: dataset root later handed to SignImageFolder.
# feat_path: directory where FeatureExtractor reads/writes its cached *.pt files.
data_path = os.path.join(PATH, 'samples', 'sign')
feat_path = os.path.join(PATH, 'feats')

In [4]:
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models.feature_extraction import create_feature_extractor

In [5]:
class SignImageFolder(ImageFolder):
  """ImageFolder that additionally yields a numeric id parsed from each file name.

  Samples come back as `(image, label, image_id)`. The id is the second
  '_'-separated field of the file name with its extension removed, so a file
  named like `IMG_1539.JPG` (illustrative) gets id 1539.

  Attributes:
    idx_to_class: inverse of `class_to_idx` (label index -> class name).
    dims: empty list created in `__init__`; not used inside this class.
  """

  def __init__(self, root, transform=transforms.Compose([
        transforms.Resize((600, 600), transforms.InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225]),
      ])):
    """Set up the folder dataset.

    Args:
      root: dataset root directory, forwarded to `ImageFolder`.
      transform: per-image transform. The default resizes to 600x600 with
        bicubic interpolation, converts to a tensor and normalizes with the
        mean/std listed in the signature.
    """
    super().__init__(root, transform=transform)
    self.idx_to_class = {v: k for k, v in self.class_to_idx.items()}
    self.dims = []

  def __getitem__(self, index):
    """Return `(image, label, image_id)` for the sample at `index`.

    Raises:
      IndexError: if the file name contains no '_' separator.
      ValueError: if the id field is not an integer.
    """
    image, label = super().__getitem__(index)
    image_path = self.samples[index][0]
    filename = os.path.basename(image_path)
    # Strip whatever extension is present (".JPG", ".jpg", ".jpeg", ...) rather
    # than only a literal ".JPG", which made int() fail on other spellings.
    id_field = os.path.splitext(filename.split('_')[1])[0]
    image_id = int(id_field)

    return image, label, image_id

  def denormalize(self, tensor):
    """Undo the default normalization.

    Normalizing with mean `-m/s` and std `1/s` maps `(x - m) / s` back to `x`
    for the mean `m` and std `s` used by the default transform.
    """
    inv_normalize = transforms.Normalize(
        mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
        std=[1/0.229, 1/0.224, 1/0.225]
    )
    return inv_normalize(tensor)

  def plot(self, id):
    """Display sample `id` (a dataset index) titled `<class name>:<image id>`."""
    tensor, label, img_id = self.__getitem__(id)
    # Clamp after de-normalizing so the values fall inside [0, 1] for imshow.
    tensor = self.denormalize(tensor).clamp(0, 1)
    # Channels-first -> channels-last for matplotlib.
    image = tensor.permute(1, 2, 0).numpy()

    plt.imshow(image)
    plt.title(f"{self.idx_to_class[label]}:{img_id}")
    plt.show()

In [22]:
class ModelLoader:
  """Loads a pretrained torchvision classifier and selects two layers of it.

  Attributes:
    model_name: one of `available_models`.
    model: the pretrained network, switched to eval mode.
    layers: tuple `(conv_layer, pool_layer)` where `pool_layer` is always
      `model.avgpool` and `conv_layer` is the per-model convolution chosen
      in `_specs`.
    input_dim: the `input_dims` entry for `model_name`.
  """

  available_models = ['vgg19','resnet50','inception_v3',
                      'efficientnet_b0','efficientnet_b1','efficientnet_b6']

  # https://discuss.pytorch.org/t/how-to-get-input-shape-of-model/85877/4
  input_dims = {
    'vgg19': (224, 224),
    'resnet50': (224, 224),
    'inception_v3': (299, 299),
    'efficientnet_b0': (224, 224),
    'efficientnet_b1': (240, 240),
    'efficientnet_b6': (528, 528),
  }

  # model name -> (builder name in torchvision.models,
  #                weights enum name in torchvision.models,
  #                function picking the convolution layer out of the built model)
  _specs = {
    'vgg19': ('vgg19', 'VGG19_Weights',
              lambda net: net.features[34]),
    'resnet50': ('resnet50', 'ResNet50_Weights',
                 lambda net: net.layer4[2].conv3),
    'inception_v3': ('inception_v3', 'Inception_V3_Weights',
                     lambda net: net.Mixed_7c.branch_pool.conv),
    'efficientnet_b0': ('efficientnet_b0', 'EfficientNet_B0_Weights',
                        lambda net: net.features[8][0]),
    'efficientnet_b1': ('efficientnet_b1', 'EfficientNet_B1_Weights',
                        lambda net: net.features[8][0]),
    'efficientnet_b6': ('efficientnet_b6', 'EfficientNet_B6_Weights',
                        lambda net: net.features[8][0]),
  }

  def __init__(self, model_name):
    """Validate `model_name` and load the corresponding pretrained model.

    Raises:
      AssertionError: if `model_name` is not in `available_models`.
    """
    assert model_name in self.available_models, (
        f"Unknown model {model_name!r}; choose one of {self.available_models}")
    self.model_name = model_name
    self.model, self.layers = self.load()
    self.input_dim = self.input_dims[model_name]

  def load(self):
    '''
      Build the pretrained model named by `self.model_name` and pick its layers.

      https://jacobgil.github.io/pytorch-gradcam-book/introduction.html

      Resnet18 and 50: model.layer4[-1]
      VGG and densenet161: model.features[-1]
      ViT: model.blocks[-1].norm1
      SwinT: model.layers[-1].blocks[-1].norm1

      Are suggested for last convolutional layers.

      Returns:
        `(model, layers)` with `model` in eval mode and
        `layers == (conv_layer, model.avgpool)`.

      Raises:
        ValueError: if `self.model_name` has no entry in `_specs` (previously
          this surfaced as an UnboundLocalError further down).
    '''
    spec = self._specs.get(self.model_name)
    if spec is None:
      raise ValueError(
          f"No loader defined for model {self.model_name!r}; "
          f"known models: {sorted(self._specs)}")

    # Imported lazily, as before, so defining the class does not need torchvision.
    from torchvision import models as tv_models

    builder_name, weights_name, pick_conv = spec
    builder = getattr(tv_models, builder_name)
    weights = getattr(tv_models, weights_name).DEFAULT

    model = builder(weights=weights)
    layers = (pick_conv(model), model.avgpool)
    model.eval()
    return model, layers

In [7]:
# Loader settings pulled out as names so they are easy to find and tune.
BATCH_SIZE = 32
NUM_WORKERS = 2

# Dataset with the class's default transform, wrapped in a batching loader.
dataset = SignImageFolder(data_path)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

In [27]:
import torch
import os

class FeatureExtractor:
    """Collects per-image activations of one model layer through a forward hook.

    NOTE: a later cell in this notebook defines another class with the same
    name; once that cell runs, it replaces this definition.

    Attributes:
        device: CUDA device when available, otherwise CPU.
        m: loader object exposing `model`, `model_name` and `layers`.
        dataloader: iterable yielding `(images, labels, img_ids)` batches.
        feats: dict mapping image id -> activation tensor (kept on the CPU).
        feat_path: directory holding the cached feature file.
        layer_type: 'conv' (uses `m.layers[0]`) or 'gap' (uses `m.layers[1]`).
        layer: the hooked module.
        path: full path of the cache file for this model/layer combination.
    """

    def __init__(self, modelloader, dataloader, feat_path, layer_type='gap'):
        assert layer_type in ['conv', 'gap']
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.m = modelloader
        self.dataloader = dataloader
        self.feats = {}
        self.feat_path = feat_path
        self.layer_type = layer_type
        self.layer = self.m.layers[0] if self.layer_type == 'conv' else self.m.layers[1]
        self.path = os.path.join(feat_path, f"{self.m.model_name}_{self.layer_type}_layer_feats.pt")

    def _hook_fn(self, module, input, output):
        """Forward hook: file each row of `output` under the matching image id.

        Relies on `self.img_ids` having been set for the batch being processed.
        """
        for idx, img_id in enumerate(self.img_ids):
            # Copy each row to the CPU instead of keeping a view of the batch
            # output: this frees accelerator memory as soon as the batch is
            # done and makes the saved file loadable on CPU-only runtimes.
            self.feats[img_id.item()] = output[idx].detach().to("cpu", copy=True)

    def extract_features(self, save):
        """Run the whole dataloader through the model and fill `self.feats`.

        Args:
            save: when truthy, write the collected features to `self.path`.
        """
        print(f'Extracting features for {self.m.model_name} {self.layer} layer.')
        self.m.model.to(self.device)

        self.hook = self.layer.register_forward_hook(self._hook_fn)
        try:
            for images, labels, img_ids in self.dataloader:
                # The hook reads the ids of the batch currently in flight.
                self.img_ids = img_ids
                images = images.to(self.device)
                with torch.no_grad():
                    self.m.model(images)
                print(f"Extracted ids for this batch: {img_ids}")
        finally:
            # Always detach the hook, even when a batch fails, so a retry does
            # not end up with several hooks on the same layer.
            self.hook.remove()

        if save:
            self.save_features()

    def save_features(self):
        """Write `self.feats` to `self.path`, creating the directory if needed."""
        target_dir = os.path.dirname(self.path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        torch.save(self.feats, self.path)
        print(f"Features saved to {self.path}")

    def load_features(self):
        """Load cached features from `self.path` into `self.feats`.

        Returns:
            True if the cache file existed and was loaded, False otherwise.
        """
        if os.path.exists(self.path):
            # map_location lets caches written on a GPU runtime load anywhere.
            loaded_feats = torch.load(self.path, map_location="cpu", weights_only=True)
            self.feats = loaded_feats
            print(f"Features loaded from {self.path}")
            return True
        else:
            print(f"No features found at {self.path}. For computing features call extract_features().")
            return False

def get_all_feats(layer_type, save=True):
  """Gather `layer_type` features for every model in `ModelLoader.available_models`.

  For each model the cached file is used when present; otherwise the features
  are extracted (and written to disk when `save` is true).

  NOTE: a later cell defines `get_all_feats(save=True)`, which replaces this
  function once that cell has run.

  Returns:
    dict mapping model name -> that extractor's `feats` dict.
  """
  collected = {}
  for name in ModelLoader.available_models:
    extractor = FeatureExtractor(ModelLoader(name), dataloader, feat_path, layer_type)
    cached = extractor.load_features()
    if not cached:
      extractor.extract_features(save=save)
    collected[name] = extractor.feats
  return collected

Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/vgg19_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/resnet50_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/inception_v3_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b0_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b1_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b6_gap_layer_feats.pt


In [34]:
class FeatureExtractor:
    """Collects per-image activations of two model layers in one forward pass.

    Hooks are placed on `modelloader.layers[0]` ("conv") and
    `modelloader.layers[1]` ("gap"); each layer's activations are cached in
    its own file under `feat_path`.

    Attributes:
        device: CUDA device when available, otherwise CPU.
        m: loader object exposing `model`, `model_name` and `layers`.
        dataloader: iterable yielding `(images, labels, img_ids)` batches.
        feat_path: directory holding the cached feature files.
        conv_layer_feats: dict image id -> activation of `layers[0]` (on CPU).
        gap_layer_feats: dict image id -> activation of `layers[1]` (on CPU).
        paths: cache file path per layer type ('conv' / 'gap').
    """

    def __init__(self, modelloader, dataloader, feat_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.m = modelloader
        self.dataloader = dataloader
        self.feat_path = feat_path
        self.conv_layer_feats = {}
        self.gap_layer_feats = {}
        self.paths = {
            "conv": os.path.join(self.feat_path, f"{self.m.model_name}_conv_layer_feats.pt"),
            "gap": os.path.join(self.feat_path, f"{self.m.model_name}_gap_layer_feats.pt")
        }

    def _store(self, bucket, output):
        """File each row of `output` into `bucket` under the matching image id."""
        for idx, img_id in enumerate(self.img_ids):
            # Copy each row to the CPU instead of keeping a view of the batch
            # output: this frees accelerator memory as soon as the batch is
            # done and makes the saved files loadable on CPU-only runtimes.
            bucket[img_id.item()] = output[idx].detach().to("cpu", copy=True)

    def _hook_fn_conv(self, module, input, output):
        """Forward hook for `layers[0]`; relies on `self.img_ids` being set."""
        self._store(self.conv_layer_feats, output)

    def _hook_fn_gap(self, module, input, output):
        """Forward hook for `layers[1]`; relies on `self.img_ids` being set."""
        self._store(self.gap_layer_feats, output)

    def extract_features(self, save):
        """Run the whole dataloader through the model and fill both feature dicts.

        Args:
            save: when truthy, write both dicts to their cache files.
        """
        print(f'Extracting features for {self.m.model_name} layers.')
        self.m.model.to(self.device)

        self.hook1 = self.m.layers[0].register_forward_hook(self._hook_fn_conv)
        self.hook2 = self.m.layers[1].register_forward_hook(self._hook_fn_gap)

        try:
            for images, labels, img_ids in self.dataloader:
                # The hooks read the ids of the batch currently in flight.
                self.img_ids = img_ids
                images = images.to(self.device)
                with torch.no_grad():
                    self.m.model(images)
                print(f"Extracted ids for this batch: {img_ids}")
        finally:
            # Always detach the hooks, even when a batch fails, so a retry does
            # not end up with several hooks on the same layers.
            self.hook1.remove()
            self.hook2.remove()

        if save:
            self.save_features()

    def save_features(self):
        """Write both feature dicts to disk, creating the directory if needed."""
        for target in self.paths.values():
            target_dir = os.path.dirname(target)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
        torch.save(self.conv_layer_feats, self.paths['conv'])
        print(f"Conv layer features saved to {self.paths['conv']}")
        torch.save(self.gap_layer_feats, self.paths['gap'])
        print(f"Gap layer features saved to {self.paths['gap']}")

    def load_features(self, layer_type):
        """Load the cached features of one layer type.

        Args:
            layer_type: 'conv' or 'gap'.

        Returns:
            True if the cache file existed and was loaded, False otherwise.
        """
        assert layer_type in ['conv', 'gap']
        if os.path.exists(self.paths[layer_type]):
            # map_location lets caches written on a GPU runtime load anywhere.
            loaded_feats = torch.load(self.paths[layer_type], map_location="cpu", weights_only=True)
            if layer_type == 'conv':
                self.conv_layer_feats = loaded_feats
            elif layer_type == 'gap':
                self.gap_layer_feats = loaded_feats
            print(f"Features loaded from {self.paths[layer_type]}")
            return True
        else:
            print(f"No features found at {self.paths[layer_type]}. For computing features call extract_features().")
            return False

def get_all_feats(save=True):
    """Gather conv and gap features for every model in `ModelLoader.available_models`.

    Cached files are used when both exist for a model. If either one is
    missing, the features are extracted once; a single pass fills both layers,
    so a second pass (which the per-layer checks used to trigger when nothing
    was cached or `save` was false) is never needed.

    Args:
        save: forwarded to `extract_features`; when true the freshly extracted
            features are written to disk.

    Returns:
        `(feats_conv, feats_gap)`, each a dict mapping model name -> dict of
        image id -> tensor.
    """
    feats_conv = {}
    feats_gap = {}
    for model_name in ModelLoader.available_models:
        m = ModelLoader(model_name)
        fe = FeatureExtractor(m, dataloader, feat_path)

        # Evaluate both loads (no short-circuit) so every existing cache is read.
        conv_cached = fe.load_features('conv')
        gap_cached = fe.load_features('gap')
        if not (conv_cached and gap_cached):
            fe.extract_features(save=save)

        feats_conv[model_name] = fe.conv_layer_feats
        feats_gap[model_name] = fe.gap_layer_feats

    return feats_conv, feats_gap

In [36]:
# Collect {model_name: {image_id: tensor}} dicts for the conv and pooling
# layers; cached files under feat_path are reused when they exist.
feats_conv, feats_gap = get_all_feats()

Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/vgg19_conv_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/vgg19_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/resnet50_conv_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/resnet50_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/inception_v3_conv_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/inception_v3_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b0_conv_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b0_gap_layer_feats.pt
Features loaded from /content/drive/MyDrive/HGR/gesture_face_features/feats/efficientnet_b1_conv_layer_feats.pt
Features loaded from /content/drive/

In [51]:
# Spot-check one entry: the pooling-layer activation of vgg19 for image id 1539.
feats_gap['vgg19'][1539]

tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00]],

        [[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 7.9675e-02,  ..., 2.3662e-01,
          1.2576e-01, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 2.5457e-01,  ..., 5.8608e-02,
          4.3956e-02, 0.0000e+00],
         ...,
         [0.0000e+00, 6.8307e-01, 4.0275e-01,  ..., 1.3315e-01,
          0.000

In [None]:
# Per participant index: (start, stop) spans of image ids, stop exclusive.
# Participant 5 has two spans because ids 1535 and 1536 are left out.
_ID_SPANS = {
    0: [(2914, 2951)],
    1: [(2871, 2904)],
    2: [(2323, 2356)],
    3: [(2285, 2314)],
    4: [(1646, 1675)],
    5: [(1503, 1535), (1537, 1544)],
}

# Expand every span into the explicit list of ids it covers.
participants = {
    pid: [img_id for start, stop in spans for img_id in range(start, stop)]
    for pid, spans in _ID_SPANS.items()
}
# Keep only the entries whose image id belongs to participant 0.
# NOTE(review): `feats` is not assigned at top level in any cell shown in this
# notebook (only feats_conv / feats_gap are), so this fails on a fresh kernel.
# Confirm which per-model dict is meant, e.g. feats_gap['vgg19'].
p0_feats = {img_id: data for img_id, data in feats.items() if img_id in participants[0]}

In [None]:
# participants[0] lists 37 ids, so a result of 37 means every one was found.
len(p0_feats)

37