In [1]:
import os
import torch
import torch.nn as nn

In [2]:
ckpt_path = '/root/data/gaze_estimation/at_step_0060000.pth.tar'
ckpt = torch.load(ckpt_path, map_location='cpu')
ckpt.keys()

FileNotFoundError: [Errno 2] No such file or directory: '/root/data/gaze_estimation/at_step_0060000.pth.tar'

In [3]:
# import VGG model
from torchvision import models

class VGG_Gaze_Estimator(nn.Module):
    def __init__(self, pretrained=True):
        super(VGG_Gaze_Estimator, self).__init__()
        self.vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        # discard FC layers
        self.vgg16 = self.vgg16.features

        self.FC1 = nn.Linear(512, 64, bias=True)
        self.FC2 = nn.Linear(64, 64, bias=True)
        self.FC3 = nn.Linear(64, 4, bias=True)

        self.leakly_relu = nn.LeakyReLU(0.2, inplace=True)
        self.tanh = nn.Tanh()

        # initialize weights
        nn.init.kaiming_normal_(self.FC1.weight.data)
        nn.init.constant_(self.FC1.bias.data, val=0)
        nn.init.kaiming_normal_(self.FC2.weight.data)
        nn.init.constant_(self.FC2.bias.data, val=0)
        nn.init.kaiming_normal_(self.FC3.weight.data)
        nn.init.constant_(self.FC3.bias.data, val=0)

    def forward(self, x, feature_out_layers: list = None):
        features = []
        for i, layer in enumerate(self.vgg16):
            x = layer(x)
            if i in feature_out_layers:
                features.append(x)
        #print(x.shape)
        x = x.mean(-1).mean(-1) # global average pooling
        #print(x.shape, "after global average pooling")
        x = self.leakly_relu(self.FC1(x))
        x = self.leakly_relu(self.FC2(x))
        x = self.tanh(self.FC3(x))
        x = torch.pi * x * 0.5
        gaze_estimate = x[:, :2]
        head_estimate = x[:, 2:]
        return gaze_estimate, head_estimate, features \
            if feature_out_layers is not None \
            else None

# ---

test_model = VGG_Gaze_Estimator()
test_model

VGG_Gaze_Estimator(
  (vgg16): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0

In [5]:
#test_model.load_state_dict(ckpt)
test_X = torch.randn(1, 3, 128, 128)
test_out = test_model(test_X, feature_out_layers=[3, 8, 15, 22, 29])
print(len(test_out))
for i, layer in enumerate(test_out[-1]):
    print(f"layer {[3, 8, 15, 22, 29][i]}: {layer.shape}")

3
layer 3: torch.Size([1, 64, 128, 128])
layer 8: torch.Size([1, 128, 64, 64])
layer 15: torch.Size([1, 256, 32, 32])
layer 22: torch.Size([1, 512, 16, 16])
layer 29: torch.Size([1, 512, 8, 8])


In [6]:
test_out = test_model(test_X, feature_out_layers=[i for i in range(30)])
for i, layer in enumerate(test_out[-1]):
    print(f"layer {i}: {layer.shape}")

layer 0: torch.Size([1, 64, 128, 128])
layer 1: torch.Size([1, 64, 128, 128])
layer 2: torch.Size([1, 64, 128, 128])
layer 3: torch.Size([1, 64, 128, 128])
layer 4: torch.Size([1, 64, 64, 64])
layer 5: torch.Size([1, 128, 64, 64])
layer 6: torch.Size([1, 128, 64, 64])
layer 7: torch.Size([1, 128, 64, 64])
layer 8: torch.Size([1, 128, 64, 64])
layer 9: torch.Size([1, 128, 32, 32])
layer 10: torch.Size([1, 256, 32, 32])
layer 11: torch.Size([1, 256, 32, 32])
layer 12: torch.Size([1, 256, 32, 32])
layer 13: torch.Size([1, 256, 32, 32])
layer 14: torch.Size([1, 256, 32, 32])
layer 15: torch.Size([1, 256, 32, 32])
layer 16: torch.Size([1, 256, 16, 16])
layer 17: torch.Size([1, 512, 16, 16])
layer 18: torch.Size([1, 512, 16, 16])
layer 19: torch.Size([1, 512, 16, 16])
layer 20: torch.Size([1, 512, 16, 16])
layer 21: torch.Size([1, 512, 16, 16])
layer 22: torch.Size([1, 512, 16, 16])
layer 23: torch.Size([1, 512, 8, 8])
layer 24: torch.Size([1, 512, 8, 8])
layer 25: torch.Size([1, 512, 8, 8])

In [26]:
# add os path
import sys
sys.path.append('/root/data/dataset_prep')
import STEDGaze.dataset as dataset

gazeCaptureDataset = dataset.HDFDataset(
    hdf_file_path='/root/data/dataset_prep/faze_preprocess/outputs_sted/GazeCapture.h5'
)

ModuleNotFoundError: No module named 'core'