**G-EM** uses: <br />
128x128 RGB images at a reduced frame rate of 5 Hz
VGG model pretrained on the ILSVRC-2012 dataset, after applying a convolutional operation with 256 1x1 kernels on the output of the fully connected layer to reduce its dimensionality from 2048 to 256.

In [27]:
import torch
import torchvision
import numpy as np
from torch import optim, nn
from torchvision import models, transforms
# VGG-16 backbone requested with pretrained weights
model = models.vgg16(pretrained=True)

In [12]:
class FeatureExtractor(nn.Module):
    """Turn a VGG-style backbone into a fixed-length feature extractor.

    The wrapped pipeline is: convolutional ``features`` -> ``avgpool`` ->
    flatten -> first fully-connected layer of ``classifier`` -> a new linear
    projection down to ``out_features`` values per image.

    Args:
        model: Backbone exposing ``features`` (an iterable of layers),
            ``avgpool`` and ``classifier`` (indexable; element 0 must expose
            an ``out_features`` attribute, as ``nn.Linear`` does), e.g.
            ``torchvision.models.vgg16``.
        out_features: Length of the produced feature vector. Defaults to 256.

    Note:
        ``last`` is a newly created layer, so its weights are randomly
        initialised rather than taken from the backbone.
    """

    def __init__(self, model: nn.Module, out_features: int = 256) -> None:
        super().__init__()
        # Convolutional feature layers of the backbone
        self.features = nn.Sequential(*model.features)
        # Average pooling layer of the backbone
        self.pooling = model.avgpool
        # Collapse each pooled feature map into a one-dimensional vector
        self.flatten = nn.Flatten()
        # First fully-connected layer of the backbone classifier
        self.fc = model.classifier[0]
        # Projection to the target dimensionality. The input width is read
        # from `fc` instead of being hard-coded to 4096, so backbones with a
        # different classifier width work as well.
        self.last = nn.Linear(self.fc.out_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one feature vector per input image.

        Args:
            x: Image batch accepted by the backbone's ``features`` layers
                (presumably ``(batch, 3, H, W)`` for VGG -- confirm for other
                backbones).

        Returns:
            Tensor of shape ``(batch, out_features)``.
        """
        out = self.features(x)
        out = self.pooling(out)
        out = self.flatten(out)
        out = self.fc(out)
        return self.last(out)



In [13]:
# Build the VGG-16 backbone and wrap it in the feature extractor
model = models.vgg16(pretrained=True)

# Prefer the first CUDA device; fall back to the CPU when none is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
new_model = FeatureExtractor(model).to(device)

In [14]:
# Show the layer-by-layer structure of the original VGG-16 backbone
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [15]:
# Show the layer structure of the wrapped feature extractor
print(new_model)

FeatureExtractor(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=

In [18]:
# Random stand-in for a single 128x128 RGB frame; only used to check output dimensions
x_test = torch.randn((1, 3, 128, 128))


In [19]:
# Place the test input on the selected device before running the model
img = x_test.to(device)

In [21]:
# Run the extractor on the image with gradient tracking switched off
with torch.no_grad():
    feature = new_model(img)

In [22]:
# Report the shape of the extracted feature tensor
print('test:', feature.size())

test: torch.Size([1, 256])


In [23]:
# Bring the feature to host memory, flatten it to 1-D and copy it into a NumPy array
feature2 = np.array(feature.cpu().detach().numpy().reshape(-1))
feature2

array([-1.7275746e+00,  2.3211155e+00,  3.6684957e-01, -1.1551425e+00,
        1.4975404e+00, -1.9239101e-01,  2.2417238e-01, -3.1314749e-01,
        1.9116800e-01,  8.8802147e-01,  5.6785893e-01,  2.2289724e+00,
       -8.4435713e-01,  1.6069946e+00, -1.4385359e+00,  6.8476158e-01,
        3.8820413e-01,  7.5183523e-01, -6.4111191e-01, -1.2691690e+00,
       -1.0025713e+00, -8.3164141e-02, -5.7458985e-01, -1.9322129e-01,
       -2.0938606e+00,  3.9149585e-01,  1.4792264e+00, -1.4138621e-01,
       -1.8766364e+00,  2.2859268e+00,  7.8887445e-01, -1.9380818e-01,
       -7.0947319e-01, -6.7389470e-01,  2.2610192e-01, -7.8376132e-01,
       -2.2947155e-03,  2.2897674e-02, -3.4576935e-01,  7.7805907e-01,
       -7.1402478e-01,  4.1420802e-01, -9.7193891e-01, -5.4388511e-01,
       -4.3576702e-02, -5.2412277e-01,  5.2911776e-01, -1.5352100e-01,
        1.6820659e-01,  1.0148305e+00, -8.9298564e-01, -8.6586910e-01,
        3.3035979e-01, -5.2902257e-01,  9.4566926e-02, -1.6955068e+00,
      