In [1]:
import torch
from torch import nn
model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
#model.eval()

Downloading: "https://github.com/pytorch/vision/archive/v0.10.0.zip" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

In [4]:
# define an identity layer to replace layers after mixed_5d
class Identity(nn.Module): 
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

# replace aboundent layers to identity layers
model.fc=Identity()
model.dropout=Identity()
model.avgpool=Identity()
model.Mixed_7c=Identity()
model.Mixed_7b=Identity()
model.Mixed_7a=Identity()
model.AuxLogits=Identity()
model.Mixed_6e=Identity()
model.Mixed_6d=Identity()
model.Mixed_6c=Identity()
model.Mixed_6b=Identity()
model.Mixed_6a=Identity()
#model.eval()

In [5]:
# check dimensions 
# Download an example image from the pytorch website
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
  output = model(input_batch)


In [6]:
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output.view(-1,288,35,35).shape)

torch.Size([1, 288, 35, 35])


In [7]:
# need conv2d conv2d and spatial softmax
class Last_two_conv(nn.Module):
    def __init__(self, in_channels,hidden_channels,out_channels):
        super(Last_two_conv, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, hidden_channels,
                               kernel_size=5)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(hidden_channels, out_channels,
                               kernel_size=5)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        # reshape
        x = x.view(-1,288,35,35)
        x = self.conv1(x)
        # Activation function
        x = self.relu1(x)
        # Second convolutional layer
        x = self.conv2(x)
        # Activation function
        x = self.relu2(x)
        return x

# replace the layer with last_two_conv

model.dropout=Last_two_conv(288,32,16)
with torch.no_grad():
  output = model(input_batch)
print(output.view(-1,16,27,27).shape)

torch.Size([1, 16, 27, 27])


In [12]:
# define spatialsofrmax
import numpy as np
class SpatialSoftmax(torch.nn.Module):
    def __init__(self, height, width, channel, temperature=None):
        super(SpatialSoftmax, self).__init__()
        self.height = height
        self.width = width
        self.channel = channel

        if temperature:  
          self.temperature = torch.ones(1)*temperature   
        else:   
          self.temperature = nn.Parameter(torch.ones(1))  

        pos_x, pos_y = np.meshgrid(
                np.linspace(-1., 1., self.height),
                np.linspace(-1., 1., self.width)
                )
        pos_x = torch.from_numpy(pos_x.reshape(self.height*self.width)).float()
        pos_y = torch.from_numpy(pos_y.reshape(self.height*self.width)).float()
        self.register_buffer('pos_x', pos_x)
        self.register_buffer('pos_y', pos_y)
        self.linear =nn.Linear(32, 32)

    def forward(self, feature):

        #feature = feature.view(-1,16,self.height*self.width)
        feature = feature.view(-1,self.height*self.width)

        softmax_attention = nn.functional.softmax(feature/self.temperature, dim=-1)
        expected_x = torch.sum(self.pos_x*softmax_attention, dim=1, keepdim=True)
        expected_y = torch.sum(self.pos_y*softmax_attention, dim=1, keepdim=True)
        expected_xy = torch.cat([expected_x, expected_y], 1)
        feature_keypoints = expected_xy.view(-1, self.channel*2)
        feature_keypoints = self.linear(feature_keypoints)

        return feature_keypoints

# replace fc layer with spatial softmax
model.fc=SpatialSoftmax(27,27,16,temperature=None)
with torch.no_grad():
  output = model(input_batch)
print(output.shape)
print(input_batch.shape)

torch.Size([1, 32])
torch.Size([1, 3, 299, 299])


triplelet loss test

In [11]:
torch.manual_seed(0)
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2,reduction='sum')
anchor = torch.randn(1, 1, requires_grad=True)
positive = torch.randn(1, 1, requires_grad=True)
negative = torch.randn(1, 1, requires_grad=True)
print(anchor)
print(positive)
print(negative)


tensor([[1.5410]], requires_grad=True)
tensor([[-0.2934]], requires_grad=True)
tensor([[-2.1788]], requires_grad=True)


In [None]:
output = triplet_loss(anchor, positive, negative)
print(output)
nn.functional.relu((anchor-positive).norm(dim=0,p=2)-(anchor-negative).norm(dim=0,p=2)+1)

tensor(0., grad_fn=<SumBackward0>)


tensor([0.], grad_fn=<ReluBackward0>)

In [27]:
torch.manual_seed(0)
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2,reduction='sum') # input data
anchor = torch.randn(1,32, 1, requires_grad=True) # 1 X 32 x 1
positive = torch.randn(4,32, 1, requires_grad=True) # 4 X 32 X 1
negative = torch.randn(10,32, 1, requires_grad=True) # N-4 X 32 X 1
print(anchor)
print(positive)
print(negative)

tensor([[1.5410]], requires_grad=True)
tensor([[-0.2934],
        [-2.1788],
        [ 0.5684],
        [-1.0845],
        [-1.3986]], requires_grad=True)
tensor([[ 0.4033],
        [ 0.8380],
        [-0.7193]], requires_grad=True)


In [None]:
output = triplet_loss(anchor, positive, negative)
print(output)
nn.functional.relu((anchor-positive).norm(dim=1,p=2)-(anchor-negative).norm(dim=1,p=2)+1).sum(dim=0)

tensor(5.7707, grad_fn=<SumBackward0>)


tensor(5.7707, grad_fn=<SumBackward1>)

In [22]:
torch.manual_seed(0)
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2,reduction='sum')
anchor = torch.randn(3,3, 1, requires_grad=True)
positive = torch.randn(3,3, 5, requires_grad=True)
negative = torch.randn(3,3, 10, requires_grad=True)
print(anchor)
print(positive)
print(negative)

tensor([[[ 1.5410],
         [-0.2934],
         [-2.1788]],

        [[ 0.5684],
         [-1.0845],
         [-1.3986]],

        [[ 0.4033],
         [ 0.8380],
         [-0.7193]]], requires_grad=True)
tensor([[[ 0.5988, -1.5551, -0.3414,  1.8530,  0.4681],
         [-0.1577,  1.4437,  0.2660,  1.3894,  1.5863],
         [ 0.9463, -0.8437,  0.9318,  1.2590,  2.0050]],

        [[ 0.0537,  0.4397,  0.1124,  0.6408,  0.4412],
         [-0.2159, -0.7425,  0.5627,  0.2596,  0.5229],
         [ 2.3022, -1.4689, -1.5867,  1.2032,  0.9200]],

        [[ 1.1108,  1.2899, -1.4782,  2.5672, -0.4731],
         [ 0.3356,  1.5091, -0.5497, -0.4798, -0.4997],
         [-1.0670,  1.1149, -0.1407,  0.8058,  0.3276]]], requires_grad=True)
tensor([[[ 6.8705e-01, -8.3832e-01,  8.9182e-04,  8.4189e-01, -4.0003e-01,
           1.0395e+00,  3.5815e-01, -3.3039e-03,  2.3025e+00, -1.8817e+00],
         [-4.9727e-02, -1.0450e+00, -9.5650e-01,  3.3532e-02,  7.1009e-01,
          -1.5353e+00, -1.3602e+00,  3

In [25]:
output = triplet_loss(anchor, positive, negative) # anchor size Nsample x d x 1, positive size Nsample x d x window, negative size Nsample x d x (Nsample-window)
print(output)
print(anchor-positive)
print((anchor-positive).norm(dim=1,p=2))
#(anchor-positive).norm(dim=2,p=2)-(anchor-negative).norm(dim=2,p=2)+1
nn.functional.relu((anchor-positive).norm(dim=1,p=2)-(anchor-negative).norm(dim=1,p=2)+1)

tensor(4.7244, grad_fn=<SumBackward0>)
tensor([[[ 0.9422,  3.0961,  1.8824, -0.3120,  1.0729],
         [-0.1357, -1.7371, -0.5595, -1.6828, -1.8798],
         [-3.1251, -1.3351, -3.1106, -3.4378, -4.1838]],

        [[ 0.5147,  0.1287,  0.4560, -0.0724,  0.1273],
         [-0.8687, -0.3420, -1.6472, -1.3441, -1.6074],
         [-3.7008,  0.0703,  0.1881, -2.6018, -2.3186]],

        [[-0.7075, -0.8865,  1.8815, -2.1639,  0.8765],
         [ 0.5025, -0.6711,  1.3878,  1.3179,  1.3377],
         [ 0.3477, -1.8342, -0.5786, -1.5250, -1.0469]]],
       grad_fn=<SubBackward0>)
tensor([[3.2668, 3.7929, 3.6786, 3.8403, 4.7105],
        [3.8361, 0.3721, 1.7195, 2.9294, 2.8241],
        [0.9348, 2.1449, 2.4085, 2.9572, 1.9114]], grad_fn=<NormBackward1>)


RuntimeError: ignored

In [None]:
# Initialize embeddings
embedding = nn.Embedding(1000, 128)
anchor_ids = torch.randint(0, 1000, (1,))
positive_ids = torch.randint(0, 1000, (1,))
negative_ids = torch.randint(0, 1000, (1,))
anchor = embedding(anchor_ids)
positive = embedding(positive_ids)
negative = embedding(negative_ids)
# Built-in Distance Function
triplet_loss = \
    nn.TripletMarginWithDistanceLoss(distance_function=nn.PairwiseDistance())
output = triplet_loss(anchor, positive, negative)
print(anchor_ids)

tensor([430])
