In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights
import pytorch_lightning as pl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Identity(nn.Module):
    """Pass-through module: hands back whatever it is given, unchanged."""

    def __init__(self):
        # No parameters or buffers of its own; only the base-class setup is needed.
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (the same object, no copy)."""
        return x

class PoseDetector(pl.LightningModule):
    """ResNet-50 backbone followed by a linear head producing 6 regression outputs.

    The backbone is created with ``ResNet50_Weights.DEFAULT`` and its final
    ``fc`` layer is replaced by ``Identity`` so that the 2048-wide features are
    fed straight into ``self.regression``. Training and validation both use a
    mean-squared-error loss between the 6 outputs and the target ``y``.

    Args:
        lr: Learning rate handed to Adam in ``configure_optimizers``.
            Defaults to ``1e-3``, the value that used to be hard-coded, so
            ``PoseDetector()`` and ``load_from_checkpoint`` on existing
            checkpoints behave as before.
    """

    def __init__(self, lr: float = 1e-3):
        super().__init__()
        weights = ResNet50_Weights.DEFAULT
        self.model = resnet50(weights=weights)
        # Drop the classification layer: the backbone now returns its
        # features unchanged, which the regression head consumes.
        self.model.fc = Identity()
        self.regression = nn.Linear(2048, 6)
        # Preprocessing transform that ships with the chosen weights. It is
        # stored for callers; forward() does NOT apply it.
        self.preprocess = weights.transforms()
        self.lr = lr

    def forward(self, x):
        """Run the backbone and then the regression head on ``x``.

        ``x`` is passed to ``self.model`` as-is (presumably an already
        preprocessed image batch -- TODO confirm against the data pipeline).
        Returns the output of ``self.regression``, whose last dimension is 6.
        """
        x = self.model(x)
        x = self.regression(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute and log the MSE loss for one training batch.

        ``batch`` is unpacked as ``(x, y)``. The loss is logged as
        ``train_loss`` and returned for Lightning to optimise.
        """
        x, y = batch
        output = self(x)
        loss = F.mse_loss(output, y)
        self.log('train_loss', loss, prog_bar=True, sync_dist=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the MSE loss for one validation batch.

        ``batch`` is unpacked as ``(x, y)``. The loss is logged as ``val_loss``.
        """
        x, y = batch
        output = self(x)
        loss = F.mse_loss(output, y)
        # sync_dist=True matches training_step: on multi-device runs the
        # logged value is reduced across processes instead of reflecting a
        # single rank's shard of the validation set.
        self.log('val_loss', loss, prog_bar=True, sync_dist=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

In [3]:
# Serialized tensor to inspect; this one lives in the `resnet_dev_data` directory.
IMG_PATH = "/home/nlp/ron.eliav/pose3d/data/resnet_dev_data/181228_000833939_Camera_0.jpg.pt"
# Alternative sample from the `resnet_data` directory:
# IMG_PATH = "/home/nlp/ron.eliav/pose3d/data/resnet_data/180502_025112897_Camera_0.jpg.pt"
img = torch.load(IMG_PATH)

In [4]:
# Echo the loaded tensor to eyeball its values (the notebook prints a truncated repr).
img

tensor([[[ 2.1804,  2.1804,  2.1290,  ...,  2.0605,  1.8550,  1.4783],
         [ 0.9474,  1.0844,  1.5125,  ...,  2.0605,  1.5982,  1.1358],
         [-0.7479, -0.6452,  0.1254,  ...,  1.7180,  1.2214,  1.0159],
         ...,
         [ 0.2453,  0.2453,  0.2624,  ..., -1.2103, -1.2103, -1.1932],
         [ 0.2282,  0.2282,  0.2453,  ..., -1.2103, -1.2445, -1.2617],
         [ 0.2453,  0.2453,  0.2453,  ..., -1.2103, -1.2445, -1.2617]],

        [[ 2.4111,  2.4111,  2.3235,  ...,  2.0784,  1.7108,  1.4132],
         [ 1.7283,  1.7458,  1.9909,  ...,  1.9559,  1.3081,  0.9755],
         [-0.2850, -0.2150,  0.3452,  ...,  1.6758,  1.1155,  0.9230],
         ...,
         [ 1.0805,  1.0805,  1.0805,  ..., -1.0728, -1.1078, -1.0903],
         [ 1.0805,  1.0630,  1.0805,  ..., -1.1078, -1.1429, -1.1078],
         [ 1.0805,  1.0805,  1.0630,  ..., -1.1078, -1.1078, -1.1078]],

        [[ 2.5877,  2.5877,  2.4657,  ...,  1.8208,  1.5594,  1.2108],
         [ 1.9428,  1.9603,  2.2043,  ...,  1

In [5]:
# Checkpoint to restore; the file name indicates epoch 108 / step 3270 of `version_9`.
CKPT_PATH = "/home/nlp/ron.eliav/pose3d/ai-camera-pose-network/src/lightning_logs/version_9/checkpoints/epoch=108-step=3270.ckpt"
model = PoseDetector.load_from_checkpoint(CKPT_PATH)

In [6]:
# Switch to evaluation mode before predicting: a freshly constructed module is
# in training mode, where the backbone's BatchNorm layers would normalise with
# the statistics of this single image and update their running averages.
model.eval()

# Add the leading batch dimension the model expects. A new name is used so
# that re-running this cell does not keep stacking dimensions onto `img`.
# The batch is also moved to whatever device the restored model lives on.
img_batch = img.unsqueeze(0).to(model.device)

# No gradients are needed for a prediction; skip building the autograd graph.
with torch.no_grad():
    pred = model(img_batch)
pred
# Reference values noted by the author for the two sample images
# (presumably the ground-truth labels -- TODO confirm):
# dev: 34132.3559,41088.3516,12.0351,161.386998058,87.677117148,-50.072881833
# train: 34768.9649,42018.1341,15.2489,153.73610589,85.94014665,-40.957096437


In [14]:
# The six numbers from the `dev:` comment in the inference cell, shown as a tensor
# (presumably the expected output for the dev image, for side-by-side comparison -- TODO confirm).
torch.tensor([34132.3559,41088.3516,12.0351,161.386998058,87.677117148,-50.072881833])

tensor([ 3.4132e+04,  4.1088e+04,  1.2035e+01,  1.6139e+02,  8.7677e+01,
        -5.0073e+01])