In [14]:
import torch
import torch.nn as nn
import torchvision
import numpy as np
import pandas as pd
import skimage
import os
import pickle

nAnchors = 24
batchSize = 16
numEpochs = 3
device = "cpu"

In [52]:
def extract_feats(model, df, fileDir, size=(224,224), outfile=None, nImages=330):
    newDf = df.copy()
    newDf.loc[:,"Features"] = np.zeros((newDf.shape[0], 2208)).tolist()
    newDf.loc[:,"Features"] = newDf.loc[:,"Features"].astype(object)
    
    # define new index for images where first significant digit is
    # sequence number, thus it has to have more overall digits 
    # than max image number. 
    newDf.loc[:,"ImageNum"] = np.zeros(newDf.shape[0])
    indexer = 10**len(str(nImages))

    for idx, row in df.iterrows():

        name = row["ImageFile"].split(".")[0]

        seq = int(name.split("/")[0].split("q")[-1])
        frame = int(name.split("frame")[-1])

        newDf.loc[idx, "ImageNum"] = indexer*seq + frame

        im = torch.from_numpy(skimage.transform.resize(
                                skimage.io.imread(
                                    os.path.join(fileDir,
                                                    row["ImageFile"])),
                                                    size,
                                                    mode="constant"))

        # extract features and put through GAP layer                                        
        im = (im.permute(2,0,1)).reshape(1,3,224,224)
        x = model.features(im.float())
        newDf.at[idx,"Features"] = np.squeeze(torch.nn.AvgPool2d(7)(x).detach().numpy())
    
    newDf.sort_values("ImageNum", inplace=True)

    if outfile:
        # use pickle format to make sure values of numpy array
        # are saved with full accuracy (saving as txt only yields
        # around 9 significant digits, pickle saves bit representation)
        newDf.to_pickle(outfile)

    return newDf

def define_anchors(df, nAnchors, outfile=None):
    
    newDf = df.copy()
    newDf.reset_index()
    
    newDf.loc[:,"AnchorDists"] = np.zeros((newDf.shape[0], nAnchors, 2)).tolist()
    newDf.loc[:,"AnchorDists"] = newDf.loc[:,"AnchorDists"].astype(object)

    myAnchors = np.zeros((nAnchors, 2))

    for i, j in enumerate(np.floor(np.linspace(0,df.shape[0], nAnchors, endpoint=False)).astype(int)):
        myAnchors[i,:] = np.array([newDf.loc[j, "X"], newDf.loc[j,"Y"]])
    
    for index, row in newDf.iterrows():
        newDf.at[index, "AnchorDists"] = myAnchors - np.array([newDf.loc[index, "X"], newDf.loc[index,"Y"]])

    if outfile:
        newDf.to_pickle(outfile)

    return newDf, myAnchors

def anchors_for_testSet(df, myAnchors, outfile=None):
    # since anchors are only defined on trainings set
    newDf = df.copy()

    newDf.loc[:,"AnchorDists"] = np.zeros((newDf.shape[0], myAnchors.shape[0], 2)).tolist()
    newDf.loc[:,"AnchorDists"] = newDf.loc[:,"AnchorDists"].astype(object)

    for index, row in newDf.iterrows():
        newDf.at[index, "AnchorDists"] = myAnchors - np.array([newDf.loc[index, "X"], newDf.loc[index,"Y"]])

    if outfile:
        newDf.to_pickle(outfile)

    return newDf

class AnchorDataSet(torch.utils.data.Dataset):
    def __init__(self, file, transform = None, target_transform=None):
        super(AnchorDataSet, self).__init__()
        self.df = pd.read_pickle(file)
        self.transform = transform
        self.target_transform=target_transform
        """
        self.filenames = dummy.loc[:,"ImageFile"]
        self.features = dummy.loc[:,"Features"]
        self.anchorDists = dummy.loc[:,"AnchorDists"]
        self."""
    
    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self,idx):
        
        feats = torch.from_numpy(self.df.loc[idx, "Features"]).double()
        anchorDists = torch.from_numpy(self.df.loc[idx, "AnchorDists"]).double()
        
        dofs = np.array([self.df.loc[idx,"Z"],self.df.loc[idx,"W"],self.df.loc[idx,"P"],self.df.loc[idx,"Q"],self.df.loc[idx,"R"]])
        dofs = torch.from_numpy(dofs).double()
        
        xy = torch.from_numpy(np.array([self.df.loc[idx,"X"], self.df.loc[idx,"Y"]])).double()
        imageFile = self.df.loc[idx, "ImageFile"]
        ImageNum = self.df.loc[idx,"ImageNum"]

        return {"Features": feats,
                "anchorDists": anchorDists,
                "dofs": dofs,
                "xy": xy,
                "ImageFile": imageFile,
                "ImageNum": ImageNum}


In [3]:
# we use pretrained denseNet
myModel = torchvision.models.densenet161(pretrained=True).float()

In [4]:
trainData = pd.read_csv("./ShopFacade/dataset_train.txt", delimiter=" ", skiprows=1)
trainData = pd.DataFrame(data={"ImageFile": trainData.loc[:,"ImageFile,"],
                                  "X": trainData.Camera,
                                  "Y": trainData.Position,
                                  "Z": trainData.loc[:,"[X"],
                                  "W": trainData.Y,
                                  "P": trainData.Z,
                                  "Q": trainData.W,
                                  "R": trainData.P})

trainData = extract_feats(myModel, trainData, "./ShopFacade/", outfile="./ShopFacade/traindata_with_features.pkl")
trainData, anch = define_anchors(trainData, nAnchors, "./ShopFacade/traindata_with_features_and_anchors.pkl")

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [5]:
testData = pd.read_csv("./ShopFacade/dataset_test.txt", delimiter=" ", skiprows=1)
testData = pd.DataFrame(data={"ImageFile": testData.loc[:,"ImageFile,"],
                                  "X": testData.Camera,
                                  "Y": testData.Position,
                                  "Z": testData.loc[:,"[X"],
                                  "W": testData.Y,
                                  "P": testData.Z,
                                  "Q": testData.W,
                                  "R": testData.P})

testData = extract_feats(myModel, testData, "./ShopFacade/", outfile="./ShopFacade/testdata_with_features.pkl")
testData = anchors_for_testSet(testData, anch, outfile="./ShopFacade/testdata_with_features_and_anchors.pkl")

In [54]:
class anchorNet(nn.Module):
    def __init__(self, nAnchors):
        super(anchorNet, self).__init__()
        self.classifier = nn.Linear(2208, nAnchors)
        self.regressor = nn.Linear(2208, 2*nAnchors)
        self.dof_regressor = nn.Linear(2208, 5)
        self.softmax = nn.Softmax(dim=1)
        self.double()
    
    def forward(self, feats):
        classify = self.softmax(self.classifier(nn.functional.relu(feats))) # TODO: ReLU useful?
        regress = self.regressor(nn.functional.relu(feats))
        dof_regress = self.dof_regressor(nn.functional.relu(feats))

        return classify, regress, dof_regress

def custom_loss(classify, regress, dof_regress, anchorDistsGt, dofGt, dofLoss,factors = [2.4,0,0.5]):
    # TODO: Possible Flaw (or the reason why this works?):
    # The net tries to learn realtive position to each anchorpoint
    # independently, so we have a large amount of degrees of freedom, even though relative
    # position to anchor points should have 2 DOF. Maybe try using more anchorpoints to
    # proof this point.

    """
    classify: output of anchor classifier
    regress: output of regressor
    dof_regress: output of dof_regressor
    anchorDistsGt: true distance to all anchor poitns
    dofGt: true remaining 4 DOF
    dofLoss: loss function used for dof_regressor
    factors: list of hyperparameters for weighting of different loss terms
    """

    dist = (regress.reshape(-1, nAnchors, 2) - anchorDistsGt)

    # TODO: Use softmax on gt-dof and dof for normalization
    lossXY = torch.sum(torch.linalg.norm(dist, axis = -1) * classify)
    # TODO: implement cross entropy
    
    return torch.sum(factors[0] * lossXY) + factors[1] + factors[2] * dofLoss(dof_regress, dofGt)


In [82]:
trainDataset = AnchorDataSet(file= "./ShopFacade/traindata_with_features_and_anchors.pkl")
trainDataloader = torch.utils.data.DataLoader(trainDataset, batch_size=16, shuffle=False, num_workers=0)

myNet = anchorNet(nAnchors=nAnchors).to(device)
dofLoss = nn.MSELoss()

optimizer = torch.optim.Adam(myNet.parameters(), lr=0.0003)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=80, gamma=0.5)

for epoch in range(20):
    epochLoss = 0
    for i, data in enumerate(trainDataloader):
        
        myNet.train()

        optimizer.zero_grad()
        
        classify, regress, dof_regress = myNet.forward(data["Features"])

        classify = torch.autograd.Variable(classify, requires_grad=True)
        regress = torch.autograd.Variable(regress, requires_grad=True)
        dof_regress = torch.autograd.Variable(dof_regress, requires_grad=True)
        
        loss = custom_loss(classify, regress, dof_regress, anchorDistsGt=data["anchorDists"], dofGt=data["dofs"], dofLoss=dofLoss)
        loss.backward()
        optimizer.step()

        epochLoss += loss.data.item()
        for param in myNet.parameters():
            print(param.grad)
    print("Loss this epoch: ", epochLoss)

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
Loss this epoch:  7416.37241208646
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
Loss this epoch:  7416.37241208646
None
None
None
None
None
None


In [80]:
for param in myNet.parameters():
    print(param)

Parameter containing:
tensor([[-1.0034e-02, -1.4697e-02,  1.1348e-02,  ...,  1.3746e-03,
         -1.2305e-02,  3.2203e-03],
        [ 4.5935e-03, -1.5501e-02, -2.0477e-02,  ..., -5.7512e-03,
          3.0687e-03,  1.1798e-02],
        [-1.3665e-02,  8.0313e-03,  5.4440e-05,  ..., -1.5479e-02,
          1.4837e-02,  1.9652e-02],
        ...,
        [ 1.9303e-03, -5.2583e-03, -3.8259e-03,  ..., -2.9748e-03,
         -1.9019e-02,  7.1479e-03],
        [-6.0018e-03,  1.7588e-02,  1.5821e-02,  ...,  4.0330e-03,
         -2.5783e-03,  1.1901e-02],
        [ 1.7472e-02, -1.0161e-02,  7.7957e-03,  ...,  2.0405e-02,
         -1.6638e-03,  1.4528e-03]], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([ 0.0071,  0.0164, -0.0176,  0.0009, -0.0046,  0.0198, -0.0048,  0.0184,
         0.0149,  0.0206, -0.0168, -0.0038, -0.0196, -0.0020,  0.0054, -0.0134,
         0.0126,  0.0142, -0.0032, -0.0129,  0.0173, -0.0031,  0.0211,  0.0201],
       dtype=torch.float64, requires_grad=