In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
#import matplotlib.pyplot as plt
import time
import os
import copy
from collections import defaultdict

from TwoFileFolder import TwoFileFolder

In [2]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [3]:
data_dir = '/mnt/c/Users/gweld/sidewalk/sidewalk_ml/baby_ds/'

In [4]:
image_datasets = {x:TwoFileFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# We've got things set up like in train.py

now let's play around with a tensor

In [5]:
train_dataset = image_datasets['train']

In [6]:
len(train_dataset)

10

In [7]:
train_dataset.classes

['missing_ramp', 'null_crop', 'obstruction', 'ramp', 'sfc_problem']

In [8]:
example_img, target = train_dataset[0]

In [9]:
type(example_img)

torch.Tensor

In [10]:
len(example_img)

3

In [11]:
example_img

tensor([[[-0.7137, -0.7822, -0.7479,  ..., -0.5596, -0.6109, -0.7137],
         [-0.7308, -0.7993, -0.7137,  ..., -0.7137, -0.7308, -0.7479],
         [-0.7137, -0.7822, -0.6794,  ..., -0.7993, -0.8335, -0.8507],
         ...,
         [-1.2103, -1.2274, -1.2103,  ..., -1.0904, -1.0904, -1.0904],
         [-1.3302, -1.2617, -1.1589,  ..., -1.0733, -1.0733, -1.1075],
         [-1.3302, -1.2788, -1.1760,  ..., -1.0562, -1.0733, -1.1075]],

        [[-0.7752, -0.8452, -0.7752,  ..., -0.2850, -0.3025, -0.3550],
         [-0.7752, -0.8277, -0.7577,  ..., -0.4251, -0.4426, -0.4601],
         [-0.7402, -0.8102, -0.7227,  ..., -0.5301, -0.5651, -0.5826],
         ...,
         [-0.7752, -0.7927, -0.7752,  ..., -0.7577, -0.7577, -0.7577],
         [-0.8978, -0.8277, -0.7402,  ..., -0.7577, -0.7752, -0.8102],
         [-0.9153, -0.8803, -0.7752,  ..., -0.7752, -0.7927, -0.8102]],

        [[-0.7238, -0.7936, -0.7413,  ...,  0.1999,  0.1651,  0.0953],
         [-0.7413, -0.7936, -0.7064,  ...,  0

In [12]:
example_img.size()

torch.Size([3, 224, 224])

OK, so the image returned by our transform is a 3x224x224 tensor... let's see how we can add on to that a couple extra features

Start with experimenting with the json

In [13]:
import json

In [14]:
example_meta_file = train_dataset.samples[0][1]

In [15]:
with open(example_meta_file) as metafile:
    meta = json.load(metafile)

In [16]:
meta

{u'crop size': 1067.9562786956378,
 u'crop_x': 1890.7857638889081,
 u'crop_y': 4428.0,
 u'pano id': u'__BdOPEuXx_6Nwts-Ee1tw',
 u'pano yaw': -111.12658691406199,
 u'sv_x': 6000.0,
 u'sv_y': -1100.0}

In [17]:
import numpy as np

def meta_to_tensor(path_to_meta):
    ''' used by getitem to load the meta into a tensor'''
    with open(path_to_meta) as metafile:
        meta_dict = json.load(metafile)
        
        features = []
        # crop size as proxy for depth
        # hacky approximate normilization
        features.append( meta_dict[u'crop size']/1000 )
        
        # pano yaw degree
        features.append( np.sin(np.deg2rad(meta_dict[u'pano yaw'])) )
        features.append( np.cos(np.deg2rad(meta_dict[u'pano yaw'])) )
        
        # sv_x converted to degree
        horiz_degree = (meta_dict[u'sv_x'] / 13312) * 360
        features.append( np.sin(np.deg2rad( horiz_degree )) )
        features.append( np.cos(np.deg2rad( horiz_degree )) )
        
        # sv_y converted to degree
        vert_degree = (meta_dict[u'sv_y'] / 3328) * 360
        features.append( np.sin(np.deg2rad( vert_degree )) )
        features.append( np.cos(np.deg2rad( vert_degree )) )
        
        return torch.Tensor( features )
        
    

In [18]:
example_meta = meta_to_tensor(example_meta_file)
print example_meta
print example_meta.shape

tensor([ 1.0680, -0.9328, -0.3604,  0.3047, -0.9524, -0.8747, -0.4847])
torch.Size([7])


In [19]:
new_view = example_img.view(3*224*224)

In [20]:
new_view.shape

torch.Size([150528])

In [21]:
both = torch.cat((new_view, example_meta))

In [22]:
both.shape

torch.Size([150535])

In [23]:
len(example_meta) + 150528

150535

In [24]:
both[:150528].view((3,224,224)).allclose(example_img)

True

In [25]:
both[150528:].allclose(example_meta)

True

In [32]:
big_both = torch.cat((both.view((1,150535)),both.view((1,150535)),both.view((1,150535)),both.view((1,150535))))

In [33]:
big_both.shape

torch.Size([4, 150535])

In [34]:
big_img = torch.cat((example_img.view((1,3,224,224)),example_img.view((1,3,224,224)),example_img.view((1,3,224,224)),example_img.view((1,3,224,224))))

In [35]:
big_img.shape

torch.Size([4, 3, 224, 224])

In [39]:
big_both[0,:150528].view((3,224,224))

tensor([[[-0.7137, -0.7822, -0.7479,  ..., -0.5596, -0.6109, -0.7137],
         [-0.7308, -0.7993, -0.7137,  ..., -0.7137, -0.7308, -0.7479],
         [-0.7137, -0.7822, -0.6794,  ..., -0.7993, -0.8335, -0.8507],
         ...,
         [-1.2103, -1.2274, -1.2103,  ..., -1.0904, -1.0904, -1.0904],
         [-1.3302, -1.2617, -1.1589,  ..., -1.0733, -1.0733, -1.1075],
         [-1.3302, -1.2788, -1.1760,  ..., -1.0562, -1.0733, -1.1075]],

        [[-0.7752, -0.8452, -0.7752,  ..., -0.2850, -0.3025, -0.3550],
         [-0.7752, -0.8277, -0.7577,  ..., -0.4251, -0.4426, -0.4601],
         [-0.7402, -0.8102, -0.7227,  ..., -0.5301, -0.5651, -0.5826],
         ...,
         [-0.7752, -0.7927, -0.7752,  ..., -0.7577, -0.7577, -0.7577],
         [-0.8978, -0.8277, -0.7402,  ..., -0.7577, -0.7752, -0.8102],
         [-0.9153, -0.8803, -0.7752,  ..., -0.7752, -0.7927, -0.8102]],

        [[-0.7238, -0.7936, -0.7413,  ...,  0.1999,  0.1651,  0.0953],
         [-0.7413, -0.7936, -0.7064,  ...,  0

In [44]:
big_both.narrow(1,0,150528).view((big_both.size(0),3,224,224)).allclose(big_img)

True

In [47]:
big_both.narrow(1,150528,7)[0].allclose(example_meta)

True

In [62]:
x = torch.tensor( np.ones((4,512),dtype=np.float32) )
print x.shape
print x.dtype

m = big_both.narrow(1,150528,7)
print m.shape
print m.dtype

combined = torch.cat((x,m), dim=1)
print combined.shape

torch.Size([4, 512])
torch.float32
torch.Size([4, 7])
torch.float32
torch.Size([4, 519])


In [26]:
model = models.resnet18(pretrained=True)

In [27]:
model.state_dict().keys()

['conv1.weight',
 'bn1.weight',
 'bn1.bias',
 'bn1.running_mean',
 'bn1.running_var',
 'bn1.num_batches_tracked',
 'layer1.0.conv1.weight',
 'layer1.0.bn1.weight',
 'layer1.0.bn1.bias',
 'layer1.0.bn1.running_mean',
 'layer1.0.bn1.running_var',
 'layer1.0.bn1.num_batches_tracked',
 'layer1.0.conv2.weight',
 'layer1.0.bn2.weight',
 'layer1.0.bn2.bias',
 'layer1.0.bn2.running_mean',
 'layer1.0.bn2.running_var',
 'layer1.0.bn2.num_batches_tracked',
 'layer1.1.conv1.weight',
 'layer1.1.bn1.weight',
 'layer1.1.bn1.bias',
 'layer1.1.bn1.running_mean',
 'layer1.1.bn1.running_var',
 'layer1.1.bn1.num_batches_tracked',
 'layer1.1.conv2.weight',
 'layer1.1.bn2.weight',
 'layer1.1.bn2.bias',
 'layer1.1.bn2.running_mean',
 'layer1.1.bn2.running_var',
 'layer1.1.bn2.num_batches_tracked',
 'layer2.0.conv1.weight',
 'layer2.0.bn1.weight',
 'layer2.0.bn1.bias',
 'layer2.0.bn1.running_mean',
 'layer2.0.bn1.running_var',
 'layer2.0.bn1.num_batches_tracked',
 'layer2.0.conv2.weight',
 'layer2.0.bn2.weigh

In [28]:
model.fc.in_features

512

In [29]:
x = model.conv1(example_img)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [3, 224, 224] instead

In [None]:
example_img.shape

In [None]:
model.forward(example_img)