In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import models, transforms, utils
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import scipy.misc
from PIL import Image
import json


In [2]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

Using cache found in C:\Users\dingb/.cache\torch\hub\pytorch_vision_v0.10.0


In [3]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [4]:
# we will save the conv layer weights in this list
model_weights =[]
#we will save the 49 conv layers in this list
conv_layers = []
# get all the model children as list
model_children = list(model.children())
#counter to keep count of the conv layers
counter = 0
#append all the conv layers and their respective wights to the list
for i in range(len(model_children)):
    if type(model_children[i]) == nn.Conv2d:
        counter+=1
        model_weights.append(model_children[i].weight)
        conv_layers.append(model_children[i])
    elif type(model_children[i]) == nn.Sequential:
        for j in range(len(model_children[i])):
            for child in model_children[i][j].children():
                if type(child) == nn.Conv2d:
                    counter+=1
                    model_weights.append(child.weight)
                    conv_layers.append(child)
print(f"Total convolution layers: {counter}")
print("conv_layers")

Total convolution layers: 17
conv_layers


In [22]:
from torchvision.ops import roi_align
import math

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [31]:
preprocess = transforms.Compose([
    # transforms.Resize((1280,720)),
    # transforms.CenterCrop(1280),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image = Image.open('dst/fisheye/test/0.jpg')
image = preprocess(image)
img_shape = image.shape
print(img_shape)
print(f"Image shape before: {image.shape}")
image = image.unsqueeze(0)
print(f"Image shape after: {image.shape}")
image = image.to(device)

torch.Size([3, 960, 1280])
Image shape before: torch.Size([3, 960, 1280])
Image shape after: torch.Size([1, 3, 960, 1280])


In [32]:
outputs = []
names = []
for layer in conv_layers[0:]:
    image = layer(image)
    outputs.append(image)
    names.append(str(layer))
print(len(outputs))
#print feature_maps
for feature_map in outputs:
    print(feature_map.shape)
feature = outputs[-1]

17
torch.Size([1, 64, 480, 640])
torch.Size([1, 64, 480, 640])
torch.Size([1, 64, 480, 640])
torch.Size([1, 64, 480, 640])
torch.Size([1, 64, 480, 640])
torch.Size([1, 128, 240, 320])
torch.Size([1, 128, 240, 320])
torch.Size([1, 128, 240, 320])
torch.Size([1, 128, 240, 320])
torch.Size([1, 256, 120, 160])
torch.Size([1, 256, 120, 160])
torch.Size([1, 256, 120, 160])
torch.Size([1, 256, 120, 160])
torch.Size([1, 512, 60, 80])
torch.Size([1, 512, 60, 80])
torch.Size([1, 512, 60, 80])
torch.Size([1, 512, 60, 80])


In [33]:
print(feature)

tensor([[[[-1.7878e+04, -2.8151e+04, -2.5242e+04,  ..., -1.6008e+04,
           -1.2243e+04, -5.5153e+03],
          [-3.7528e+04, -5.9350e+04, -5.5575e+04,  ..., -3.7982e+04,
           -2.9278e+04, -1.2860e+04],
          [-4.4648e+04, -7.0546e+04, -6.5057e+04,  ..., -4.4404e+04,
           -3.3986e+04, -1.4087e+04],
          ...,
          [-4.7554e+04, -7.5498e+04, -6.9030e+04,  ..., -4.8214e+04,
           -3.7418e+04, -1.4694e+04],
          [-3.9602e+04, -6.3697e+04, -5.9099e+04,  ..., -4.2570e+04,
           -3.2815e+04, -1.2766e+04],
          [-2.0347e+04, -3.2085e+04, -2.6925e+04,  ..., -1.4888e+04,
           -1.1159e+04, -2.7693e+03]],

         [[ 1.0409e+04,  1.8848e+04,  1.9404e+04,  ...,  1.5680e+04,
            1.5500e+04,  7.9603e+03],
          [ 5.8602e+03,  1.2681e+04,  1.0690e+04,  ...,  7.5118e+03,
            1.0667e+04,  4.7745e+03],
          [-6.5281e+03, -7.5516e+03, -1.4639e+04,  ..., -1.5791e+04,
           -6.4263e+03, -4.5891e+03],
          ...,
     

In [35]:
box = [torch.tensor([[ 382., 790., 437., 866.]]).to(device)]
w, l = 437-382, 866-790
scale = feature.shape[2]/img_shape[1]
l = l*scale
w = w*scale
output_size = (int(math.ceil(w)), int(math.ceil(l)))
print(output_size)
aligned_f = roi_align(feature, box, output_size= output_size, spatial_scale=scale, aligned=True)
print(aligned_f.shape)


(4, 5)
torch.Size([1, 512, 4, 5])


In [66]:
import torchvision
torchvision.__version__


AttributeError: module 'torchvision' has no attribute '__version__'

In [None]:
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [33]:
from PIL import Image
from torchvision import transforms
input_image = Image.open('detection/326.jpg')
# input_image = input_image.convert("RGB")
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model



# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
# print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
# print(probabilities)

In [34]:
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]
# Show top categories per image
top5_prob, top5_catid = torch.topk(probabilities, 5)
print(top5_catid)
for i in range(top5_prob.size(0)):
    print(categories[top5_catid[i]], top5_prob[i].item())

tensor([473, 600, 477, 726, 783], device='cuda:0')
can opener 0.41975638270378113
hook 0.07524648308753967
carpenter's kit 0.02796514891088009
plane 0.026483098044991493
screw 0.026465121656656265
