In [2]:
import torchvision.models as models
import torch
import torch.nn as nn

In [3]:
alexnet = models.alexnet(pretrained = True)

In [4]:
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [92]:
from torchvision import transforms
from PIL import Image
transform = transforms.Compose([            #[1]
 transforms.Resize(256),                    #[2]
 transforms.CenterCrop(224),                #[3]
 transforms.ToTensor(),                     #[4]
 transforms.Normalize(                      #[5]
 mean=[0.485, 0.456, 0.406],                #[6]
 std=[0.229, 0.224, 0.225]                  #[7]
 )])

In [93]:
def squareCrop(x, squareDim):
    left = (x.size[0]-squareDim)/2
    upper = (x.size[1]-squareDim)/2
    right = (x.size[0] + squareDim)/2
    lower = (x.size[1] + squareDim)/2
    return x.crop(box = (left, upper, right, lower))

In [94]:
def resize(x, squareDim):
    width, height = x.size
    scale = squareDim/min(width, height)
    return x.resize((int(scale * width), int(scale * height)), resample = Image.LANCZOS)

In [98]:
def getImages(path, fileEnding, num):
    images = [Image.open(path + str(i) + fileEnding) for i in range(num)]

    minWidth = min(images, key = lambda x: x.size[0]).size[0]
    minHeight = min(images, key = lambda x: x.size[1]).size[1]
    squareDim = min(minWidth, minHeight)

    print(squareDim)

    cropfn = lambda x: x.crop(box = ((x.size[0]-squareDim)/2,(x.size[1]-squareDim)/2,squareDim,squareDim))
    for i in range(len(images)):
        images[i] = squareCrop(resize(images[i],squareDim), squareDim).convert('LA')
    return images

In [99]:
def displayImages(images):
    for i in images:
        i.show()

In [100]:
# Import Pillow
from PIL import Image
# img = Image.open("dog.jpg")
dogPath = "Images/randDogSet/img"
buildingPath = "Images/randBuildingSet/img"
fileEnding = ".jpg"
num = 20

dogImages = getImages(dogPath, fileEnding, num)
buildingImages = getImages(buildingPath, fileEnding, num)
displayImages(dogImages)
displayImages(buildingImages)

164
508


In [7]:
img_t = transform(img)
batch_t = torch.unsqueeze(img_t, 0) # batch_t is our transformed image

In [8]:
alexnet.eval() # change alexnet model to eval mode

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
out = alexnet(batch_t)
print(out.shape)

torch.Size([1, 1000])


In [10]:
with open('imagenet_classes.txt') as f:
    labels = [line.strip() for line in f.readlines()]

In [11]:
_, index = torch.max(out, 1)
 
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
 
print(labels[index[0]], percentage[index[0]].item())

Labrador retriever 41.585166931152344


In [12]:
_, indices = torch.sort(out, descending=True)
[(labels[idx], percentage[idx].item()) for idx in indices[0][:5]]


[('Labrador retriever', 41.585166931152344),
 ('golden retriever', 16.59166145324707),
 ('Saluki, gazelle hound', 16.286880493164062),
 ('whippet', 2.8539133071899414),
 ('Ibizan hound, Ibizan Podenco', 2.3924720287323)]

In [13]:
model = models.alexnet(pretrained=True)

In [14]:
i = 0
j = 0
a = []
for child in model.children():
    print("child", i,":")
    if isinstance(child, nn.AdaptiveAvgPool2d):
        print("\t" + str(child))
    else:
        for param in child:
            print("\tLayer", j ,":", param)
            instance = isinstance(param, nn.ReLU) or isinstance(param, nn.MaxPool2d) or isinstance(param, nn.Dropout)
            if not instance:
                a.append(param.weight)
            j += 1
    i += 1
# need to pick layer to read out the features from passing an image through the network 

child 0 :
	Layer 0 : Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
	Layer 1 : ReLU(inplace=True)
	Layer 2 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
	Layer 3 : Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
	Layer 4 : ReLU(inplace=True)
	Layer 5 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
	Layer 6 : Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 7 : ReLU(inplace=True)
	Layer 8 : Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 9 : ReLU(inplace=True)
	Layer 10 : Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 11 : ReLU(inplace=True)
	Layer 12 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
child 1 :
	AdaptiveAvgPool2d(output_size=(6, 6))
child 2 :
	Layer 13 : Dropout(p=0.5, inplace=False)
	Layer 14 : Linear(in_features=9216, out_features=4096, bias=True)
	Layer 15 : ReLU(i

In [15]:
#weights
print(a)

[Parameter containing:
tensor([[[[ 1.1864e-01,  9.4069e-02,  9.5435e-02,  ...,  5.5822e-02,
            2.1575e-02,  4.9963e-02],
          [ 7.4882e-02,  3.8940e-02,  5.2979e-02,  ...,  2.5709e-02,
           -1.1299e-02,  4.1590e-03],
          [ 7.5425e-02,  3.8779e-02,  5.4930e-02,  ...,  4.3596e-02,
            1.0225e-02,  1.3251e-02],
          ...,
          [ 9.3155e-02,  1.0374e-01,  6.7547e-02,  ..., -2.0277e-01,
           -1.2839e-01, -1.1220e-01],
          [ 4.3544e-02,  6.4916e-02,  3.6164e-02,  ..., -2.0248e-01,
           -1.1376e-01, -1.0719e-01],
          [ 4.7369e-02,  6.2543e-02,  2.4758e-02,  ..., -1.1844e-01,
           -9.5567e-02, -8.3890e-02]],

         [[-7.2634e-02, -5.7996e-02, -8.0661e-02,  ..., -6.0304e-04,
           -2.5309e-02,  2.5471e-02],
          [-6.9042e-02, -6.7562e-02, -7.6367e-02,  ..., -3.9616e-03,
           -3.0402e-02,  1.0477e-02],
          [-9.9517e-02, -8.5592e-02, -1.0521e-01,  ..., -2.6587e-02,
           -2.2777e-02,  6.6451e-03

In [16]:
#truncating neural net layers
l = list(model.children())
delete = 6 #number of truncating layers from the model
nn_trunc = nn.Sequential(*l[0][:len(l[0])-delete])

In [17]:
print(nn_trunc)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


In [18]:
#output of passing image to model
out = nn_trunc(batch_t)
print(out.shape)

torch.Size([1, 384, 13, 13])


In [27]:
#reshaping output to 2 dims
arr = out.view(out.shape[0]*out.shape[1]*out.shape[2]*out.shape[3])
print(arr, arr.shape)

tensor([ -6.3573, -17.1749,  -8.2156,  ...,   5.6634,  -3.7358,  -5.0717],
       grad_fn=<ViewBackward>) torch.Size([64896])


In [28]:
#Dimension Reductionality
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [31]:
#T-SNE
linear_embedded = TSNE(n_components=2).fit_transform(arr.detach().numpy())
print(linear_embedded)
print("shape:", linear_embedded2.shape)

KeyboardInterrupt: 