In [2]:
import torchvision.models as models
import torch
import torch.nn as nn

In [3]:
# AlexNet from torchvision, initialised with its pretrained weights.
alexnet = models.alexnet(pretrained=True)

In [4]:
# Show the architecture: the module's repr lists every sub-layer with its settings.
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
from torchvision import transforms

# Per-channel mean / std handed to Normalize (three values -> three channels).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing steps, applied in list order to the input image.
preprocess_steps = [
    transforms.Resize(256),        # resize to 256
    transforms.CenterCrop(224),    # keep the central 224x224 region
    transforms.ToTensor(),         # image -> tensor
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(preprocess_steps)

In [6]:
# Import Pillow
from PIL import Image

# Image.open() is lazy and keeps the underlying file open, so use it as a
# context manager to release the handle. convert("RGB") forces the pixel data
# to load before the file closes and guarantees exactly three channels, which
# is what the three-value mean/std passed to Normalize require (a grayscale,
# CMYK or RGBA file would otherwise fail at the Normalize step).
with Image.open("dog.jpg") as im:
    img = im.convert("RGB")

In [7]:
# Run the preprocessing pipeline, then prepend a size-1 axis at position 0
# so the single image forms a batch.
img_t = transform(img)
batch_t = img_t.unsqueeze(0)  # batch_t is our transformed image

In [8]:
# Put the network in evaluation mode before inference; the classifier contains
# Dropout layers, which behave differently in training mode.
# As the cell's last expression, the call also echoes the module's repr.
alexnet.eval() # change alexnet model to eval mode

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
# Inference-only forward pass: no_grad() stops autograd from recording a graph
# that is never used here, saving memory and time. `out` holds one row of raw
# class scores for the single image in the batch.
with torch.no_grad():
    out = alexnet(batch_t)
print(out.shape)

torch.Size([1, 1000])


In [10]:
# Read the class names, one per line, stripping surrounding whitespace
# (including the trailing newline). List position i is the label for index i.
with open('imagenet_classes.txt') as label_file:
    labels = [raw_line.strip() for raw_line in label_file]

In [11]:
# Position of the highest score along the class axis (one entry per batch row).
index = torch.max(out, dim=1).indices

# Softmax over the class axis turns the scores of the first (only) image into
# probabilities; scale to percentages.
percentage = torch.softmax(out, dim=1)[0] * 100

# Report the winning label together with its confidence.
best = index[0]
print(labels[best], percentage[best].item())

Labrador retriever 41.585166931152344


In [12]:
# Class indices ordered from highest to lowest score (sorted along the last axis).
indices = torch.sort(out, descending=True).indices

# Five best classes for the first image, paired with their percentages.
top_five = indices[0][:5]
[(labels[idx], percentage[idx].item()) for idx in top_five]


[('Labrador retriever', 41.585166931152344),
 ('golden retriever', 16.59166145324707),
 ('Saluki, gazelle hound', 16.286880493164062),
 ('whippet', 2.8539133071899414),
 ('Ibizan hound, Ibizan Podenco', 2.3924720287323)]

In [13]:
# Second, independent pretrained AlexNet instance used for layer inspection and
# truncation below. NOTE(review): eval() was called on `alexnet`, not on this
# object, so `model` stays in whatever mode a fresh module starts in
# (presumably training mode - confirm if Dropout layers are ever run through it).
model = models.alexnet(pretrained=True)

In [14]:
# Walk the model's top-level children, print every layer with a running index,
# and collect the weight of each layer that is not one of the weight-less types.
WEIGHTLESS_TYPES = (nn.ReLU, nn.MaxPool2d, nn.Dropout)

a = []  # collected weight Parameters, in layer order
j = 0   # running layer index across all iterable children
for i, child in enumerate(model.children()):
    print("child", i, ":")
    if isinstance(child, nn.AdaptiveAvgPool2d):
        # Single module rather than a container of layers: print it as-is.
        print("\t" + str(child))
        continue
    for layer in child:
        print("\tLayer", j, ":", layer)
        if not isinstance(layer, WEIGHTLESS_TYPES):
            a.append(layer.weight)
        j += 1
# need to pick layer to read out the features from passing an image through the network

child 0 :
	Layer 0 : Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
	Layer 1 : ReLU(inplace=True)
	Layer 2 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
	Layer 3 : Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
	Layer 4 : ReLU(inplace=True)
	Layer 5 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
	Layer 6 : Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 7 : ReLU(inplace=True)
	Layer 8 : Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 9 : ReLU(inplace=True)
	Layer 10 : Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
	Layer 11 : ReLU(inplace=True)
	Layer 12 : MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
child 1 :
	AdaptiveAvgPool2d(output_size=(6, 6))
child 2 :
	Layer 13 : Dropout(p=0.5, inplace=False)
	Layer 14 : Linear(in_features=9216, out_features=4096, bias=True)
	Layer 15 : ReLU(i

In [15]:
#weights
# `a` is the list of weight Parameters gathered from the layers that are not
# ReLU / MaxPool2d / Dropout. Printing it writes out every tensor in the list,
# so the output is long.
print(a)

[Parameter containing:
tensor([[[[ 1.1864e-01,  9.4069e-02,  9.5435e-02,  ...,  5.5822e-02,
            2.1575e-02,  4.9963e-02],
          [ 7.4882e-02,  3.8940e-02,  5.2979e-02,  ...,  2.5709e-02,
           -1.1299e-02,  4.1590e-03],
          [ 7.5425e-02,  3.8779e-02,  5.4930e-02,  ...,  4.3596e-02,
            1.0225e-02,  1.3251e-02],
          ...,
          [ 9.3155e-02,  1.0374e-01,  6.7547e-02,  ..., -2.0277e-01,
           -1.2839e-01, -1.1220e-01],
          [ 4.3544e-02,  6.4916e-02,  3.6164e-02,  ..., -2.0248e-01,
           -1.1376e-01, -1.0719e-01],
          [ 4.7369e-02,  6.2543e-02,  2.4758e-02,  ..., -1.1844e-01,
           -9.5567e-02, -8.3890e-02]],

         [[-7.2634e-02, -5.7996e-02, -8.0661e-02,  ..., -6.0304e-04,
           -2.5309e-02,  2.5471e-02],
          [-6.9042e-02, -6.7562e-02, -7.6367e-02,  ..., -3.9616e-03,
           -3.0402e-02,  1.0477e-02],
          [-9.9517e-02, -8.5592e-02, -1.0521e-01,  ..., -2.6587e-02,
           -2.2777e-02,  6.6451e-03

In [16]:
#truncating neural net layers
l = list(model.children())
delete = 6 #number of truncating layers from the model
nn_trunc = nn.Sequential(*l[0][:len(l[0])-delete])

In [17]:
# Confirm which layers survived the truncation.
print(nn_trunc)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


In [18]:
#output of passing image to model
out = nn_trunc(batch_t)
print(out.shape)

torch.Size([1, 384, 13, 13])


In [19]:
#reshaping output to 2 dims
arr = out.view(out.shape[0]*out.shape[1], out.shape[2]*out.shape[3])
print(arr, arr.shape)

tensor([[ -6.3573, -17.1749,  -8.2156,  ...,  -3.9201,  -0.4726,   0.0984],
        [  2.2054,  -8.7740, -12.4475,  ...,   0.1397,   4.8079,  10.2505],
        [-17.4003, -18.9014, -17.7882,  ..., -14.9688, -16.8982, -10.7638],
        ...,
        [  9.0788,  -2.8113,   1.3423,  ...,  -2.0614,  -4.1276,   2.0983],
        [-10.5476,  -6.7587,   2.7620,  ...,  -1.0289,  -4.3865,  -4.1590],
        [-14.1324, -20.0958, -16.0254,  ...,   5.6634,  -3.7358,  -5.0717]],
       grad_fn=<ViewBackward>) torch.Size([384, 169])


In [20]:
#Dimension Reductionality
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [21]:
#PCA
linear_embedded = PCA(n_components=3).fit_transform(arr.detach().numpy().T)
print(linear_embedded)
print("shape:", linear_embedded.shape)

[[ 9.37540131e+01  3.43030396e+01 -1.43858280e+01]
 [ 1.17208572e+02  5.16588440e+01 -4.16133614e+01]
 [ 5.00103416e+01  1.08511353e+01 -2.34875736e+01]
 [-9.60087395e+00 -1.99946136e+01 -1.91679173e+01]
 [-7.74191513e+01 -1.29255466e+01 -1.22982893e+01]
 [-9.32192917e+01 -9.88823032e+00 -4.96596766e+00]
 [-7.84140549e+01 -1.55884953e+01  4.98670071e-01]
 [-6.02572517e+01 -2.23250332e+01  4.12035942e+00]
 [-2.97773590e+01 -3.31665649e+01  1.81718278e+00]
 [ 1.01839895e+01 -3.72253571e+01 -3.56840515e+00]
 [ 2.03648930e+01 -2.37509117e+01  2.54756570e-01]
 [ 3.42573662e+01 -1.88470039e+01  2.54633522e+01]
 [-2.01527095e+00 -1.03837261e+01  4.59669685e+01]
 [ 9.77490997e+01  5.57331505e+01 -2.20205879e+01]
 [ 1.27610054e+02  9.77224731e+01 -9.82858810e+01]
 [ 1.18475525e+02  5.93356667e+01 -1.09000374e+02]
 [ 1.20483170e+02 -1.05606670e+01 -8.37891617e+01]
 [ 2.93307838e+01 -2.68597431e+01 -4.39999542e+01]
 [-1.61247177e+01 -2.85591698e+01 -2.10849552e+01]
 [ 6.94976151e-01 -3.71451569e+

In [22]:
#T-SNE
linear_embedded2 = TSNE(n_components=1).fit_transform(linear_embedded)
print(linear_embedded2)
print("shape:", linear_embedded2.shape)

[[  7.6417727 ]
 [-12.22304   ]
 [ -7.6403975 ]
 [  1.2382336 ]
 [  4.7615757 ]
 [  5.2647724 ]
 [  4.704478  ]
 [  3.8266516 ]
 [  1.80266   ]
 [  0.52208304]
 [ -6.527779  ]
 [ -8.360313  ]
 [ -5.6145897 ]
 [-11.905009  ]
 [-12.484464  ]
 [ -0.6418299 ]
 [-12.583184  ]
 [ -6.8671303 ]
 [  1.4361004 ]
 [  0.9017903 ]
 [ -6.7523494 ]
 [ -7.001827  ]
 [ -7.161033  ]
 [ -7.5586853 ]
 [ -9.1041155 ]
 [ -9.284716  ]
 [ -5.4581337 ]
 [-12.038063  ]
 [-12.457136  ]
 [  8.388928  ]
 [ -0.815663  ]
 [ -3.897978  ]
 [ -3.3550293 ]
 [ -3.263481  ]
 [ -2.9364188 ]
 [ -2.7650564 ]
 [ -3.6275406 ]
 [ -8.715818  ]
 [ -8.78551   ]
 [ -5.5784574 ]
 [  7.7757463 ]
 [-12.540104  ]
 [-12.578046  ]
 [ -0.93893063]
 [ -2.6615705 ]
 [ -1.8011787 ]
 [ -1.8042179 ]
 [ -2.0383532 ]
 [ -2.5007348 ]
 [ -2.9485521 ]
 [ -3.963608  ]
 [ -5.654323  ]
 [ -3.6638658 ]
 [ -5.463747  ]
 [-11.362169  ]
 [ -0.9451441 ]
 [ -3.6425083 ]
 [ -1.8232675 ]
 [ -1.6290683 ]
 [ -1.7860835 ]
 [ -3.5784593 ]
 [  6.699883  ]
 [ -4.99