In [1]:
import numpy as np

In [11]:
# A random batch laid out as (batch, channel, height, width).
x = np.random.rand(10, 1, 28, 28)

# Walk from the whole batch down to single samples and then single channels,
# printing the shape at each level of indexing.
for view in (x, x[0], x[1], x[0, 0], x[1, 0]):
    print(view.shape)

(10, 1, 28, 28)
(1, 28, 28)
(1, 28, 28)
(28, 28)
(28, 28)


In [12]:
from ref.common.util import im2col

In [16]:
# Two inputs that differ only in batch size: a single 3-channel 7x7 image
# and a batch of ten of them.
x1, x2 = (np.random.rand(batch, 3, 7, 7) for batch in (1, 10))

# Unroll both with the same 5x5 window, stride 1 and no padding.
col1, col2 = (im2col(images, 5, 5, stride=1, pad=0) for images in (x1, x2))

# Print one shape per line, single-image result first.
print(col1.shape, col2.shape, sep="\n")

(9, 75)
(90, 75)


In [17]:
class Convolution:
    """Convolution layer whose forward pass is a single matrix product.

    The input is unrolled with ``im2col`` and multiplied by the flattened
    filters, so no explicit loop over filter positions is needed here.

    Attributes (stored exactly as passed to ``__init__``):
        W: filter array; ``forward`` unpacks its shape as (FN, C, FH, FW).
        b: bias added to the (N*out_h*out_w, FN) product. Presumably of
           shape (FN,) so it broadcasts per filter -- confirm with callers.
        stride: step between neighbouring filter positions.
        pad: padding amount forwarded to ``im2col``.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Apply the filters to ``x`` and return the feature maps.

        Args:
            x: 4-D input array, unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, FN, out_h, out_w).
        """
        # The filter geometry lives on the weight array; the layer object
        # itself has no ``shape`` attribute.
        FN, C, FH, FW = self.W.shape
        # Input width is named ``in_w`` so it cannot be confused with the
        # weights ``self.W``; the input's channel count is not needed below.
        N, _, H, in_w = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (in_w + 2*self.pad - FW) / self.stride)

        # One row per filter position, one column per filter weight.
        col = im2col(x, FH, FW, self.stride, self.pad)
        # Flatten each filter to a column so the dot product yields one
        # output column per filter.
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b

        # Rows come back as (N, out_h, out_w, FN); move the filter axis
        # forward to get (N, FN, out_h, out_w).
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        return out

In [3]:
# Read the label file into a list with one entry per line; a later cell
# indexes this list with the model's predicted class indices.
with open("./datasets/imagenet_classes.txt", "r") as file:
    classes = file.read().splitlines()

# Sanity check: number of labels, then the first label, one per line.
print(len(classes), classes[0], sep="\n")

1000
tench


In [5]:
import torch
from PIL import Image
from torchvision import models, transforms

# Preprocessing applied to every image: resize to a fixed 224x224, convert to
# a float tensor, then normalize each channel with the mean/std listed below.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ]
)

# Use Apple's Metal backend when it is available, otherwise the CPU.
# torch.backends.mps.is_available() is the long-standing check;
# torch.mps.is_available() only exists in recent PyTorch releases.
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = models.alexnet(weights="AlexNet_Weights.IMAGENET1K_V1").eval().to(device)

files = ["./datasets/images/airplane.jpg", "./datasets/images/bus.jpg"]
image_tensors = []
for file in files:
    # Close the file handle as soon as the pixels are read, and force three
    # channels: a grayscale, RGBA or CMYK file would otherwise break the
    # 3-channel Normalize above and the torch.stack below.
    with Image.open(file) as handle:
        image = handle.convert("RGB")
    image_tensors.append(transform(image))

# Batch the per-image tensors along a new leading dimension.
tensors = torch.stack(image_tensors)
print(tensors.shape)

torch.Size([2, 3, 224, 224])


In [6]:
import numpy as np
from torch.nn import functional as F

# Inference only: skip autograd bookkeeping while scoring the batch.
with torch.no_grad():
    outputs = model(tensors.to(device))
    probs = F.softmax(outputs, dim=1)
    top_probs, top_idxs = torch.topk(probs, k=3)

# Bring both results back to the host as NumPy arrays so the label list can
# be indexed with the predicted class indices.
top_probs, top_idxs = (t.detach().cpu().numpy() for t in (top_probs, top_idxs))
top_classes = np.array(classes)[top_idxs]

# One report per input file: the path, then its three best labels with the
# probability shown as a percentage.
for path, labels, scores in zip(files, top_classes, top_probs):
    print(path)
    for c, p in zip(labels, scores):
        print(f"{c:<30}, {p*100:>5.2f}")

./datasets/images/airplane.jpg
airliner                      , 66.83
warplane                      , 20.12
wing                          ,  9.29
./datasets/images/bus.jpg
streetcar                     , 60.25
trolleybus                    , 37.99
minibus                       ,  1.54
