In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from pylab import *


In [15]:
# Load a ResNet-18 with pretrained weights from torchvision's model collection.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — confirm against the installed version.
resnet18 = models.resnet18(pretrained=True)
resnet18  # bare last expression: the cell output is the module tree

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [17]:
# Keep every top-level child of ResNet-18 except the last two, i.e. drop both
# the global average pool (`avgpool`) and the classifier (`fc`). What remains
# is the convolutional backbone, which returns a spatial feature map rather
# than class logits (a 224x224 input yields a [1, 512, 7, 7] output).
model = nn.Sequential(*list(resnet18.children())[:-2])


In [14]:
# Switch the backbone to evaluation (inference) mode. `eval()` returns the
# module itself, so as the last expression of the cell it also displays the
# layer structure.
model.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

In [18]:
# Load an image
# NOTE(review): absolute, machine-specific path — point this at your own image,
# ideally relative to a configurable data directory.
image_path = r'C:\Users\itays\Desktop\accumulated_files\images\im3.jpg'  # Replace with the path to your image

# Open inside a context manager so the file handle is released, and force a
# 3-channel RGB image: Normalize below carries exactly three means/stds, so a
# grayscale or RGBA file would not match it.
with Image.open(image_path) as image_file:
    pil_image = image_file.convert('RGB')

# Define the sliding window size and stride
window_size = 224  # Size of the window (224x224 for ResNet-18)
stride = 32        # Stride for sliding the window (adjust as needed)
# NOTE: the transform below center-crops to 224x224, so a 224-pixel window
# fits exactly once; skip the crop (or use a larger one) to get several windows.

# Transform the image to match the model's input requirements
transform = transforms.Compose([
    # A (height, width) tuple resizes to exactly 256x256, ignoring aspect ratio.
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    # PIL image -> float tensor in [0, 1] with layout (C, H, W).
    transforms.ToTensor(),
    # Per-channel normalization constants (RGB order).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# `image` is the model-ready tensor of shape (1, 3, 224, 224).
image = transform(pil_image).unsqueeze(0)  # Add batch dimension


In [23]:
# Sanity-check tensor shapes. Only the final expression of a cell is
# displayed, so the input shape on the first line is evaluated but not shown;
# the cell output is the shape of the backbone's feature map.
image.shape
model(image).shape

torch.Size([1, 512, 7, 7])

In [36]:
# Shape of the weight tensor of the second conv in the last block of the
# backbone's final stage. For a Conv2d this is
# (out_channels, in_channels, kernel_h, kernel_w): a property of the layer's
# parameters, not of any feature map.
model[-1][-1].conv2.weight.shape
# model  (uncomment to display the whole module tree instead)

torch.Size([512, 512, 3, 3])

In [33]:
# Get the height and width of the image tensor that the sliding window scans.
# These are the trailing two dims of the (N, C, H, W) tensor. The leading dims
# of a conv weight are channel counts (512, 512), not spatial sizes, so they
# must not be used here.
height, width = image.shape[-2:]

In [None]:
import matplotlib.pyplot as plt

# Sliding-window scoring: classify every window_size x window_size crop of the
# image tensor and paint its score back onto a per-pixel map.

# Spatial extent of the tensor being scanned (layout N, C, H, W). Computed here
# so the cell does not depend on how `height`/`width` were set earlier.
height, width = image.shape[-2:]

# Class index whose probability is painted onto the map.
# NOTE(review): the pretrained head predicts ImageNet-1k classes, so index 1 is
# presumably not a "human" class — swap in a fine-tuned head / correct index.
target_class = 1

# Per-pixel running sum of window scores, plus how many windows covered each
# pixel, so overlapping windows are averaged instead of overwriting each other.
score_sum = torch.zeros((1, height, width))
coverage = torch.zeros((1, height, width))

# Slide the window over the image
with torch.no_grad():
    for i in range(0, height - window_size + 1, stride):
        for j in range(0, width - window_size + 1, stride):
            # Extract the window from the image
            window = image[:, :, i:i+window_size, j:j+window_size]

            # `model` is the headless backbone and returns a (1, 512, h, w)
            # feature map, not class scores. Re-apply ResNet-18's own pooling
            # and classifier to obtain (1, num_classes) logits.
            features = model(window)
            output = resnet18.fc(torch.flatten(resnet18.avgpool(features), 1))

            # Probability of the target class for this window (a scalar).
            probability = torch.softmax(output, dim=1)[:, target_class].item()

            # Accumulate the score over the pixels this window covers.
            score_sum[:, i:i+window_size, j:j+window_size] += probability
            coverage[:, i:i+window_size, j:j+window_size] += 1

# Mean score per pixel; pixels no window covered stay at 0.
predictions = score_sum / coverage.clamp(min=1)

# Define a threshold for considering a region as containing a human
threshold = 0.5

# Create a binary mask where values above the threshold are considered as humans
human_mask = (predictions > threshold).float()

# You can use post-processing techniques like non-maximum suppression here if needed
# to remove redundant or overlapping detections

# Visualize the result if desired
plt.imshow(human_mask.squeeze().numpy(), cmap='gray')
plt.show()


# DEBUG

In [None]:
import torch
import torchvision.models as models

# Load the pre-trained ResNet-18 model
# NOTE: this rebinds `model`. Earlier cells used that name for the headless
# backbone; from here on `model` is the full network, including `avgpool`
# and `fc`.
model = models.resnet18(pretrained=True)
model.eval()  # Set the model to evaluation mode

In [54]:
# Trace how the tensor shape changes as a random 28x28 RGB batch flows through
# each top-level child of the network (replace the random tensor with real
# image data as needed).
input_image = torch.randn(1, 3, 28, 28)  # Batch size of 1, 3 channels (RGB), 28x28 pixels

# Run the children one by one, printing the shape after each.
# `layer` stays bound to the last child once the loop finishes.
with torch.no_grad():
    x = input_image
    print(f"Layer: Prior, Feature Map Size: {x.shape}")
    for name, layer in model.named_children():
        # The linear head takes (batch, features), so collapse the pooled
        # (batch, C, 1, 1) map before applying it.
        if isinstance(layer, torch.nn.Linear):
            x = x.flatten(1)
            print(f"Layer: Flatten, Feature Map Size: {x.shape}")
        x = layer(x)
        print(f"Layer: {name}, Feature Map Size: {x.shape}")

Layer: Prior, Feature Map Size: torch.Size([1, 3, 28, 28])
Layer: conv1, Feature Map Size: torch.Size([1, 64, 14, 14])
Layer: bn1, Feature Map Size: torch.Size([1, 64, 14, 14])
Layer: relu, Feature Map Size: torch.Size([1, 64, 14, 14])
Layer: maxpool, Feature Map Size: torch.Size([1, 64, 7, 7])
Layer: layer1, Feature Map Size: torch.Size([1, 64, 7, 7])
Layer: layer2, Feature Map Size: torch.Size([1, 128, 4, 4])
Layer: layer3, Feature Map Size: torch.Size([1, 256, 2, 2])
Layer: layer4, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: avgpool, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: Flatten, Feature Map Size: torch.Size([1, 512])
Layer: fc, Feature Map Size: torch.Size([1, 1000])


In [63]:
# Scratch checks; only the value of the last expression is displayed.
448/28  # presumably the ratio between the largest (448) and smallest (28) input sizes tried in this section
model(input_image).shape  # output shape of the full network for the current `input_image`
# `layer` is left over from the tracing loop and refers to its last child
# (`fc`), so this is the classifier's weight shape (out_features, in_features).
layer.weight.shape

torch.Size([1000, 512])

In [53]:
# Trace how the tensor shape changes as a random 56x56 RGB batch flows through
# each top-level child of the network (replace the random tensor with real
# image data as needed).
input_image = torch.randn(1, 3, 56, 56)  # Batch size of 1, 3 channels (RGB), 56x56 pixels

# Run the children one by one, printing the shape after each.
# `layer` stays bound to the last child once the loop finishes.
with torch.no_grad():
    x = input_image
    print(f"Layer: Prior, Feature Map Size: {x.shape}")
    for name, layer in model.named_children():
        # The linear head takes (batch, features), so collapse the pooled
        # (batch, C, 1, 1) map before applying it.
        if isinstance(layer, torch.nn.Linear):
            x = x.flatten(1)
            print(f"Layer: Flatten, Feature Map Size: {x.shape}")
        x = layer(x)
        print(f"Layer: {name}, Feature Map Size: {x.shape}")

Layer: Prior, Feature Map Size: torch.Size([1, 3, 56, 56])
Layer: conv1, Feature Map Size: torch.Size([1, 64, 28, 28])
Layer: bn1, Feature Map Size: torch.Size([1, 64, 28, 28])
Layer: relu, Feature Map Size: torch.Size([1, 64, 28, 28])
Layer: maxpool, Feature Map Size: torch.Size([1, 64, 14, 14])
Layer: layer1, Feature Map Size: torch.Size([1, 64, 14, 14])
Layer: layer2, Feature Map Size: torch.Size([1, 128, 7, 7])
Layer: layer3, Feature Map Size: torch.Size([1, 256, 4, 4])
Layer: layer4, Feature Map Size: torch.Size([1, 512, 2, 2])
Layer: avgpool, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: Flatten, Feature Map Size: torch.Size([1, 512])
Layer: fc, Feature Map Size: torch.Size([1, 1000])


In [52]:
# Trace how the tensor shape changes as a random 112x112 RGB batch flows
# through each top-level child of the network (replace the random tensor with
# real image data as needed).
input_image = torch.randn(1, 3, 112, 112)  # Batch size of 1, 3 channels (RGB), 112x112 pixels

# Run the children one by one, printing the shape after each.
# `layer` stays bound to the last child once the loop finishes.
with torch.no_grad():
    x = input_image
    print(f"Layer: Prior, Feature Map Size: {x.shape}")
    for name, layer in model.named_children():
        # The linear head takes (batch, features), so collapse the pooled
        # (batch, C, 1, 1) map before applying it.
        if isinstance(layer, torch.nn.Linear):
            x = x.flatten(1)
            print(f"Layer: Flatten, Feature Map Size: {x.shape}")
        x = layer(x)
        print(f"Layer: {name}, Feature Map Size: {x.shape}")

Layer: Prior, Feature Map Size: torch.Size([1, 3, 112, 112])
Layer: conv1, Feature Map Size: torch.Size([1, 64, 56, 56])
Layer: bn1, Feature Map Size: torch.Size([1, 64, 56, 56])
Layer: relu, Feature Map Size: torch.Size([1, 64, 56, 56])
Layer: maxpool, Feature Map Size: torch.Size([1, 64, 28, 28])
Layer: layer1, Feature Map Size: torch.Size([1, 64, 28, 28])
Layer: layer2, Feature Map Size: torch.Size([1, 128, 14, 14])
Layer: layer3, Feature Map Size: torch.Size([1, 256, 7, 7])
Layer: layer4, Feature Map Size: torch.Size([1, 512, 4, 4])
Layer: avgpool, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: Flatten, Feature Map Size: torch.Size([1, 512])
Layer: fc, Feature Map Size: torch.Size([1, 1000])


In [51]:
# Trace how the tensor shape changes as a random 224x224 RGB batch flows
# through each top-level child of the network (replace the random tensor with
# real image data as needed).
input_image = torch.randn(1, 3, 224, 224)  # Batch size of 1, 3 channels (RGB), 224x224 pixels

# Run the children one by one, printing the shape after each.
# `layer` stays bound to the last child once the loop finishes.
with torch.no_grad():
    x = input_image
    print(f"Layer: Prior, Feature Map Size: {x.shape}")
    for name, layer in model.named_children():
        # The linear head takes (batch, features), so collapse the pooled
        # (batch, C, 1, 1) map before applying it.
        if isinstance(layer, torch.nn.Linear):
            x = x.flatten(1)
            print(f"Layer: Flatten, Feature Map Size: {x.shape}")
        x = layer(x)
        print(f"Layer: {name}, Feature Map Size: {x.shape}")

Layer: Prior, Feature Map Size: torch.Size([1, 3, 224, 224])
Layer: conv1, Feature Map Size: torch.Size([1, 64, 112, 112])
Layer: bn1, Feature Map Size: torch.Size([1, 64, 112, 112])
Layer: relu, Feature Map Size: torch.Size([1, 64, 112, 112])
Layer: maxpool, Feature Map Size: torch.Size([1, 64, 56, 56])
Layer: layer1, Feature Map Size: torch.Size([1, 64, 56, 56])
Layer: layer2, Feature Map Size: torch.Size([1, 128, 28, 28])
Layer: layer3, Feature Map Size: torch.Size([1, 256, 14, 14])
Layer: layer4, Feature Map Size: torch.Size([1, 512, 7, 7])
Layer: avgpool, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: Flatten, Feature Map Size: torch.Size([1, 512])
Layer: fc, Feature Map Size: torch.Size([1, 1000])


In [50]:
# Trace how the tensor shape changes as a random 448x448 RGB batch flows
# through each top-level child of the network (replace the random tensor with
# real image data as needed).
input_image = torch.randn(1, 3, 448, 448)  # Batch size of 1, 3 channels (RGB), 448x448 pixels

# Run the children one by one, printing the shape after each.
# `layer` stays bound to the last child once the loop finishes.
with torch.no_grad():
    x = input_image
    print(f"Layer: Prior, Feature Map Size: {x.shape}")
    for name, layer in model.named_children():
        # The linear head takes (batch, features), so collapse the pooled
        # (batch, C, 1, 1) map before applying it.
        if isinstance(layer, torch.nn.Linear):
            x = x.flatten(1)
            print(f"Layer: Flatten, Feature Map Size: {x.shape}")
        x = layer(x)
        print(f"Layer: {name}, Feature Map Size: {x.shape}")

Layer: Prior, Feature Map Size: torch.Size([1, 3, 448, 448])
Layer: conv1, Feature Map Size: torch.Size([1, 64, 224, 224])
Layer: bn1, Feature Map Size: torch.Size([1, 64, 224, 224])
Layer: relu, Feature Map Size: torch.Size([1, 64, 224, 224])
Layer: maxpool, Feature Map Size: torch.Size([1, 64, 112, 112])
Layer: layer1, Feature Map Size: torch.Size([1, 64, 112, 112])
Layer: layer2, Feature Map Size: torch.Size([1, 128, 56, 56])
Layer: layer3, Feature Map Size: torch.Size([1, 256, 28, 28])
Layer: layer4, Feature Map Size: torch.Size([1, 512, 14, 14])
Layer: avgpool, Feature Map Size: torch.Size([1, 512, 1, 1])
Layer: Flatten, Feature Map Size: torch.Size([1, 512])
Layer: fc, Feature Map Size: torch.Size([1, 1000])


In [67]:
# Show how AdaptiveAvgPool2d maps differently sized inputs onto a requested
# output size. Each case is (target output size, input height, input width);
# a None entry in the target keeps that dimension at the input's size.
# NOTE: `input` shadows the Python builtin of the same name from here on.
for target_size, in_height, in_width in (
    ((5, 7), 8, 9),       # target output size of 5x7
    (7, 10, 9),           # target output size of 7x7 (square)
    ((None, 7), 10, 9),   # target output size of 10x7
):
    m = nn.AdaptiveAvgPool2d(target_size)
    input = torch.randn(1, 64, in_height, in_width)
    output = m(input)
    print(output.shape)

torch.Size([1, 64, 5, 7])
torch.Size([1, 64, 7, 7])
torch.Size([1, 64, 10, 7])
