In [37]:
import math
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as tvf
import torchvision.transforms as tvtfms
import operator as op
from PIL import Image
from torch import nn
from timm import create_model

# For type hinting later on
import collections
import typing

In [38]:
# num_classes must match the vocab
model = create_model("convnext_tiny", pretrained=False, num_classes=2, in_chans=3)

In [39]:
state = torch.load("models/convnext_tiny_best_model.pth")

  state = torch.load("models/convnext_tiny_best_model.pth")


In [40]:
list(model.state_dict().keys())

['stem.0.weight',
 'stem.0.bias',
 'stem.1.weight',
 'stem.1.bias',
 'stages.0.blocks.0.gamma',
 'stages.0.blocks.0.conv_dw.weight',
 'stages.0.blocks.0.conv_dw.bias',
 'stages.0.blocks.0.norm.weight',
 'stages.0.blocks.0.norm.bias',
 'stages.0.blocks.0.mlp.fc1.weight',
 'stages.0.blocks.0.mlp.fc1.bias',
 'stages.0.blocks.0.mlp.fc2.weight',
 'stages.0.blocks.0.mlp.fc2.bias',
 'stages.0.blocks.1.gamma',
 'stages.0.blocks.1.conv_dw.weight',
 'stages.0.blocks.1.conv_dw.bias',
 'stages.0.blocks.1.norm.weight',
 'stages.0.blocks.1.norm.bias',
 'stages.0.blocks.1.mlp.fc1.weight',
 'stages.0.blocks.1.mlp.fc1.bias',
 'stages.0.blocks.1.mlp.fc2.weight',
 'stages.0.blocks.1.mlp.fc2.bias',
 'stages.0.blocks.2.gamma',
 'stages.0.blocks.2.conv_dw.weight',
 'stages.0.blocks.2.conv_dw.bias',
 'stages.0.blocks.2.norm.weight',
 'stages.0.blocks.2.norm.bias',
 'stages.0.blocks.2.mlp.fc1.weight',
 'stages.0.blocks.2.mlp.fc1.bias',
 'stages.0.blocks.2.mlp.fc2.weight',
 'stages.0.blocks.2.mlp.fc2.bias',
 '

In [41]:
list(model.state_dict().keys())[-12:]

['stages.3.blocks.2.conv_dw.weight',
 'stages.3.blocks.2.conv_dw.bias',
 'stages.3.blocks.2.norm.weight',
 'stages.3.blocks.2.norm.bias',
 'stages.3.blocks.2.mlp.fc1.weight',
 'stages.3.blocks.2.mlp.fc1.bias',
 'stages.3.blocks.2.mlp.fc2.weight',
 'stages.3.blocks.2.mlp.fc2.bias',
 'head.norm.weight',
 'head.norm.bias',
 'head.fc.weight',
 'head.fc.bias']

In [42]:
list(state.keys())[-12:]

['stages.3.blocks.2.conv_dw.weight',
 'stages.3.blocks.2.conv_dw.bias',
 'stages.3.blocks.2.norm.weight',
 'stages.3.blocks.2.norm.bias',
 'stages.3.blocks.2.mlp.fc1.weight',
 'stages.3.blocks.2.mlp.fc1.bias',
 'stages.3.blocks.2.mlp.fc2.weight',
 'stages.3.blocks.2.mlp.fc2.bias',
 'head.norm.weight',
 'head.norm.bias',
 'head.fc.weight',
 'head.fc.bias']

In [43]:
# Expected to fail
model.load_state_dict(state)

<All keys matched successfully>

In [44]:
# This also works
# model = torch.load('models/convnext_tiny_best_model.pth')

In [45]:
import typing
from PIL import Image
import torchvision.transforms.functional as tvf

In [60]:
def crop(image: typing.Union[Image.Image, torch.Tensor], size: typing.Tuple[int, int]) -> typing.Union[Image.Image, torch.Tensor]:
    """
    Takes a `PIL.Image` or `torch.Tensor` and crops it to `size` unless one 
    dimension is larger than the actual image. Padding must be performed afterwards if so.
    
    Args:
        image (`PIL.Image` or `torch.Tensor`):
            An image to perform cropping on
        size (`tuple` of integers):
            A size to crop to, should be in the form of (width, height)
            
    Returns:
        An augmented `PIL.Image` or `torch.Tensor`
    """
    if isinstance(image, Image.Image):
        width, height = image.size
    elif isinstance(image, torch.Tensor):
        height, width = image.shape[-2], image.shape[-1]
    else:
        raise TypeError("Unsupported image type")

    top = (height - size[1]) // 2
    left = (width - size[0]) // 2
    
    top = max(top, 0)
    left = max(left, 0)
    
    bottom = min(top + size[1], height)
    right = min(left + size[0], width)

    if isinstance(image, Image.Image):
        return image.crop((left, top, right, bottom))
    elif isinstance(image, torch.Tensor):
        return image[:, :, top:bottom, left:right]

In [64]:
from PIL import Image, ImageOps
import torch
import typing

def pad(image: typing.Union[Image.Image, torch.Tensor], size: typing.Tuple[int, int]) -> typing.Union[Image.Image, torch.Tensor]:
    """
    Pads the given image to the specified size.
    
    Args:
        image (`PIL.Image` or `torch.Tensor`):
            An image to perform padding on
        size (`tuple` of integers):
            A size to pad to, should be in the form of (width, height)
            
    Returns:
        A padded `PIL.Image` or `torch.Tensor`
    """
    if isinstance(image, Image.Image):
        width, height = image.size
    elif isinstance(image, torch.Tensor):
        height, width = image.shape[-2], image.shape[-1]
    else:
        raise TypeError("Unsupported image type")

    top = (size[1] - height) // 2
    left = (size[0] - width) // 2
    
    pad_top = max(-top, 0)
    pad_left = max(-left, 0)
    pad_bottom = max(size[1] - height - pad_top, 0)
    pad_right = max(size[0] - width - pad_left, 0)

    if isinstance(image, Image.Image):
        padding = (pad_left, pad_top, pad_right, pad_bottom)
        return ImageOps.expand(image, padding)
    elif isinstance(image, torch.Tensor):
        padding = (pad_left, pad_right, pad_top, pad_bottom)
        return torch.nn.functional.pad(image, padding, mode='constant', value=0)


In [48]:
def gpu_crop(
    batch:torch.tensor, 
    size:typing.Tuple[int,int]
):
    """
    Crops each image in `batch` to a particular `size`.
    
    Args:
        batch (array of `torch.Tensor`):
            A batch of images, should be of shape `NxCxWxH`
        size (`tuple` of integers):
            A size to pad to, should be in the form
            of (width, height)
            
    Returns:
        A batch of cropped images
    """
   
    affine_matrix = torch.eye(3, device=batch.device).float()
    affine_matrix = affine_matrix.unsqueeze(0)
    affine_matrix = affine_matrix.expand(batch.size(0), 3, 3)
    affine_matrix = affine_matrix.contiguous()[:,:2]
    
    coords = F.affine_grid(
        affine_matrix, batch.shape[:2] + size, align_corners=True
    )
    
    top_range, bottom_range = coords.min(), coords.max()
    zoom = 1/(bottom_range - top_range).item()*2
    
    resizing_limit = min(
        batch.shape[-2]/coords.shape[-2],
        batch.shape[-1]/coords.shape[-1]
    )/2
    
    if resizing_limit > 1 and resizing_limit > zoom:
        batch = F.interpolate(
            batch, 
            scale_factor=1/resizing_limit, 
            mode='area', 
            recompute_scale_factor=True
        )
    return F.grid_sample(batch, coords, mode='bilinear', padding_mode='reflection', align_corners=True)

In [49]:
tt_torch = tvtfms.ToTensor()

In [51]:
import torch
from torchvision import transforms
from PIL import Image

# define the labels as the came from dls.vocab
class_labels = ['no', 'yes']

# Define the image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to the size expected by the model
    transforms.ToTensor(),          # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

# Load the image
image_path = 'processed/valid/yes/Y14.jpg'
image = Image.open(image_path)
# image = transform(image).unsqueeze(0)  # Add a batch dimension
image = gpu_crop(tt_torch(image).unsqueeze(0), (224, 224))

print(type(model))
model.eval()  # Set the model to evaluation mode

# Run the image through the model
with torch.no_grad():
    output = model(image)

# Get the predicted class
_, predicted_class = torch.max(output, 1)
predicted_index = predicted_class.item()

# Map the index to the class label
predicted_label = class_labels[predicted_index]

print(f'Predicted class index: {predicted_index}')
print(f'Predicted class label: {predicted_label}')

<class 'timm.models.convnext.ConvNeXt'>
Predicted class index: 1
Predicted class label: yes


In [69]:
import torch
from torchvision import transforms
from PIL import Image
import os

# Define the labels as they came from dls.vocab
class_labels = ['no', 'yes']

# Define the image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to the size expected by the model
    transforms.ToTensor(),          # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

# Path to the "no" folder
folder_path = 'processed/valid/no'

# List all image files in the "no" folder
image_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

model.eval()  # Set the model to evaluation mode

predictions = []

# Loop through each image file
for image_file in image_files:
    # print(f'Processing Image: {image_file}')
    image_path = os.path.join(folder_path, image_file)
    image = Image.open(image_path)

    # print(f'Image shape: {image.size}')
    # print no of channels
    # print(f'Image channels: {len(image.getbands())}')
    
    # Convert single-channel images to three-channel
    if image.mode != 'RGB':
        image = image.convert('RGB')

    image = gpu_crop(tt_torch(image).unsqueeze(0), (224, 224))
    # image = transform(image).unsqueeze(0)  # Add a batch dimension

    # Run the image through the model
    with torch.no_grad():
        output = model(image)

    # Get the predicted class
    _, predicted_class = torch.max(output, 1)
    predicted_index = predicted_class.item()

    # Map the index to the class label
    predicted_label = class_labels[predicted_index]

    predictions.append((image_file, predicted_index, predicted_label))
    # print(f'Image: {image_file}')
    # print(f'Predicted class index: {predicted_index}')
    # print(f'Predicted class label: {predicted_label}')
    # print('---')
    
print(predictions)

[('18 no.jpg', 0, 'no'), ('27 no.jpg', 0, 'no'), ('4 no.jpg', 0, 'no'), ('42 no.jpg', 0, 'no'), ('N6.jpg', 0, 'no'), ('no 89.jpg', 0, 'no'), ('no 9.png', 0, 'no'), ('no 99.jpg', 0, 'no'), ('No14.jpg', 0, 'no')]


In [70]:
import torch
from torchvision import transforms
from PIL import Image
import os

# Define the labels as they came from dls.vocab
class_labels = ['no', 'yes']

# Define the image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to the size expected by the model
    transforms.ToTensor(),          # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

# Path to the "no" folder
folder_path = 'processed/valid/yes'

# List all image files in the "no" folder
image_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

model.eval()  # Set the model to evaluation mode

predictions = []

# Loop through each image file
for image_file in image_files:
    # print(f'Processing Image: {image_file}')
    image_path = os.path.join(folder_path, image_file)
    image = Image.open(image_path)

    # print(f'Image shape: {image.size}')
    # print no of channels
    # print(f'Image channels: {len(image.getbands())}')
    
    # Convert single-channel images to three-channel
    if image.mode != 'RGB':
        image = image.convert('RGB')

    image = gpu_crop(tt_torch(image).unsqueeze(0), (224, 224))
    # image = transform(image).unsqueeze(0)  # Add a batch dimension

    # Run the image through the model
    with torch.no_grad():
        output = model(image)

    # Get the predicted class
    _, predicted_class = torch.max(output, 1)
    predicted_index = predicted_class.item()

    # Map the index to the class label
    predicted_label = class_labels[predicted_index]

    predictions.append((image_file, predicted_index, predicted_label))
    # print(f'Image: {image_file}')
    # print(f'Predicted class index: {predicted_index}')
    # print(f'Predicted class label: {predicted_label}')
    # print('---')
    
print(predictions)

[('Y108.jpg', 0, 'no'), ('Y109.JPG', 0, 'no'), ('Y14.jpg', 1, 'yes'), ('Y159.JPG', 0, 'no'), ('Y19.JPG', 1, 'yes'), ('Y22.jpg', 1, 'yes'), ('Y246.JPG', 1, 'yes'), ('Y250.jpg', 1, 'yes'), ('Y27.jpg', 1, 'yes'), ('Y33.jpg', 0, 'no'), ('Y34.jpg', 1, 'yes'), ('Y42.jpg', 0, 'no'), ('Y51.jpg', 1, 'yes'), ('Y56.jpg', 0, 'no'), ('Y65.JPG', 1, 'yes')]


In [68]:
import torch
from torchvision import transforms
from PIL import Image
import os

# Define the labels as they came from dls.vocab
class_labels = ['no', 'yes']

# Define the image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to the size expected by the model
    transforms.ToTensor(),          # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

# Path to the "no" folder
folder_path = 'processed/valid/yes'

# List all image files in the "no" folder
image_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

model.eval()  # Set the model to evaluation mode

predictions = []

# Loop through each image file
for image_file in image_files:
    # print(f'Processing Image: {image_file}')
    image_path = os.path.join(folder_path, image_file)
    image = Image.open(image_path)

    # print(f'Image shape: {image.size}')
    # print no of channels
    # print(f'Image channels: {len(image.getbands())}')
    
    # Convert single-channel images to three-channel
    if image.mode != 'RGB':
        image = image.convert('RGB')

    image = pad(crop(image, (224, 224)), (224, 224))
    image = tvtfms.ToTensor()(image).unsqueeze(0)  # Add a batch dimension
    image = gpu_crop(image, (224, 224))
    # image = transform(image).unsqueeze(0)  # Add a batch dimension

    # Run the image through the model
    with torch.no_grad():
        output = model(image)

    # Get the predicted class
    _, predicted_class = torch.max(output, 1)
    predicted_index = predicted_class.item()

    # Map the index to the class label
    predicted_label = class_labels[predicted_index]

    predictions.append((image_file, predicted_index, predicted_label))
    # print(f'Image: {image_file}')
    # print(f'Predicted class index: {predicted_index}')
    # print(f'Predicted class label: {predicted_label}')
    # print('---')

# doing these transforms gives a terrible result    
print(predictions)

[('Y108.jpg', 0, 'no'), ('Y109.JPG', 0, 'no'), ('Y14.jpg', 1, 'yes'), ('Y159.JPG', 0, 'no'), ('Y19.JPG', 1, 'yes'), ('Y22.jpg', 0, 'no'), ('Y246.JPG', 1, 'yes'), ('Y250.jpg', 1, 'yes'), ('Y27.jpg', 0, 'no'), ('Y33.jpg', 0, 'no'), ('Y34.jpg', 1, 'yes'), ('Y42.jpg', 0, 'no'), ('Y51.jpg', 0, 'no'), ('Y56.jpg', 0, 'no'), ('Y65.JPG', 1, 'yes')]
