In [26]:
import torch
import torchvision.models as models

# Build a ResNet-50 with randomly initialised weights; the trained weights
# come from the checkpoint loaded below, not from torchvision.
model = models.resnet50()

# Replace the final fully connected layer so the network emits one output per label
num_classes = 5  # Replace with the actual number of classes
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# Map the checkpoint onto the CPU while reading it, so a file saved from a GPU
# run can still be loaded on a machine without CUDA.
state_dict = torch.load('multi_label_classifier.pth', map_location='cpu')

# strict=False tolerates key mismatches, but every parameter that fails to
# match keeps its random initialisation. Report the mismatches instead of
# discarding them silently.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(f'Model parameters not found in checkpoint: {load_result.missing_keys}')
    print(f'Checkpoint entries not used by model: {load_result.unexpected_keys}')

# Set to evaluation mode (last expression, so the cell still displays the model)
model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [27]:
from PIL import Image
import torchvision.transforms as transforms

def preprocess_image(image_path):
    """Load an image file and turn it into a single-image batch tensor.

    The image is converted to RGB, resized to 224x224, converted to a tensor
    and normalised per channel.

    Args:
        image_path: Location of the image, passed to ``PIL.Image.open``.

    Returns:
        The transformed image tensor with a leading batch dimension of size 1,
        i.e. shape (1, 3, 224, 224).
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Fixed spatial size fed to the model
        transforms.ToTensor(),          # PIL image -> float tensor
        # NOTE(review): these look like the usual ImageNet channel statistics;
        # confirm they match what the checkpoint was trained with.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Image.open is lazy and holds the file open. convert() reads the pixel
    # data into a new image object, so the file can be released as soon as the
    # conversion has finished.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert('RGB')

    # Add batch dimension (1, 3, 224, 224)
    return transform(rgb_image).unsqueeze(0)


In [28]:
def predict(image_path, model, device='cpu'):
    """Run the model on one image and return sigmoid-activated outputs.

    Args:
        image_path: Location of the image, forwarded to ``preprocess_image``.
        model: The network to evaluate. It is moved to ``device`` as a side
            effect of this call.
        device: Device on which to run inference (defaults to ``'cpu'``).

    Returns:
        A NumPy array holding ``sigmoid(model(image))``, copied back to the CPU.
    """
    # Move the input and the model to the same device
    image = preprocess_image(image_path).to(device)
    model = model.to(device)

    # Layers such as batch-norm and dropout behave differently in training
    # mode, so force evaluation mode for the forward pass and put the model
    # back in whatever mode the caller had it in afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Assuming the outputs are logits, sigmoid maps them to probabilities
            probabilities = torch.sigmoid(model(image))
    finally:
        model.train(was_training)

    return probabilities.cpu().numpy()  # NumPy array for further processing


In [36]:
# Label names, ordered to line up with the model's output indices
class_names = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']  # Swap in the real label set

def interpret_predictions(probabilities, threshold=0.5):
    """Translate per-class scores into the list of labels that pass the cut-off.

    Only the first row of ``probabilities`` is examined. A label is selected
    when its score is greater than or equal to ``threshold``; labels are
    returned in the order they appear in ``class_names``.
    """
    first_row = probabilities[0]
    return [
        class_names[position]
        for position, score in enumerate(first_row)
        if score >= threshold
    ]


In [59]:
def classify_image(image_path, model, threshold=0.5, device='cpu'):
    """Return the labels predicted for the image at ``image_path``.

    Chains ``predict`` (scores for the image) into ``interpret_predictions``
    (scores -> label names using ``threshold``).
    """
    return interpret_predictions(
        predict(image_path, model, device),
        threshold,
    )

# Example usage
image_path = 'Bikes.jpg'
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# cell still works on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
predicted_labels = classify_image(image_path, model, threshold=0.55, device=device)
print(f'Predicted labels: {predicted_labels}')


Predicted labels: ['car', 'pedestrian', 'light']
