# Part 0: Choose pretrained image classification model and images to be explained

## Imports

We chose to use VGG16. We will download it pretrained and establish its performance on 100 images

In [21]:
import torch
import torchvision.models as models
from torchvision import transforms
import os
import requests
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image


In [10]:
model = models.vgg16(pretrained=True)

In [19]:
# Define the URL to download the ImageNet images
url = "http://image-net.org/image/"

# Define the subset of categories to download
categories = ["cat", "dog", "bird", "turtle"]  # Example categories

# Define the output directory to store the downloaded images
output_dir = "imagenet_subset"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Download the images for each category
for category in categories:
    category_dir = os.path.join(output_dir, category)
    os.makedirs(category_dir, exist_ok=True)

    # Download a fixed number of images per category
    # num_images_per_category = 26 if category != "cat" else 25
    num_images_per_category = 2
    print(f"Downloading {num_images_per_category} images from the {category} category:")
    for i in range(num_images_per_category):
        image_url = f"{url}/{category}/{category}_{i}.png"
        image_path = os.path.join(category_dir, f"{category}_{i}.png")

        # Download the image
        response = requests.get(image_url)

        # Save the image to the specified directory
        with open(image_path, "wb") as f:
            f.write(response.content)
        if i % 10 == 0:
            print(f"Downloaded: {image_path}")

Downloading 2 images from the cat category:
Downloaded: imagenet_subset\cat\cat_0.png
Downloading 2 images from the dog category:
Downloaded: imagenet_subset\dog\dog_0.png
Downloading 2 images from the bird category:
Downloaded: imagenet_subset\bird\bird_0.png
Downloading 2 images from the turtle category:
Downloaded: imagenet_subset\turtle\turtle_0.png


In [12]:
# Define the path to the directory containing the images
image_dir = "imagenet_subset"

# Define the path to save the model weights (if necessary)

# Define the number of images to evaluate
num_images = 100

# Define the batch size for evaluation
batch_size = 16

# Define the transformation for preprocessing the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])


# Set the model to evaluation mode
model.eval()

# Create the dataset and data loader
dataset = ImageFolder(image_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

In [15]:
dataset

Dataset ImageFolder
    Number of datapoints: 103
    Root location: imagenet_subset
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
           )

In [16]:
# Define a list to store the predicted labels
predicted_labels = []

# Iterate over the batches of images
for images, _ in dataloader:
    # Forward pass through the model
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)

    # Append the predicted labels to the list
    predicted_labels.extend(predicted.tolist())

# Get the predicted labels for the evaluated images
evaluated_labels = predicted_labels[:num_images]

# Print the predicted labels for the evaluated images
print("Predicted labels for the evaluated images:")
print(evaluated_labels)


UnidentifiedImageError: cannot identify image file <_io.BufferedReader name='imagenet_subset\\bird\\bird_0.jpg'>

In [24]:
image = Image.open(r"imagenet_subset\bird\bird_0.jpg")

UnidentifiedImageError: cannot identify image file 'imagenet_subset\\bird\\bird_0.jpg'

In [27]:
import os
import urllib.request
import tarfile

# Define the URL to download the Oxford Pets Dataset
url = "http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz"

# Define the output directory to store the downloaded dataset
output_dir = "oxford_pets_dataset"

# Define the categories to download
categories = ["cats", "dogs"]

# Define the number of images to download per category
num_images_per_category = 3

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Download the dataset
dataset_file = os.path.join(output_dir, "images.tar.gz")
urllib.request.urlretrieve(url, dataset_file)

# Extract the dataset for the selected categories and desired number of images
with tarfile.open(dataset_file, "r:gz") as tar:
    extracted_images = 0
    for member in tar.getmembers():
        if extracted_images >= len(categories) * num_images_per_category:
            break

        for category in categories:
            if category in member.name:
                tar.extract(member, path=output_dir)
                extracted_images += 1
                if extracted_images % num_images_per_category == 0:
                    break

# Remove the downloaded tarball file
os.remove(dataset_file)

print("Oxford Pets Dataset subset downloaded and extracted successfully!")


Oxford Pets Dataset subset downloaded and extracted successfully!


# Part 1: Choose pretrained image classification model

In [46]:
im = Image.open(r"turtle.jpg")

In [58]:
im

tensor([[[[-2.0837, -2.1008, -2.1008,  ..., -2.0837, -2.1008, -2.0665],
          [-2.0837, -2.0837, -2.1008,  ..., -2.0837, -2.0665, -2.0837],
          [-2.0837, -2.1008, -2.1008,  ..., -2.0837, -2.0837, -2.1008],
          ...,
          [-2.0665, -2.0837, -2.0837,  ...,  1.5810,  1.7352,  1.9235],
          [-2.0837, -2.0837, -2.0665,  ...,  1.4612,  1.8037,  1.9064],
          [-2.0665, -2.0665, -2.0665,  ...,  1.3755,  1.6324,  1.5468]],

         [[-0.2500, -0.1625, -0.0749,  ..., -0.3200, -0.3550, -0.4076],
          [-0.1625, -0.1275, -0.0574,  ..., -0.2850, -0.3200, -0.3725],
          [-0.1450, -0.0749, -0.0049,  ..., -0.2850, -0.3025, -0.3200],
          ...,
          [-0.2675, -0.1800, -0.0224,  ...,  2.0434,  2.0259,  2.0434],
          [-0.4426, -0.2500,  0.0126,  ...,  2.0609,  1.9559,  2.0084],
          [-0.3901, -0.1800,  0.1001,  ...,  2.0084,  2.0084,  2.0784]],

         [[ 1.1585,  1.2108,  1.3328,  ...,  1.2282,  1.1759,  1.1237],
          [ 1.2282,  1.2805,  

In [48]:
model.eval()


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [49]:
im = transform(im).unsqueeze(0)
im

tensor([[[[-2.0837, -2.1008, -2.1008,  ..., -2.0837, -2.1008, -2.0665],
          [-2.0837, -2.0837, -2.1008,  ..., -2.0837, -2.0665, -2.0837],
          [-2.0837, -2.1008, -2.1008,  ..., -2.0837, -2.0837, -2.1008],
          ...,
          [-2.0665, -2.0837, -2.0837,  ...,  1.5810,  1.7352,  1.9235],
          [-2.0837, -2.0837, -2.0665,  ...,  1.4612,  1.8037,  1.9064],
          [-2.0665, -2.0665, -2.0665,  ...,  1.3755,  1.6324,  1.5468]],

         [[-0.2500, -0.1625, -0.0749,  ..., -0.3200, -0.3550, -0.4076],
          [-0.1625, -0.1275, -0.0574,  ..., -0.2850, -0.3200, -0.3725],
          [-0.1450, -0.0749, -0.0049,  ..., -0.2850, -0.3025, -0.3200],
          ...,
          [-0.2675, -0.1800, -0.0224,  ...,  2.0434,  2.0259,  2.0434],
          [-0.4426, -0.2500,  0.0126,  ...,  2.0609,  1.9559,  2.0084],
          [-0.3901, -0.1800,  0.1001,  ...,  2.0084,  2.0084,  2.0784]],

         [[ 1.1585,  1.2108,  1.3328,  ...,  1.2282,  1.1759,  1.1237],
          [ 1.2282,  1.2805,  

In [50]:
with torch.no_grad():
    outputs = model(im)

# Get the top predicted classes
_, predicted_indices = torch.topk(outputs, k=3)
predicted_indices = predicted_indices.squeeze().tolist()


In [51]:
import urllib.request

# Define the URL to download the ImageNet class labels
url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"

# Define the filename to save the class labels
filename = "imagenet_classes.txt"

# Download the class labels
urllib.request.urlretrieve(url, filename)

print("ImageNet class labels downloaded successfully!")


ImageNet class labels downloaded successfully!


In [52]:
with open("imagenet_classes.txt", "r") as f:
    class_names = [line.strip() for line in f.readlines()]

print("Top predicted classes:")
for index in predicted_indices:
    print(f"{class_names[index]}")


Top predicted classes:
"loggerhead sea turtle",
"leatherback sea turtle",
"terrapin",


In [55]:
!pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.7.0.72-cp37-abi3-win_amd64.whl (38.2 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.7.0.72


In [63]:
import cv2
import numpy as np

# Load the image
image = cv2.imread("turtle.jpg")

In [64]:
# Convert the image to LAB color space
image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)


In [66]:
!pip install opencv-contrib-python
# Create the SLIC superpixel object
slic = cv2.ximgproc.createSuperpixelSLIC(image_lab, algorithm='SLIC', region_size=10)

Collecting opencv-contrib-python
  Downloading opencv_contrib_python-4.7.0.72-cp37-abi3-win_amd64.whl (44.9 MB)
     ---------------------------------------- 44.9/44.9 MB 8.8 MB/s eta 0:00:00
Installing collected packages: opencv-contrib-python


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\User\\anaconda3\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.



AttributeError: module 'cv2' has no attribute 'ximgproc'

In [68]:
import cv2
from skimage.segmentation import slic
from skimage.util import img_as_ubyte

# Load the image
image = cv2.imread("image.jpg")

# Convert the image to RGB color space
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Convert the image to 8-bit format
image_8bit = img_as_ubyte(image_rgb)

# Perform the superpixel segmentation
num_segments = 100  # Number of desired superpixels
segments = slic(image_8bit, n_segments=num_segments, compactness=10)

# Create a mask for each superpixel
mask = np.zeros_like(image)
for label in np.unique(segments):
    mask[segments == label] = image[segments == label]

# Display the superpixel segmentation result
cv2.imshow("Superpixel Segmentation", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()


error: OpenCV(4.7.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [None]:

# Perform the superpixel segmentation
slic.iterate(10)

# Get the labels and number of superpixels
labels = slic.getLabels()
num_superpixels = slic.getNumberOfSuperpixels()

# Create a mask for each superpixel
mask = np.zeros_like(image)
for label in np.unique(labels):
    mask[labels == label] = image[labels == label]

# Display the superpixel segmentation result
cv2.imshow("Superpixel Segmentation", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()