# Exploratory Data Analysis for VOC Segmentation

In this notebook, we explore the PASCAL VOC 2012 segmentation dataset: we count the available images, visualize a few images alongside their segmentation masks, and analyze how many masks each class appears in.

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2

# Define the path to the VOC2012 dataset and its image / mask sub-directories
data_dir = '../data/VOC2012/'
image_dir = os.path.join(data_dir, 'JPEGImages')
mask_dir = os.path.join(data_dir, 'SegmentationClass')

# Check the number of images.
# os.listdir returns entries in arbitrary order and includes any stray files,
# so keep only the JPEGs and sort them to make every run reproducible.
image_files = sorted(
    name for name in os.listdir(image_dir) if name.lower().endswith('.jpg')
)
print(f'Number of images: {len(image_files)}')

In [2]:
# Function to display images and their corresponding masks
def display_samples(num_samples=5):
    """Show up to ``num_samples`` images side by side with their masks.

    Samples are picked by walking the mask directory (sorted by file name)
    instead of the image directory: an image that has no mask file would make
    ``Image.open`` raise ``FileNotFoundError``, so only pairs where both the
    mask and the matching ``.jpg`` exist are plotted.

    Args:
        num_samples: Maximum number of image/mask pairs to draw, one per row.
            Fewer rows are drawn if fewer pairs are available; zero or a
            negative value draws nothing.
    """
    # Sorted so the same samples are shown on every run (os.listdir order is arbitrary).
    mask_files = sorted(
        name for name in os.listdir(mask_dir) if name.lower().endswith('.png')
    )

    pairs = []
    for mask_file in mask_files:
        if len(pairs) >= num_samples:
            break
        # Swap only the extension; str.replace would touch every '.jpg' in the name.
        stem = os.path.splitext(mask_file)[0]
        img_path = os.path.join(image_dir, stem + '.jpg')
        if os.path.exists(img_path):
            pairs.append((img_path, os.path.join(mask_dir, mask_file)))

    if not pairs:
        print('No image/mask pairs found to display.')
        return

    # Two inches of height per row keeps the original 15x10 figure for 5 samples.
    # squeeze=False keeps `axes` two-dimensional even when there is a single row.
    fig, axes = plt.subplots(len(pairs), 2, figsize=(15, 2 * len(pairs)), squeeze=False)
    for (img_ax, mask_ax), (img_path, mask_path) in zip(axes, pairs):
        # Context managers close the underlying files once the pixels are drawn.
        with Image.open(img_path) as img:
            img_ax.imshow(img)
        img_ax.axis('off')
        with Image.open(mask_path) as mask:
            mask_ax.imshow(mask)
        mask_ax.axis('off')
    plt.show()

# Display some samples
display_samples()

In [3]:
# Analyze class distribution
# VOC segmentation label indices: 0 is background and 1-20 are the object
# classes in the order below. 'bottle' (index 5) must be present, otherwise
# every later name is shifted by one and index 20 falls off the end.
class_names = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
               'train', 'tvmonitor']

# class_counts[k] = number of mask files in which label k appears at least once.
class_counts = np.zeros(len(class_names), dtype=np.int64)

for mask_file in sorted(os.listdir(mask_dir)):
    # Skip anything that is not a PNG mask (hidden files, thumbnails, ...).
    if not mask_file.lower().endswith('.png'):
        continue
    # Close each file as soon as its labels have been read.
    with Image.open(os.path.join(mask_dir, mask_file)) as mask:
        unique_classes = np.unique(np.asarray(mask))
    # Ignore label values outside the class list (e.g. the 255 "void" border label).
    known_classes = unique_classes[unique_classes < len(class_names)]
    # Values from np.unique are distinct, so this adds exactly one per class present.
    class_counts[known_classes] += 1

# Bar chart of the per-class counts, one bar per entry in class_names
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(class_names, class_counts)
# Rotate the class labels so the long names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title('Class Distribution in VOC 2012 Segmentation Dataset')
ax.set_xlabel('Classes')
ax.set_ylabel('Counts')
plt.show()