In [1]:
import json
import os
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
from IPython.display import display
from PIL import Image
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
#from datasets import Dataset, DatasetDict, Features, Value, Image

In [2]:
# Root folder of the iMaterialist download. Set the IMATERIALIST_DIR environment
# variable to point the notebook at another copy without editing this cell.
DATA_DIR = os.environ.get(
    "IMATERIALIST_DIR",
    "/media/sergio/6A4A30C94A3093B3/Users/sergi/Desktop/datasets/imaterialist",
)

# Annotation table; later cells read its ImageId, EncodedPixels, Height, Width
# and ClassId columns.
train_df = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))

# Folders holding the raw JPEGs.
train_path = os.path.join(DATA_DIR, "images", "train")
test_path = os.path.join(DATA_DIR, "images", "test")

In [None]:
# label_descriptions.json sits at the dataset root, two levels above
# images/train. Deriving the location from train_path keeps this cell on the
# same filesystem as the rest of the notebook instead of a Windows-only
# C:\ path that cannot be opened from the Linux mount used elsewhere.
label_descriptions_path = os.path.join(
    os.path.dirname(os.path.dirname(train_path)), 'label_descriptions.json'
)
with open(label_descriptions_path, 'r', encoding='utf-8') as f:
    label_descriptions = json.load(f)

In [3]:
# copied from https://ccshenyltw.medium.com/run-length-encode-and-decode-a33383142e6b
def rle_decode(mask_rle, shape):
    '''
    Decode a run-length encoded binary mask.

    mask_rle: string of space-separated integers laid out as
              "start0 length0 start1 length1 ...", where each start is a
              1-based offset into the flattened mask
    shape: (height, width) of the array to return
    Returns a uint8 numpy array of that shape: 1 inside a run, 0 elsewhere.
    Runs are laid out column by column (down each column first), which is
    why the flat buffer is reshaped as (width, height) and then transposed.
    '''
    height, width = shape[0], shape[1]
    tokens = mask_rle.split()

    # Even positions are run starts (shifted to 0-based), odd positions are run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    # Paint every run into a flat, all-zero buffer.
    flat = np.zeros(width * height, dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1

    # Fold the buffer into (width, height) and transpose to get (height, width).
    return flat.reshape((width, height)).T

In [None]:
# Function to create a multi-class segmentation mask
def create_multiclass_mask(image_id, df):
    """
    Create a multi-class segmentation mask for an image at its annotated size.

    Args:
        image_id (str): Image ID to look up in the ``ImageId`` column.
        df (pd.DataFrame): Annotation table with ``ImageId``, ``Height``,
            ``Width``, ``ClassId`` and ``EncodedPixels`` columns.

    Returns:
        np.ndarray: ``(height, width)`` uint8 array. Pixels covered by an
        annotation hold that annotation's class id; where annotations
        overlap, the row that comes later in ``df`` wins. Pixels covered by
        no annotation are 0.

    Raises:
        IndexError: If ``df`` contains no rows for ``image_id``.
    """
    # Get all rows for this image
    image_df = df[df['ImageId'] == image_id]
    if image_df.empty:
        raise IndexError(f"No annotations found for image id {image_id!r}")

    # Height and width are read from the first row of the image's annotations.
    height = int(image_df['Height'].iloc[0])
    width = int(image_df['Width'].iloc[0])

    # NOTE(review): background is 0 here, so an annotation whose ClassId is 0
    # cannot be told apart from background. create_multiclass_mask_from_group
    # fills with 255 instead - confirm which convention downstream code expects.
    mask = np.zeros((height, width), dtype=np.uint8)

    # Fill mask with class IDs (values must fit in uint8)
    for _, row in image_df.iterrows():
        class_id = int(row['ClassId'])
        class_mask = rle_decode(row['EncodedPixels'], (height, width))

        # Assign in place: the array stays uint8 whatever type ClassId arrives
        # as, whereas rebuilding it with np.where leaves the result dtype to
        # NumPy's promotion rules and allocates a full new array per row.
        mask[class_mask == 1] = class_id

    return mask

In [None]:
unique_image_ids = train_df['ImageId'].unique()

# Output folders for the small sanity-check sample
os.makedirs('segformer_data/masks', exist_ok=True)
os.makedirs('segformer_data/images', exist_ok=True)

# Process a sample of images (adjust the cap as needed)
sample_size = min(5, len(unique_image_ids))  # Start with a small sample for testing
for image_id in tqdm(unique_image_ids[:sample_size], desc="Processing images"):
    # Create and save the multi-class mask
    mask = create_multiclass_mask(image_id, train_df)
    cv2.imwrite(f'segformer_data/masks/{image_id}.png', mask)

    # Copy the original image next to its mask. shutil.copy works on every
    # platform; the Windows `copy` shell command with backslash separators
    # does not run against the POSIX train_path, and os.system hid the failure.
    src_path = os.path.join(train_path, f'{image_id}.jpg')
    if os.path.exists(src_path):
        shutil.copy(src_path, os.path.join('segformer_data', 'images', f'{image_id}.jpg'))
    else:
        print(f"Image not found, copy skipped: {src_path}")

# Visualize all masks in the masks folder
mask_files = sorted(os.listdir('segformer_data/masks'))
num_masks = len(mask_files)
# 3 masks per row; keep at least one row so the figure height is never 0
rows = max(1, (num_masks + 2) // 3)

plt.figure(figsize=(15, 5 * rows))
for idx, mask_file in enumerate(mask_files):
    mask = cv2.imread(f'segformer_data/masks/{mask_file}', cv2.IMREAD_GRAYSCALE)

    plt.subplot(rows, 3, idx + 1)
    plt.imshow(mask, cmap='tab20')
    plt.title(f'Mask: {mask_file}')
    plt.colorbar(label='Class ID')
plt.tight_layout()
plt.show()


In [4]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# ---------------------------
# Mask Creation Function (per image group)
# ---------------------------
def create_multiclass_mask_from_group(group, target_size=(512, 512)):
    """
    Build a fixed-size multi-class mask from all annotation rows of one image.

    Args:
        group (pd.DataFrame): Rows for a single image, each providing
            ``ClassId``, ``EncodedPixels``, ``Height`` and ``Width``.
        target_size (tuple): ``(height, width)`` of the returned mask.

    Returns:
        np.ndarray: uint8 array of shape ``target_size``. Pixels covered by an
        annotation hold its class id (later rows overwrite earlier ones);
        pixels covered by none keep the fill value 255.
    """
    out_h, out_w = target_size
    canvas = np.full((out_h, out_w), 255, dtype=np.uint8)

    for _, annotation in group.iterrows():
        label = int(annotation.ClassId)
        # Decode at the annotated resolution, then rescale to the target size.
        binary = rle_decode(annotation.EncodedPixels, (annotation.Height, annotation.Width))
        # cv2.resize takes dsize as (width, height); nearest-neighbour keeps the mask binary.
        scaled = cv2.resize(binary, (out_w, out_h), interpolation=cv2.INTER_NEAREST)
        canvas[scaled == 1] = label

    return canvas

# ---------------------------
# Image Processing Function (parallel unit)
# ---------------------------
def process_single_image(image_id, group, image_dir, output_dir, target_size=(512, 512)):
    """
    Write the resized mask and the resized JPEG for one image.

    Args:
        image_id: Image ID; also the file stem of the source and output files.
        group (pd.DataFrame): Annotation rows belonging to this image.
        image_dir (str): Folder containing ``<image_id>.jpg``.
        output_dir (str): Folder whose ``masks`` and ``images`` subfolders
            receive the outputs (they must already exist).
        target_size (tuple): ``(height, width)`` of both outputs, the same
            convention create_multiclass_mask_from_group uses.

    Returns:
        None. Failures are printed rather than raised, so one bad image does
        not abort a batch run.
    """
    try:
        # Create and save multiclass mask
        mask = create_multiclass_mask_from_group(group, target_size)
        mask_path = os.path.join(output_dir, "masks", f"{image_id}.png")
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(mask_path, mask):
            raise IOError(f"could not write mask to {mask_path}")

        # Resize and save the original image
        src_path = os.path.join(image_dir, f"{image_id}.jpg")
        dst_path = os.path.join(output_dir, "images", f"{image_id}.jpg")

        if not os.path.exists(src_path):
            # The mask above is kept; flag the missing counterpart instead of
            # skipping it silently.
            print(f"Source image missing for {image_id}: {src_path}")
            return

        height, width = target_size
        # The context manager closes the file handle once the copy is saved.
        with Image.open(src_path) as img:
            # PIL expects (width, height); swapping keeps the image aligned
            # with the (height, width) mask for non-square targets.
            resized = img.resize((width, height), Image.BILINEAR)
            resized.save(dst_path)
    except Exception as e:
        print(f"Error processing {image_id}: {e}")

# ---------------------------
# Main Dataset Processor
# ---------------------------
def process_full_dataset_parallel(df, image_dir, output_dir, target_size=(512, 512), num_workers=8):
    """
    Run process_single_image for every image in ``df`` on a thread pool.

    Args:
        df (pd.DataFrame): Annotation table; rows are grouped by ``ImageId``.
        image_dir (str): Folder containing the source JPEGs.
        output_dir (str): Destination root; ``masks`` and ``images``
            subfolders are created if missing.
        target_size (tuple): Passed through to process_single_image.
        num_workers (int): Number of worker threads.
    """
    for subfolder in ("masks", "images"):
        os.makedirs(os.path.join(output_dir, subfolder), exist_ok=True)

    per_image = df.groupby("ImageId")

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        # Queue one task per image up front.
        pending = []
        for image_id, rows in per_image:
            task = pool.submit(process_single_image, image_id, rows, image_dir, output_dir, target_size)
            pending.append(task)

        # Drain the futures only to advance the progress bar; results are unused.
        progress = tqdm(as_completed(pending), total=len(pending), desc="Processing full dataset")
        for _finished in progress:
            pass

In [5]:
# Convert the whole training set to 512x512 image/mask pairs.
# The recorded run below took about 22 minutes for 45,623 images.
process_full_dataset_parallel(
    df=train_df,
    image_dir=train_path,
    output_dir="/home/sergio/datasets/imaterialist_processed",
    num_workers=14,
    target_size=(512, 512),
)

Processing full dataset: 100%|██████████| 45623/45623 [22:20<00:00, 34.03it/s]
